{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.834616614325042, "global_step": 256000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.999907727286149e-05, "loss": 0.9528, "step": 5 }, { "epoch": 0.0, "learning_rate": 4.999815454572298e-05, "loss": 1.0191, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.9997231818584466e-05, "loss": 0.9677, "step": 15 }, { "epoch": 0.0, "learning_rate": 4.9996309091445954e-05, "loss": 1.0326, "step": 20 }, { "epoch": 0.0, "learning_rate": 4.999538636430744e-05, "loss": 1.0034, "step": 25 }, { "epoch": 0.0, "learning_rate": 4.999446363716892e-05, "loss": 1.0042, "step": 30 }, { "epoch": 0.0, "learning_rate": 4.999354091003042e-05, "loss": 1.0682, "step": 35 }, { "epoch": 0.0, "learning_rate": 4.9992618182891905e-05, "loss": 0.9857, "step": 40 }, { "epoch": 0.0, "learning_rate": 4.999169545575339e-05, "loss": 1.01, "step": 45 }, { "epoch": 0.0, "learning_rate": 4.9990772728614874e-05, "loss": 0.989, "step": 50 }, { "epoch": 0.0, "learning_rate": 4.998985000147637e-05, "loss": 0.9856, "step": 55 }, { "epoch": 0.0, "learning_rate": 4.9988927274337857e-05, "loss": 1.0217, "step": 60 }, { "epoch": 0.0, "learning_rate": 4.998800454719934e-05, "loss": 0.9198, "step": 65 }, { "epoch": 0.0, "learning_rate": 4.9987081820060826e-05, "loss": 1.0525, "step": 70 }, { "epoch": 0.0, "learning_rate": 4.998615909292232e-05, "loss": 1.0705, "step": 75 }, { "epoch": 0.0, "learning_rate": 4.998523636578381e-05, "loss": 0.9338, "step": 80 }, { "epoch": 0.0, "learning_rate": 4.998431363864529e-05, "loss": 0.9665, "step": 85 }, { "epoch": 0.0, "learning_rate": 4.998339091150678e-05, "loss": 1.0085, "step": 90 }, { "epoch": 0.0, "learning_rate": 4.9982468184368265e-05, "loss": 1.0675, "step": 95 }, { "epoch": 0.0, "learning_rate": 4.998154545722975e-05, "loss": 0.9891, "step": 100 }, { "epoch": 0.0, "learning_rate": 4.998062273009124e-05, "loss": 1.055, "step": 105 }, { "epoch": 0.0, "learning_rate": 4.997970000295273e-05, "loss": 1.0032, "step": 110 }, { "epoch": 0.0, "learning_rate": 4.9978777275814216e-05, "loss": 0.9769, "step": 115 }, { "epoch": 0.0, "learning_rate": 4.9977854548675704e-05, "loss": 1.0839, "step": 120 }, { "epoch": 0.0, "learning_rate": 4.997693182153719e-05, "loss": 1.0876, "step": 125 }, { "epoch": 0.0, "learning_rate": 4.997600909439868e-05, "loss": 1.0305, "step": 130 }, { "epoch": 0.0, "learning_rate": 4.997508636726017e-05, "loss": 0.9196, "step": 135 }, { "epoch": 0.0, "learning_rate": 4.9974163640121655e-05, "loss": 0.9994, "step": 140 }, { "epoch": 0.0, "learning_rate": 4.997324091298314e-05, "loss": 1.008, "step": 145 }, { "epoch": 0.0, "learning_rate": 4.997231818584463e-05, "loss": 0.9524, "step": 150 }, { "epoch": 0.0, "learning_rate": 4.997139545870612e-05, "loss": 1.0251, "step": 155 }, { "epoch": 0.0, "learning_rate": 4.99704727315676e-05, "loss": 0.9604, "step": 160 }, { "epoch": 0.0, "learning_rate": 4.9969550004429095e-05, "loss": 1.0695, "step": 165 }, { "epoch": 0.0, "learning_rate": 4.996862727729058e-05, "loss": 0.967, "step": 170 }, { "epoch": 0.0, "learning_rate": 4.9967704550152064e-05, "loss": 0.9715, "step": 175 }, { "epoch": 0.0, "learning_rate": 4.996678182301355e-05, "loss": 0.9976, "step": 180 }, { "epoch": 0.0, "learning_rate": 4.9965859095875046e-05, "loss": 1.0235, "step": 185 }, { "epoch": 0.0, "learning_rate": 4.9964936368736534e-05, "loss": 1.0077, "step": 190 }, { "epoch": 0.0, "learning_rate": 4.9964013641598015e-05, "loss": 0.9921, "step": 195 }, { "epoch": 0.0, "learning_rate": 4.99630909144595e-05, "loss": 1.0489, "step": 200 }, { "epoch": 0.0, "learning_rate": 4.9962168187321e-05, "loss": 1.0232, "step": 205 }, { "epoch": 0.0, "learning_rate": 4.9961245460182485e-05, "loss": 0.9818, "step": 210 }, { "epoch": 0.0, "learning_rate": 4.9960322733043966e-05, "loss": 1.0347, "step": 215 }, { "epoch": 0.0, "learning_rate": 4.9959400005905454e-05, "loss": 1.0089, "step": 220 }, { "epoch": 0.0, "learning_rate": 4.995847727876695e-05, "loss": 0.9752, "step": 225 }, { "epoch": 0.0, "learning_rate": 4.995755455162843e-05, "loss": 1.0043, "step": 230 }, { "epoch": 0.0, "learning_rate": 4.995663182448992e-05, "loss": 0.994, "step": 235 }, { "epoch": 0.0, "learning_rate": 4.9955709097351406e-05, "loss": 1.0075, "step": 240 }, { "epoch": 0.0, "learning_rate": 4.9954786370212893e-05, "loss": 0.9185, "step": 245 }, { "epoch": 0.0, "learning_rate": 4.995386364307438e-05, "loss": 0.9101, "step": 250 }, { "epoch": 0.0, "learning_rate": 4.995294091593587e-05, "loss": 0.9362, "step": 255 }, { "epoch": 0.0, "learning_rate": 4.995201818879736e-05, "loss": 0.948, "step": 260 }, { "epoch": 0.0, "learning_rate": 4.9951095461658845e-05, "loss": 1.0611, "step": 265 }, { "epoch": 0.0, "learning_rate": 4.995017273452033e-05, "loss": 1.0085, "step": 270 }, { "epoch": 0.0, "learning_rate": 4.994925000738182e-05, "loss": 1.0018, "step": 275 }, { "epoch": 0.0, "learning_rate": 4.994832728024331e-05, "loss": 0.941, "step": 280 }, { "epoch": 0.0, "learning_rate": 4.9947404553104796e-05, "loss": 0.9757, "step": 285 }, { "epoch": 0.0, "learning_rate": 4.9946481825966284e-05, "loss": 1.0074, "step": 290 }, { "epoch": 0.0, "learning_rate": 4.994555909882777e-05, "loss": 0.9883, "step": 295 }, { "epoch": 0.0, "learning_rate": 4.994463637168926e-05, "loss": 0.9757, "step": 300 }, { "epoch": 0.0, "learning_rate": 4.994371364455074e-05, "loss": 1.0218, "step": 305 }, { "epoch": 0.0, "learning_rate": 4.994279091741223e-05, "loss": 0.9538, "step": 310 }, { "epoch": 0.0, "learning_rate": 4.994186819027372e-05, "loss": 1.0483, "step": 315 }, { "epoch": 0.0, "learning_rate": 4.994094546313521e-05, "loss": 1.0316, "step": 320 }, { "epoch": 0.0, "learning_rate": 4.994002273599669e-05, "loss": 0.9486, "step": 325 }, { "epoch": 0.0, "learning_rate": 4.993910000885818e-05, "loss": 1.0318, "step": 330 }, { "epoch": 0.0, "learning_rate": 4.9938177281719675e-05, "loss": 0.9906, "step": 335 }, { "epoch": 0.0, "learning_rate": 4.9937254554581156e-05, "loss": 0.9916, "step": 340 }, { "epoch": 0.0, "learning_rate": 4.9936331827442644e-05, "loss": 1.0364, "step": 345 }, { "epoch": 0.0, "learning_rate": 4.993540910030413e-05, "loss": 1.0262, "step": 350 }, { "epoch": 0.0, "learning_rate": 4.9934486373165626e-05, "loss": 0.9732, "step": 355 }, { "epoch": 0.0, "learning_rate": 4.993356364602711e-05, "loss": 0.9199, "step": 360 }, { "epoch": 0.0, "learning_rate": 4.9932640918888595e-05, "loss": 1.0342, "step": 365 }, { "epoch": 0.0, "learning_rate": 4.993171819175008e-05, "loss": 0.9275, "step": 370 }, { "epoch": 0.0, "learning_rate": 4.993079546461157e-05, "loss": 0.9716, "step": 375 }, { "epoch": 0.0, "learning_rate": 4.992987273747306e-05, "loss": 1.0296, "step": 380 }, { "epoch": 0.0, "learning_rate": 4.9928950010334546e-05, "loss": 0.9812, "step": 385 }, { "epoch": 0.0, "learning_rate": 4.9928027283196034e-05, "loss": 1.0041, "step": 390 }, { "epoch": 0.0, "learning_rate": 4.992710455605752e-05, "loss": 1.0149, "step": 395 }, { "epoch": 0.0, "learning_rate": 4.992618182891901e-05, "loss": 0.998, "step": 400 }, { "epoch": 0.0, "learning_rate": 4.99252591017805e-05, "loss": 0.9825, "step": 405 }, { "epoch": 0.0, "learning_rate": 4.9924336374641986e-05, "loss": 1.0836, "step": 410 }, { "epoch": 0.0, "learning_rate": 4.992341364750347e-05, "loss": 1.0269, "step": 415 }, { "epoch": 0.0, "learning_rate": 4.992249092036496e-05, "loss": 0.9462, "step": 420 }, { "epoch": 0.0, "learning_rate": 4.992156819322645e-05, "loss": 1.0397, "step": 425 }, { "epoch": 0.0, "learning_rate": 4.992064546608794e-05, "loss": 1.0216, "step": 430 }, { "epoch": 0.0, "learning_rate": 4.991972273894942e-05, "loss": 0.9743, "step": 435 }, { "epoch": 0.0, "learning_rate": 4.991880001181091e-05, "loss": 0.9766, "step": 440 }, { "epoch": 0.0, "learning_rate": 4.99178772846724e-05, "loss": 1.0531, "step": 445 }, { "epoch": 0.0, "learning_rate": 4.991695455753388e-05, "loss": 0.9962, "step": 450 }, { "epoch": 0.01, "learning_rate": 4.991603183039537e-05, "loss": 0.9282, "step": 455 }, { "epoch": 0.01, "learning_rate": 4.991510910325686e-05, "loss": 0.9991, "step": 460 }, { "epoch": 0.01, "learning_rate": 4.991418637611835e-05, "loss": 0.9585, "step": 465 }, { "epoch": 0.01, "learning_rate": 4.991326364897983e-05, "loss": 0.9337, "step": 470 }, { "epoch": 0.01, "learning_rate": 4.991234092184132e-05, "loss": 0.9207, "step": 475 }, { "epoch": 0.01, "learning_rate": 4.991141819470281e-05, "loss": 0.9575, "step": 480 }, { "epoch": 0.01, "learning_rate": 4.99104954675643e-05, "loss": 1.0227, "step": 485 }, { "epoch": 0.01, "learning_rate": 4.9909572740425784e-05, "loss": 0.9686, "step": 490 }, { "epoch": 0.01, "learning_rate": 4.990865001328727e-05, "loss": 0.9957, "step": 495 }, { "epoch": 0.01, "learning_rate": 4.990772728614876e-05, "loss": 0.9522, "step": 500 }, { "epoch": 0.01, "learning_rate": 4.990680455901025e-05, "loss": 1.0233, "step": 505 }, { "epoch": 0.01, "learning_rate": 4.9905881831871736e-05, "loss": 0.9211, "step": 510 }, { "epoch": 0.01, "learning_rate": 4.9904959104733224e-05, "loss": 0.946, "step": 515 }, { "epoch": 0.01, "learning_rate": 4.990403637759471e-05, "loss": 0.9306, "step": 520 }, { "epoch": 0.01, "learning_rate": 4.99031136504562e-05, "loss": 0.9949, "step": 525 }, { "epoch": 0.01, "learning_rate": 4.990219092331769e-05, "loss": 0.9668, "step": 530 }, { "epoch": 0.01, "learning_rate": 4.9901268196179175e-05, "loss": 0.9482, "step": 535 }, { "epoch": 0.01, "learning_rate": 4.990034546904066e-05, "loss": 0.9728, "step": 540 }, { "epoch": 0.01, "learning_rate": 4.9899422741902144e-05, "loss": 0.9631, "step": 545 }, { "epoch": 0.01, "learning_rate": 4.989850001476364e-05, "loss": 0.974, "step": 550 }, { "epoch": 0.01, "learning_rate": 4.9897577287625126e-05, "loss": 0.9897, "step": 555 }, { "epoch": 0.01, "learning_rate": 4.989665456048661e-05, "loss": 0.9909, "step": 560 }, { "epoch": 0.01, "learning_rate": 4.9895731833348095e-05, "loss": 0.9949, "step": 565 }, { "epoch": 0.01, "learning_rate": 4.989480910620959e-05, "loss": 1.0296, "step": 570 }, { "epoch": 0.01, "learning_rate": 4.989388637907108e-05, "loss": 0.9903, "step": 575 }, { "epoch": 0.01, "learning_rate": 4.989296365193256e-05, "loss": 0.9881, "step": 580 }, { "epoch": 0.01, "learning_rate": 4.989204092479405e-05, "loss": 1.0673, "step": 585 }, { "epoch": 0.01, "learning_rate": 4.989111819765554e-05, "loss": 1.0506, "step": 590 }, { "epoch": 0.01, "learning_rate": 4.989019547051703e-05, "loss": 0.9108, "step": 595 }, { "epoch": 0.01, "learning_rate": 4.988927274337851e-05, "loss": 1.0428, "step": 600 }, { "epoch": 0.01, "learning_rate": 4.988835001624e-05, "loss": 1.004, "step": 605 }, { "epoch": 0.01, "learning_rate": 4.9887427289101486e-05, "loss": 0.9228, "step": 610 }, { "epoch": 0.01, "learning_rate": 4.9886504561962974e-05, "loss": 0.9692, "step": 615 }, { "epoch": 0.01, "learning_rate": 4.988558183482446e-05, "loss": 0.9684, "step": 620 }, { "epoch": 0.01, "learning_rate": 4.988465910768595e-05, "loss": 1.0761, "step": 625 }, { "epoch": 0.01, "learning_rate": 4.988373638054744e-05, "loss": 0.9794, "step": 630 }, { "epoch": 0.01, "learning_rate": 4.9882813653408925e-05, "loss": 0.9983, "step": 635 }, { "epoch": 0.01, "learning_rate": 4.988189092627041e-05, "loss": 0.993, "step": 640 }, { "epoch": 0.01, "learning_rate": 4.98809681991319e-05, "loss": 1.0008, "step": 645 }, { "epoch": 0.01, "learning_rate": 4.988004547199339e-05, "loss": 0.8834, "step": 650 }, { "epoch": 0.01, "learning_rate": 4.987912274485488e-05, "loss": 0.9633, "step": 655 }, { "epoch": 0.01, "learning_rate": 4.9878200017716365e-05, "loss": 0.9727, "step": 660 }, { "epoch": 0.01, "learning_rate": 4.987727729057785e-05, "loss": 0.9831, "step": 665 }, { "epoch": 0.01, "learning_rate": 4.987635456343934e-05, "loss": 0.9661, "step": 670 }, { "epoch": 0.01, "learning_rate": 4.987543183630083e-05, "loss": 0.9811, "step": 675 }, { "epoch": 0.01, "learning_rate": 4.9874509109162316e-05, "loss": 0.9873, "step": 680 }, { "epoch": 0.01, "learning_rate": 4.9873586382023804e-05, "loss": 1.0074, "step": 685 }, { "epoch": 0.01, "learning_rate": 4.9872663654885285e-05, "loss": 0.9677, "step": 690 }, { "epoch": 0.01, "learning_rate": 4.987174092774677e-05, "loss": 0.998, "step": 695 }, { "epoch": 0.01, "learning_rate": 4.987081820060827e-05, "loss": 0.9264, "step": 700 }, { "epoch": 0.01, "learning_rate": 4.9869895473469755e-05, "loss": 1.0097, "step": 705 }, { "epoch": 0.01, "learning_rate": 4.9868972746331236e-05, "loss": 0.996, "step": 710 }, { "epoch": 0.01, "learning_rate": 4.9868050019192724e-05, "loss": 0.9297, "step": 715 }, { "epoch": 0.01, "learning_rate": 4.986712729205422e-05, "loss": 0.9251, "step": 720 }, { "epoch": 0.01, "learning_rate": 4.98662045649157e-05, "loss": 1.0195, "step": 725 }, { "epoch": 0.01, "learning_rate": 4.986528183777719e-05, "loss": 0.9447, "step": 730 }, { "epoch": 0.01, "learning_rate": 4.9864359110638675e-05, "loss": 0.9538, "step": 735 }, { "epoch": 0.01, "learning_rate": 4.986343638350017e-05, "loss": 0.9653, "step": 740 }, { "epoch": 0.01, "learning_rate": 4.986251365636165e-05, "loss": 0.9194, "step": 745 }, { "epoch": 0.01, "learning_rate": 4.986159092922314e-05, "loss": 0.975, "step": 750 }, { "epoch": 0.01, "learning_rate": 4.986066820208463e-05, "loss": 0.9534, "step": 755 }, { "epoch": 0.01, "learning_rate": 4.9859745474946115e-05, "loss": 0.9435, "step": 760 }, { "epoch": 0.01, "learning_rate": 4.98588227478076e-05, "loss": 0.9684, "step": 765 }, { "epoch": 0.01, "learning_rate": 4.985790002066909e-05, "loss": 0.9541, "step": 770 }, { "epoch": 0.01, "learning_rate": 4.985697729353058e-05, "loss": 0.9813, "step": 775 }, { "epoch": 0.01, "learning_rate": 4.9856054566392066e-05, "loss": 1.0425, "step": 780 }, { "epoch": 0.01, "learning_rate": 4.9855131839253554e-05, "loss": 1.0582, "step": 785 }, { "epoch": 0.01, "learning_rate": 4.985420911211504e-05, "loss": 0.9428, "step": 790 }, { "epoch": 0.01, "learning_rate": 4.985328638497653e-05, "loss": 0.948, "step": 795 }, { "epoch": 0.01, "learning_rate": 4.985236365783801e-05, "loss": 1.0074, "step": 800 }, { "epoch": 0.01, "learning_rate": 4.9851440930699505e-05, "loss": 0.9166, "step": 805 }, { "epoch": 0.01, "learning_rate": 4.985051820356099e-05, "loss": 0.9683, "step": 810 }, { "epoch": 0.01, "learning_rate": 4.984959547642248e-05, "loss": 1.0138, "step": 815 }, { "epoch": 0.01, "learning_rate": 4.984867274928396e-05, "loss": 0.871, "step": 820 }, { "epoch": 0.01, "learning_rate": 4.984775002214546e-05, "loss": 0.9022, "step": 825 }, { "epoch": 0.01, "learning_rate": 4.9846827295006945e-05, "loss": 0.8935, "step": 830 }, { "epoch": 0.01, "learning_rate": 4.9845904567868426e-05, "loss": 0.9646, "step": 835 }, { "epoch": 0.01, "learning_rate": 4.9844981840729914e-05, "loss": 0.9367, "step": 840 }, { "epoch": 0.01, "learning_rate": 4.98440591135914e-05, "loss": 0.9544, "step": 845 }, { "epoch": 0.01, "learning_rate": 4.9843136386452896e-05, "loss": 0.9892, "step": 850 }, { "epoch": 0.01, "learning_rate": 4.984221365931438e-05, "loss": 0.9645, "step": 855 }, { "epoch": 0.01, "learning_rate": 4.9841290932175865e-05, "loss": 0.9908, "step": 860 }, { "epoch": 0.01, "learning_rate": 4.984036820503735e-05, "loss": 1.0409, "step": 865 }, { "epoch": 0.01, "learning_rate": 4.983944547789884e-05, "loss": 0.9582, "step": 870 }, { "epoch": 0.01, "learning_rate": 4.983852275076033e-05, "loss": 0.9863, "step": 875 }, { "epoch": 0.01, "learning_rate": 4.9837600023621816e-05, "loss": 0.9867, "step": 880 }, { "epoch": 0.01, "learning_rate": 4.9836677296483304e-05, "loss": 0.9045, "step": 885 }, { "epoch": 0.01, "learning_rate": 4.983575456934479e-05, "loss": 1.0254, "step": 890 }, { "epoch": 0.01, "learning_rate": 4.983483184220628e-05, "loss": 0.9339, "step": 895 }, { "epoch": 0.01, "learning_rate": 4.983390911506777e-05, "loss": 0.9808, "step": 900 }, { "epoch": 0.01, "learning_rate": 4.9832986387929256e-05, "loss": 0.9423, "step": 905 }, { "epoch": 0.01, "learning_rate": 4.9832063660790743e-05, "loss": 0.967, "step": 910 }, { "epoch": 0.01, "learning_rate": 4.983114093365223e-05, "loss": 0.9697, "step": 915 }, { "epoch": 0.01, "learning_rate": 4.983021820651372e-05, "loss": 1.0483, "step": 920 }, { "epoch": 0.01, "learning_rate": 4.982929547937521e-05, "loss": 0.9211, "step": 925 }, { "epoch": 0.01, "learning_rate": 4.982837275223669e-05, "loss": 0.9467, "step": 930 }, { "epoch": 0.01, "learning_rate": 4.982745002509818e-05, "loss": 0.9999, "step": 935 }, { "epoch": 0.01, "learning_rate": 4.982652729795967e-05, "loss": 0.9345, "step": 940 }, { "epoch": 0.01, "learning_rate": 4.982560457082115e-05, "loss": 0.9662, "step": 945 }, { "epoch": 0.01, "learning_rate": 4.982468184368264e-05, "loss": 1.0115, "step": 950 }, { "epoch": 0.01, "learning_rate": 4.9823759116544134e-05, "loss": 0.9175, "step": 955 }, { "epoch": 0.01, "learning_rate": 4.982283638940562e-05, "loss": 0.9968, "step": 960 }, { "epoch": 0.01, "learning_rate": 4.98219136622671e-05, "loss": 1.0712, "step": 965 }, { "epoch": 0.01, "learning_rate": 4.982099093512859e-05, "loss": 1.0678, "step": 970 }, { "epoch": 0.01, "learning_rate": 4.9820068207990085e-05, "loss": 1.0283, "step": 975 }, { "epoch": 0.01, "learning_rate": 4.981914548085157e-05, "loss": 0.886, "step": 980 }, { "epoch": 0.01, "learning_rate": 4.9818222753713054e-05, "loss": 0.9796, "step": 985 }, { "epoch": 0.01, "learning_rate": 4.981730002657454e-05, "loss": 0.9672, "step": 990 }, { "epoch": 0.01, "learning_rate": 4.981637729943603e-05, "loss": 0.9905, "step": 995 }, { "epoch": 0.01, "learning_rate": 4.981545457229752e-05, "loss": 0.9823, "step": 1000 }, { "epoch": 0.01, "eval_loss": 0.9163055419921875, "eval_runtime": 69.3681, "eval_samples_per_second": 28.832, "eval_steps_per_second": 14.416, "step": 1000 }, { "epoch": 0.01, "learning_rate": 4.9814531845159006e-05, "loss": 1.0212, "step": 1005 }, { "epoch": 0.01, "learning_rate": 4.9813609118020494e-05, "loss": 0.9344, "step": 1010 }, { "epoch": 0.01, "learning_rate": 4.981268639088198e-05, "loss": 1.0144, "step": 1015 }, { "epoch": 0.01, "learning_rate": 4.981176366374347e-05, "loss": 0.9369, "step": 1020 }, { "epoch": 0.01, "learning_rate": 4.981084093660496e-05, "loss": 1.0223, "step": 1025 }, { "epoch": 0.01, "learning_rate": 4.9809918209466445e-05, "loss": 0.9232, "step": 1030 }, { "epoch": 0.01, "learning_rate": 4.980899548232793e-05, "loss": 0.9884, "step": 1035 }, { "epoch": 0.01, "learning_rate": 4.980807275518942e-05, "loss": 1.0576, "step": 1040 }, { "epoch": 0.01, "learning_rate": 4.980715002805091e-05, "loss": 0.9989, "step": 1045 }, { "epoch": 0.01, "learning_rate": 4.9806227300912396e-05, "loss": 0.9099, "step": 1050 }, { "epoch": 0.01, "learning_rate": 4.9805304573773884e-05, "loss": 0.8981, "step": 1055 }, { "epoch": 0.01, "learning_rate": 4.980438184663537e-05, "loss": 1.0568, "step": 1060 }, { "epoch": 0.01, "learning_rate": 4.980345911949686e-05, "loss": 0.95, "step": 1065 }, { "epoch": 0.01, "learning_rate": 4.980253639235835e-05, "loss": 0.9245, "step": 1070 }, { "epoch": 0.01, "learning_rate": 4.980161366521983e-05, "loss": 0.9722, "step": 1075 }, { "epoch": 0.01, "learning_rate": 4.980069093808132e-05, "loss": 0.9846, "step": 1080 }, { "epoch": 0.01, "learning_rate": 4.979976821094281e-05, "loss": 0.9681, "step": 1085 }, { "epoch": 0.01, "learning_rate": 4.97988454838043e-05, "loss": 0.9273, "step": 1090 }, { "epoch": 0.01, "learning_rate": 4.979792275666578e-05, "loss": 0.9582, "step": 1095 }, { "epoch": 0.01, "learning_rate": 4.979700002952727e-05, "loss": 0.9246, "step": 1100 }, { "epoch": 0.01, "learning_rate": 4.979607730238876e-05, "loss": 0.9643, "step": 1105 }, { "epoch": 0.01, "learning_rate": 4.9795154575250244e-05, "loss": 0.995, "step": 1110 }, { "epoch": 0.01, "learning_rate": 4.979423184811173e-05, "loss": 1.0119, "step": 1115 }, { "epoch": 0.01, "learning_rate": 4.979330912097322e-05, "loss": 0.9503, "step": 1120 }, { "epoch": 0.01, "learning_rate": 4.9792386393834714e-05, "loss": 0.8601, "step": 1125 }, { "epoch": 0.01, "learning_rate": 4.9791463666696195e-05, "loss": 0.966, "step": 1130 }, { "epoch": 0.01, "learning_rate": 4.979054093955768e-05, "loss": 1.019, "step": 1135 }, { "epoch": 0.01, "learning_rate": 4.978961821241917e-05, "loss": 1.0305, "step": 1140 }, { "epoch": 0.01, "learning_rate": 4.978869548528066e-05, "loss": 0.9977, "step": 1145 }, { "epoch": 0.01, "learning_rate": 4.9787772758142147e-05, "loss": 0.9303, "step": 1150 }, { "epoch": 0.01, "learning_rate": 4.9786850031003634e-05, "loss": 1.0011, "step": 1155 }, { "epoch": 0.01, "learning_rate": 4.978592730386512e-05, "loss": 0.9717, "step": 1160 }, { "epoch": 0.01, "learning_rate": 4.978500457672661e-05, "loss": 0.9862, "step": 1165 }, { "epoch": 0.01, "learning_rate": 4.97840818495881e-05, "loss": 1.0526, "step": 1170 }, { "epoch": 0.01, "learning_rate": 4.9783159122449586e-05, "loss": 0.9647, "step": 1175 }, { "epoch": 0.01, "learning_rate": 4.9782236395311074e-05, "loss": 0.918, "step": 1180 }, { "epoch": 0.01, "learning_rate": 4.9781313668172555e-05, "loss": 0.9985, "step": 1185 }, { "epoch": 0.01, "learning_rate": 4.978039094103405e-05, "loss": 0.9678, "step": 1190 }, { "epoch": 0.01, "learning_rate": 4.977946821389554e-05, "loss": 0.9725, "step": 1195 }, { "epoch": 0.01, "learning_rate": 4.9778545486757025e-05, "loss": 0.965, "step": 1200 }, { "epoch": 0.01, "learning_rate": 4.9777622759618506e-05, "loss": 0.9597, "step": 1205 }, { "epoch": 0.01, "learning_rate": 4.977670003248e-05, "loss": 0.9257, "step": 1210 }, { "epoch": 0.01, "learning_rate": 4.977577730534149e-05, "loss": 0.9117, "step": 1215 }, { "epoch": 0.01, "learning_rate": 4.977485457820297e-05, "loss": 0.9739, "step": 1220 }, { "epoch": 0.01, "learning_rate": 4.977393185106446e-05, "loss": 0.9321, "step": 1225 }, { "epoch": 0.01, "learning_rate": 4.9773009123925945e-05, "loss": 0.9558, "step": 1230 }, { "epoch": 0.01, "learning_rate": 4.977208639678744e-05, "loss": 0.9229, "step": 1235 }, { "epoch": 0.01, "learning_rate": 4.977116366964892e-05, "loss": 0.8697, "step": 1240 }, { "epoch": 0.01, "learning_rate": 4.977024094251041e-05, "loss": 0.8818, "step": 1245 }, { "epoch": 0.01, "learning_rate": 4.97693182153719e-05, "loss": 0.9556, "step": 1250 }, { "epoch": 0.01, "learning_rate": 4.976839548823339e-05, "loss": 0.9473, "step": 1255 }, { "epoch": 0.01, "learning_rate": 4.976747276109487e-05, "loss": 0.9607, "step": 1260 }, { "epoch": 0.01, "learning_rate": 4.976655003395636e-05, "loss": 0.9463, "step": 1265 }, { "epoch": 0.01, "learning_rate": 4.976562730681785e-05, "loss": 0.9805, "step": 1270 }, { "epoch": 0.01, "learning_rate": 4.9764704579679336e-05, "loss": 0.8787, "step": 1275 }, { "epoch": 0.01, "learning_rate": 4.9763781852540824e-05, "loss": 1.0168, "step": 1280 }, { "epoch": 0.01, "learning_rate": 4.976285912540231e-05, "loss": 0.9632, "step": 1285 }, { "epoch": 0.01, "learning_rate": 4.97619363982638e-05, "loss": 1.0005, "step": 1290 }, { "epoch": 0.01, "learning_rate": 4.976101367112528e-05, "loss": 0.9415, "step": 1295 }, { "epoch": 0.01, "learning_rate": 4.9760090943986775e-05, "loss": 0.9197, "step": 1300 }, { "epoch": 0.01, "learning_rate": 4.975916821684826e-05, "loss": 1.0212, "step": 1305 }, { "epoch": 0.01, "learning_rate": 4.975824548970975e-05, "loss": 0.9405, "step": 1310 }, { "epoch": 0.01, "learning_rate": 4.975732276257123e-05, "loss": 0.9399, "step": 1315 }, { "epoch": 0.01, "learning_rate": 4.975640003543273e-05, "loss": 0.9238, "step": 1320 }, { "epoch": 0.01, "learning_rate": 4.9755477308294215e-05, "loss": 0.9159, "step": 1325 }, { "epoch": 0.01, "learning_rate": 4.97545545811557e-05, "loss": 0.923, "step": 1330 }, { "epoch": 0.01, "learning_rate": 4.9753631854017183e-05, "loss": 0.9468, "step": 1335 }, { "epoch": 0.01, "learning_rate": 4.975270912687868e-05, "loss": 1.0371, "step": 1340 }, { "epoch": 0.01, "learning_rate": 4.9751786399740166e-05, "loss": 0.8419, "step": 1345 }, { "epoch": 0.01, "learning_rate": 4.975086367260165e-05, "loss": 0.8929, "step": 1350 }, { "epoch": 0.02, "learning_rate": 4.9749940945463135e-05, "loss": 0.9852, "step": 1355 }, { "epoch": 0.02, "learning_rate": 4.974901821832463e-05, "loss": 0.9332, "step": 1360 }, { "epoch": 0.02, "learning_rate": 4.974809549118612e-05, "loss": 0.9442, "step": 1365 }, { "epoch": 0.02, "learning_rate": 4.97471727640476e-05, "loss": 0.9414, "step": 1370 }, { "epoch": 0.02, "learning_rate": 4.9746250036909086e-05, "loss": 0.9526, "step": 1375 }, { "epoch": 0.02, "learning_rate": 4.9745327309770574e-05, "loss": 0.9654, "step": 1380 }, { "epoch": 0.02, "learning_rate": 4.974440458263206e-05, "loss": 0.9155, "step": 1385 }, { "epoch": 0.02, "learning_rate": 4.974348185549355e-05, "loss": 0.914, "step": 1390 }, { "epoch": 0.02, "learning_rate": 4.974255912835504e-05, "loss": 1.0817, "step": 1395 }, { "epoch": 0.02, "learning_rate": 4.9741636401216525e-05, "loss": 0.9778, "step": 1400 }, { "epoch": 0.02, "learning_rate": 4.974071367407801e-05, "loss": 0.9294, "step": 1405 }, { "epoch": 0.02, "learning_rate": 4.97397909469395e-05, "loss": 0.9869, "step": 1410 }, { "epoch": 0.02, "learning_rate": 4.973886821980099e-05, "loss": 1.0016, "step": 1415 }, { "epoch": 0.02, "learning_rate": 4.973794549266248e-05, "loss": 0.9656, "step": 1420 }, { "epoch": 0.02, "learning_rate": 4.9737022765523965e-05, "loss": 0.9673, "step": 1425 }, { "epoch": 0.02, "learning_rate": 4.973610003838545e-05, "loss": 1.0154, "step": 1430 }, { "epoch": 0.02, "learning_rate": 4.973517731124694e-05, "loss": 0.9812, "step": 1435 }, { "epoch": 0.02, "learning_rate": 4.973425458410843e-05, "loss": 0.9212, "step": 1440 }, { "epoch": 0.02, "learning_rate": 4.973333185696991e-05, "loss": 0.9743, "step": 1445 }, { "epoch": 0.02, "learning_rate": 4.9732409129831404e-05, "loss": 0.91, "step": 1450 }, { "epoch": 0.02, "learning_rate": 4.973148640269289e-05, "loss": 0.9271, "step": 1455 }, { "epoch": 0.02, "learning_rate": 4.973056367555437e-05, "loss": 0.9351, "step": 1460 }, { "epoch": 0.02, "learning_rate": 4.972964094841586e-05, "loss": 1.0561, "step": 1465 }, { "epoch": 0.02, "learning_rate": 4.9728718221277355e-05, "loss": 0.9273, "step": 1470 }, { "epoch": 0.02, "learning_rate": 4.972779549413884e-05, "loss": 1.0007, "step": 1475 }, { "epoch": 0.02, "learning_rate": 4.9726872767000324e-05, "loss": 0.9116, "step": 1480 }, { "epoch": 0.02, "learning_rate": 4.972595003986181e-05, "loss": 1.0223, "step": 1485 }, { "epoch": 0.02, "learning_rate": 4.972502731272331e-05, "loss": 0.9844, "step": 1490 }, { "epoch": 0.02, "learning_rate": 4.972410458558479e-05, "loss": 0.995, "step": 1495 }, { "epoch": 0.02, "learning_rate": 4.9723181858446276e-05, "loss": 1.0208, "step": 1500 }, { "epoch": 0.02, "learning_rate": 4.9722259131307764e-05, "loss": 0.9638, "step": 1505 }, { "epoch": 0.02, "learning_rate": 4.972133640416926e-05, "loss": 0.99, "step": 1510 }, { "epoch": 0.02, "learning_rate": 4.972041367703074e-05, "loss": 0.9295, "step": 1515 }, { "epoch": 0.02, "learning_rate": 4.971949094989223e-05, "loss": 0.9179, "step": 1520 }, { "epoch": 0.02, "learning_rate": 4.9718568222753715e-05, "loss": 0.9931, "step": 1525 }, { "epoch": 0.02, "learning_rate": 4.97176454956152e-05, "loss": 0.9537, "step": 1530 }, { "epoch": 0.02, "learning_rate": 4.971672276847669e-05, "loss": 0.9223, "step": 1535 }, { "epoch": 0.02, "learning_rate": 4.971580004133818e-05, "loss": 0.9609, "step": 1540 }, { "epoch": 0.02, "learning_rate": 4.9714877314199666e-05, "loss": 0.9262, "step": 1545 }, { "epoch": 0.02, "learning_rate": 4.9713954587061154e-05, "loss": 0.9042, "step": 1550 }, { "epoch": 0.02, "learning_rate": 4.971303185992264e-05, "loss": 0.879, "step": 1555 }, { "epoch": 0.02, "learning_rate": 4.971210913278413e-05, "loss": 0.9784, "step": 1560 }, { "epoch": 0.02, "learning_rate": 4.971118640564562e-05, "loss": 0.9631, "step": 1565 }, { "epoch": 0.02, "learning_rate": 4.97102636785071e-05, "loss": 0.9338, "step": 1570 }, { "epoch": 0.02, "learning_rate": 4.9709340951368593e-05, "loss": 0.9788, "step": 1575 }, { "epoch": 0.02, "learning_rate": 4.970841822423008e-05, "loss": 0.9767, "step": 1580 }, { "epoch": 0.02, "learning_rate": 4.970749549709157e-05, "loss": 0.9496, "step": 1585 }, { "epoch": 0.02, "learning_rate": 4.970657276995305e-05, "loss": 0.9349, "step": 1590 }, { "epoch": 0.02, "learning_rate": 4.970565004281454e-05, "loss": 0.9666, "step": 1595 }, { "epoch": 0.02, "learning_rate": 4.970472731567603e-05, "loss": 0.9572, "step": 1600 }, { "epoch": 0.02, "learning_rate": 4.9703804588537514e-05, "loss": 0.897, "step": 1605 }, { "epoch": 0.02, "learning_rate": 4.9702881861399e-05, "loss": 0.9582, "step": 1610 }, { "epoch": 0.02, "learning_rate": 4.970195913426049e-05, "loss": 0.9802, "step": 1615 }, { "epoch": 0.02, "learning_rate": 4.9701036407121984e-05, "loss": 0.9707, "step": 1620 }, { "epoch": 0.02, "learning_rate": 4.9700113679983465e-05, "loss": 1.0545, "step": 1625 }, { "epoch": 0.02, "learning_rate": 4.969919095284495e-05, "loss": 1.0397, "step": 1630 }, { "epoch": 0.02, "learning_rate": 4.969826822570644e-05, "loss": 0.9567, "step": 1635 }, { "epoch": 0.02, "learning_rate": 4.9697345498567935e-05, "loss": 0.9651, "step": 1640 }, { "epoch": 0.02, "learning_rate": 4.9696422771429417e-05, "loss": 0.8989, "step": 1645 }, { "epoch": 0.02, "learning_rate": 4.9695500044290904e-05, "loss": 0.9712, "step": 1650 }, { "epoch": 0.02, "learning_rate": 4.969457731715239e-05, "loss": 0.9457, "step": 1655 }, { "epoch": 0.02, "learning_rate": 4.969365459001388e-05, "loss": 0.8902, "step": 1660 }, { "epoch": 0.02, "learning_rate": 4.969273186287537e-05, "loss": 0.9271, "step": 1665 }, { "epoch": 0.02, "learning_rate": 4.9691809135736856e-05, "loss": 0.9742, "step": 1670 }, { "epoch": 0.02, "learning_rate": 4.9690886408598344e-05, "loss": 0.8781, "step": 1675 }, { "epoch": 0.02, "learning_rate": 4.9689963681459825e-05, "loss": 0.9409, "step": 1680 }, { "epoch": 0.02, "learning_rate": 4.968904095432132e-05, "loss": 0.8848, "step": 1685 }, { "epoch": 0.02, "learning_rate": 4.968811822718281e-05, "loss": 0.9564, "step": 1690 }, { "epoch": 0.02, "learning_rate": 4.9687195500044295e-05, "loss": 0.9187, "step": 1695 }, { "epoch": 0.02, "learning_rate": 4.9686272772905776e-05, "loss": 0.9254, "step": 1700 }, { "epoch": 0.02, "learning_rate": 4.968535004576727e-05, "loss": 0.9735, "step": 1705 }, { "epoch": 0.02, "learning_rate": 4.968442731862876e-05, "loss": 0.9436, "step": 1710 }, { "epoch": 0.02, "learning_rate": 4.9683504591490246e-05, "loss": 0.9722, "step": 1715 }, { "epoch": 0.02, "learning_rate": 4.968258186435173e-05, "loss": 1.0297, "step": 1720 }, { "epoch": 0.02, "learning_rate": 4.968165913721322e-05, "loss": 1.032, "step": 1725 }, { "epoch": 0.02, "learning_rate": 4.968073641007471e-05, "loss": 0.9525, "step": 1730 }, { "epoch": 0.02, "learning_rate": 4.967981368293619e-05, "loss": 0.9169, "step": 1735 }, { "epoch": 0.02, "learning_rate": 4.967889095579768e-05, "loss": 0.9302, "step": 1740 }, { "epoch": 0.02, "learning_rate": 4.9677968228659173e-05, "loss": 0.9729, "step": 1745 }, { "epoch": 0.02, "learning_rate": 4.967704550152066e-05, "loss": 0.987, "step": 1750 }, { "epoch": 0.02, "learning_rate": 4.967612277438214e-05, "loss": 1.0023, "step": 1755 }, { "epoch": 0.02, "learning_rate": 4.967520004724363e-05, "loss": 0.9451, "step": 1760 }, { "epoch": 0.02, "learning_rate": 4.967427732010512e-05, "loss": 0.8672, "step": 1765 }, { "epoch": 0.02, "learning_rate": 4.9673354592966606e-05, "loss": 0.9136, "step": 1770 }, { "epoch": 0.02, "learning_rate": 4.9672431865828094e-05, "loss": 0.9866, "step": 1775 }, { "epoch": 0.02, "learning_rate": 4.967150913868958e-05, "loss": 0.9476, "step": 1780 }, { "epoch": 0.02, "learning_rate": 4.967058641155107e-05, "loss": 0.9513, "step": 1785 }, { "epoch": 0.02, "learning_rate": 4.966966368441256e-05, "loss": 0.8406, "step": 1790 }, { "epoch": 0.02, "learning_rate": 4.9668740957274045e-05, "loss": 0.9794, "step": 1795 }, { "epoch": 0.02, "learning_rate": 4.966781823013553e-05, "loss": 0.9559, "step": 1800 }, { "epoch": 0.02, "learning_rate": 4.966689550299702e-05, "loss": 0.9934, "step": 1805 }, { "epoch": 0.02, "learning_rate": 4.966597277585851e-05, "loss": 0.8781, "step": 1810 }, { "epoch": 0.02, "learning_rate": 4.9665050048719997e-05, "loss": 0.9544, "step": 1815 }, { "epoch": 0.02, "learning_rate": 4.9664127321581484e-05, "loss": 0.9348, "step": 1820 }, { "epoch": 0.02, "learning_rate": 4.966320459444297e-05, "loss": 0.9189, "step": 1825 }, { "epoch": 0.02, "learning_rate": 4.966228186730445e-05, "loss": 0.9645, "step": 1830 }, { "epoch": 0.02, "learning_rate": 4.966135914016595e-05, "loss": 0.9999, "step": 1835 }, { "epoch": 0.02, "learning_rate": 4.9660436413027436e-05, "loss": 0.9977, "step": 1840 }, { "epoch": 0.02, "learning_rate": 4.965951368588892e-05, "loss": 0.9563, "step": 1845 }, { "epoch": 0.02, "learning_rate": 4.9658590958750405e-05, "loss": 0.894, "step": 1850 }, { "epoch": 0.02, "learning_rate": 4.96576682316119e-05, "loss": 0.9486, "step": 1855 }, { "epoch": 0.02, "learning_rate": 4.965674550447339e-05, "loss": 0.9479, "step": 1860 }, { "epoch": 0.02, "learning_rate": 4.965582277733487e-05, "loss": 0.8957, "step": 1865 }, { "epoch": 0.02, "learning_rate": 4.9654900050196356e-05, "loss": 0.9121, "step": 1870 }, { "epoch": 0.02, "learning_rate": 4.965397732305785e-05, "loss": 0.9519, "step": 1875 }, { "epoch": 0.02, "learning_rate": 4.965305459591933e-05, "loss": 0.9467, "step": 1880 }, { "epoch": 0.02, "learning_rate": 4.965213186878082e-05, "loss": 0.9377, "step": 1885 }, { "epoch": 0.02, "learning_rate": 4.965120914164231e-05, "loss": 0.9199, "step": 1890 }, { "epoch": 0.02, "learning_rate": 4.96502864145038e-05, "loss": 0.9834, "step": 1895 }, { "epoch": 0.02, "learning_rate": 4.964936368736528e-05, "loss": 0.9413, "step": 1900 }, { "epoch": 0.02, "learning_rate": 4.964844096022677e-05, "loss": 1.0098, "step": 1905 }, { "epoch": 0.02, "learning_rate": 4.964751823308826e-05, "loss": 0.9179, "step": 1910 }, { "epoch": 0.02, "learning_rate": 4.964659550594975e-05, "loss": 0.8882, "step": 1915 }, { "epoch": 0.02, "learning_rate": 4.9645672778811235e-05, "loss": 0.9114, "step": 1920 }, { "epoch": 0.02, "learning_rate": 4.964475005167272e-05, "loss": 0.8972, "step": 1925 }, { "epoch": 0.02, "learning_rate": 4.964382732453421e-05, "loss": 0.9269, "step": 1930 }, { "epoch": 0.02, "learning_rate": 4.96429045973957e-05, "loss": 0.9646, "step": 1935 }, { "epoch": 0.02, "learning_rate": 4.9641981870257186e-05, "loss": 0.8999, "step": 1940 }, { "epoch": 0.02, "learning_rate": 4.9641059143118674e-05, "loss": 0.9176, "step": 1945 }, { "epoch": 0.02, "learning_rate": 4.964013641598016e-05, "loss": 0.925, "step": 1950 }, { "epoch": 0.02, "learning_rate": 4.963921368884164e-05, "loss": 0.968, "step": 1955 }, { "epoch": 0.02, "learning_rate": 4.963829096170314e-05, "loss": 0.9234, "step": 1960 }, { "epoch": 0.02, "learning_rate": 4.9637368234564625e-05, "loss": 0.8557, "step": 1965 }, { "epoch": 0.02, "learning_rate": 4.963644550742611e-05, "loss": 0.8902, "step": 1970 }, { "epoch": 0.02, "learning_rate": 4.9635522780287594e-05, "loss": 0.8784, "step": 1975 }, { "epoch": 0.02, "learning_rate": 4.963460005314908e-05, "loss": 0.9618, "step": 1980 }, { "epoch": 0.02, "learning_rate": 4.963367732601058e-05, "loss": 0.9345, "step": 1985 }, { "epoch": 0.02, "learning_rate": 4.963275459887206e-05, "loss": 0.8647, "step": 1990 }, { "epoch": 0.02, "learning_rate": 4.9631831871733546e-05, "loss": 0.9338, "step": 1995 }, { "epoch": 0.02, "learning_rate": 4.9630909144595033e-05, "loss": 0.8817, "step": 2000 }, { "epoch": 0.02, "eval_loss": 0.902228593826294, "eval_runtime": 69.4784, "eval_samples_per_second": 28.786, "eval_steps_per_second": 14.393, "step": 2000 }, { "epoch": 0.02, "learning_rate": 4.962998641745653e-05, "loss": 0.9763, "step": 2005 }, { "epoch": 0.02, "learning_rate": 4.962906369031801e-05, "loss": 0.9369, "step": 2010 }, { "epoch": 0.02, "learning_rate": 4.96281409631795e-05, "loss": 0.9301, "step": 2015 }, { "epoch": 0.02, "learning_rate": 4.9627218236040985e-05, "loss": 0.9339, "step": 2020 }, { "epoch": 0.02, "learning_rate": 4.962629550890248e-05, "loss": 0.9933, "step": 2025 }, { "epoch": 0.02, "learning_rate": 4.962537278176396e-05, "loss": 0.9727, "step": 2030 }, { "epoch": 0.02, "learning_rate": 4.962445005462545e-05, "loss": 0.9612, "step": 2035 }, { "epoch": 0.02, "learning_rate": 4.9623527327486936e-05, "loss": 0.9564, "step": 2040 }, { "epoch": 0.02, "learning_rate": 4.9622604600348424e-05, "loss": 0.9767, "step": 2045 }, { "epoch": 0.02, "learning_rate": 4.962168187320991e-05, "loss": 0.9219, "step": 2050 }, { "epoch": 0.02, "learning_rate": 4.96207591460714e-05, "loss": 1.0031, "step": 2055 }, { "epoch": 0.02, "learning_rate": 4.961983641893289e-05, "loss": 0.9013, "step": 2060 }, { "epoch": 0.02, "learning_rate": 4.961891369179437e-05, "loss": 0.9349, "step": 2065 }, { "epoch": 0.02, "learning_rate": 4.961799096465586e-05, "loss": 0.9588, "step": 2070 }, { "epoch": 0.02, "learning_rate": 4.961706823751735e-05, "loss": 0.933, "step": 2075 }, { "epoch": 0.02, "learning_rate": 4.961614551037884e-05, "loss": 0.9897, "step": 2080 }, { "epoch": 0.02, "learning_rate": 4.961522278324032e-05, "loss": 0.8893, "step": 2085 }, { "epoch": 0.02, "learning_rate": 4.9614300056101815e-05, "loss": 0.9394, "step": 2090 }, { "epoch": 0.02, "learning_rate": 4.96133773289633e-05, "loss": 0.9936, "step": 2095 }, { "epoch": 0.02, "learning_rate": 4.961245460182479e-05, "loss": 0.9259, "step": 2100 }, { "epoch": 0.02, "learning_rate": 4.961153187468627e-05, "loss": 0.9478, "step": 2105 }, { "epoch": 0.02, "learning_rate": 4.9610609147547766e-05, "loss": 0.9522, "step": 2110 }, { "epoch": 0.02, "learning_rate": 4.9609686420409254e-05, "loss": 0.9385, "step": 2115 }, { "epoch": 0.02, "learning_rate": 4.9608763693270735e-05, "loss": 0.9151, "step": 2120 }, { "epoch": 0.02, "learning_rate": 4.960784096613222e-05, "loss": 0.9716, "step": 2125 }, { "epoch": 0.02, "learning_rate": 4.960691823899371e-05, "loss": 0.9874, "step": 2130 }, { "epoch": 0.02, "learning_rate": 4.9605995511855205e-05, "loss": 0.9258, "step": 2135 }, { "epoch": 0.02, "learning_rate": 4.9605072784716686e-05, "loss": 0.8855, "step": 2140 }, { "epoch": 0.02, "learning_rate": 4.9604150057578174e-05, "loss": 0.9252, "step": 2145 }, { "epoch": 0.02, "learning_rate": 4.960322733043966e-05, "loss": 0.9703, "step": 2150 }, { "epoch": 0.02, "learning_rate": 4.960230460330115e-05, "loss": 0.9375, "step": 2155 }, { "epoch": 0.02, "learning_rate": 4.960138187616264e-05, "loss": 0.9382, "step": 2160 }, { "epoch": 0.02, "learning_rate": 4.9600459149024126e-05, "loss": 0.9805, "step": 2165 }, { "epoch": 0.02, "learning_rate": 4.9599536421885614e-05, "loss": 0.9462, "step": 2170 }, { "epoch": 0.02, "learning_rate": 4.95986136947471e-05, "loss": 0.8737, "step": 2175 }, { "epoch": 0.02, "learning_rate": 4.959769096760859e-05, "loss": 0.9045, "step": 2180 }, { "epoch": 0.02, "learning_rate": 4.959676824047008e-05, "loss": 1.0564, "step": 2185 }, { "epoch": 0.02, "learning_rate": 4.9595845513331565e-05, "loss": 0.9327, "step": 2190 }, { "epoch": 0.02, "learning_rate": 4.959492278619305e-05, "loss": 0.9536, "step": 2195 }, { "epoch": 0.02, "learning_rate": 4.959400005905454e-05, "loss": 0.9314, "step": 2200 }, { "epoch": 0.02, "learning_rate": 4.959307733191603e-05, "loss": 0.9502, "step": 2205 }, { "epoch": 0.02, "learning_rate": 4.9592154604777516e-05, "loss": 0.9507, "step": 2210 }, { "epoch": 0.02, "learning_rate": 4.9591231877639e-05, "loss": 0.883, "step": 2215 }, { "epoch": 0.02, "learning_rate": 4.959030915050049e-05, "loss": 0.9431, "step": 2220 }, { "epoch": 0.02, "learning_rate": 4.958938642336198e-05, "loss": 0.9667, "step": 2225 }, { "epoch": 0.02, "learning_rate": 4.958846369622346e-05, "loss": 0.9382, "step": 2230 }, { "epoch": 0.02, "learning_rate": 4.958754096908495e-05, "loss": 0.9669, "step": 2235 }, { "epoch": 0.02, "learning_rate": 4.9586618241946443e-05, "loss": 0.9516, "step": 2240 }, { "epoch": 0.02, "learning_rate": 4.958569551480793e-05, "loss": 0.9688, "step": 2245 }, { "epoch": 0.02, "learning_rate": 4.958477278766941e-05, "loss": 0.925, "step": 2250 }, { "epoch": 0.02, "learning_rate": 4.95838500605309e-05, "loss": 0.9376, "step": 2255 }, { "epoch": 0.03, "learning_rate": 4.9582927333392395e-05, "loss": 0.8851, "step": 2260 }, { "epoch": 0.03, "learning_rate": 4.9582004606253876e-05, "loss": 0.8848, "step": 2265 }, { "epoch": 0.03, "learning_rate": 4.9581081879115364e-05, "loss": 0.8766, "step": 2270 }, { "epoch": 0.03, "learning_rate": 4.958015915197685e-05, "loss": 0.969, "step": 2275 }, { "epoch": 0.03, "learning_rate": 4.957923642483834e-05, "loss": 0.9547, "step": 2280 }, { "epoch": 0.03, "learning_rate": 4.957831369769983e-05, "loss": 0.8766, "step": 2285 }, { "epoch": 0.03, "learning_rate": 4.9577390970561315e-05, "loss": 0.9174, "step": 2290 }, { "epoch": 0.03, "learning_rate": 4.95764682434228e-05, "loss": 0.9502, "step": 2295 }, { "epoch": 0.03, "learning_rate": 4.957554551628429e-05, "loss": 0.8659, "step": 2300 }, { "epoch": 0.03, "learning_rate": 4.957462278914578e-05, "loss": 0.9744, "step": 2305 }, { "epoch": 0.03, "learning_rate": 4.9573700062007267e-05, "loss": 0.9683, "step": 2310 }, { "epoch": 0.03, "learning_rate": 4.9572777334868754e-05, "loss": 0.8697, "step": 2315 }, { "epoch": 0.03, "learning_rate": 4.957185460773024e-05, "loss": 0.8883, "step": 2320 }, { "epoch": 0.03, "learning_rate": 4.957093188059173e-05, "loss": 0.9244, "step": 2325 }, { "epoch": 0.03, "learning_rate": 4.957000915345322e-05, "loss": 0.955, "step": 2330 }, { "epoch": 0.03, "learning_rate": 4.9569086426314706e-05, "loss": 0.8663, "step": 2335 }, { "epoch": 0.03, "learning_rate": 4.956816369917619e-05, "loss": 0.9201, "step": 2340 }, { "epoch": 0.03, "learning_rate": 4.956724097203768e-05, "loss": 0.9217, "step": 2345 }, { "epoch": 0.03, "learning_rate": 4.956631824489917e-05, "loss": 0.9242, "step": 2350 }, { "epoch": 0.03, "learning_rate": 4.956539551776066e-05, "loss": 0.879, "step": 2355 }, { "epoch": 0.03, "learning_rate": 4.956447279062214e-05, "loss": 0.9512, "step": 2360 }, { "epoch": 0.03, "learning_rate": 4.9563550063483626e-05, "loss": 0.9428, "step": 2365 }, { "epoch": 0.03, "learning_rate": 4.956262733634512e-05, "loss": 0.975, "step": 2370 }, { "epoch": 0.03, "learning_rate": 4.95617046092066e-05, "loss": 0.9477, "step": 2375 }, { "epoch": 0.03, "learning_rate": 4.956078188206809e-05, "loss": 0.9455, "step": 2380 }, { "epoch": 0.03, "learning_rate": 4.955985915492958e-05, "loss": 0.9657, "step": 2385 }, { "epoch": 0.03, "learning_rate": 4.955893642779107e-05, "loss": 0.9588, "step": 2390 }, { "epoch": 0.03, "learning_rate": 4.955801370065255e-05, "loss": 0.948, "step": 2395 }, { "epoch": 0.03, "learning_rate": 4.955709097351404e-05, "loss": 0.8793, "step": 2400 }, { "epoch": 0.03, "learning_rate": 4.955616824637553e-05, "loss": 0.9066, "step": 2405 }, { "epoch": 0.03, "learning_rate": 4.9555245519237023e-05, "loss": 0.9042, "step": 2410 }, { "epoch": 0.03, "learning_rate": 4.9554322792098505e-05, "loss": 0.9603, "step": 2415 }, { "epoch": 0.03, "learning_rate": 4.955340006495999e-05, "loss": 0.9223, "step": 2420 }, { "epoch": 0.03, "learning_rate": 4.955247733782148e-05, "loss": 0.9303, "step": 2425 }, { "epoch": 0.03, "learning_rate": 4.955155461068297e-05, "loss": 0.8921, "step": 2430 }, { "epoch": 0.03, "learning_rate": 4.9550631883544456e-05, "loss": 0.9227, "step": 2435 }, { "epoch": 0.03, "learning_rate": 4.9549709156405944e-05, "loss": 0.9504, "step": 2440 }, { "epoch": 0.03, "learning_rate": 4.954878642926743e-05, "loss": 0.9857, "step": 2445 }, { "epoch": 0.03, "learning_rate": 4.954786370212891e-05, "loss": 0.8837, "step": 2450 }, { "epoch": 0.03, "learning_rate": 4.954694097499041e-05, "loss": 0.9707, "step": 2455 }, { "epoch": 0.03, "learning_rate": 4.9546018247851895e-05, "loss": 0.8469, "step": 2460 }, { "epoch": 0.03, "learning_rate": 4.954509552071338e-05, "loss": 0.9527, "step": 2465 }, { "epoch": 0.03, "learning_rate": 4.9544172793574864e-05, "loss": 0.9632, "step": 2470 }, { "epoch": 0.03, "learning_rate": 4.954325006643636e-05, "loss": 0.9213, "step": 2475 }, { "epoch": 0.03, "learning_rate": 4.9542327339297847e-05, "loss": 0.8841, "step": 2480 }, { "epoch": 0.03, "learning_rate": 4.9541404612159334e-05, "loss": 0.9003, "step": 2485 }, { "epoch": 0.03, "learning_rate": 4.9540481885020816e-05, "loss": 0.9348, "step": 2490 }, { "epoch": 0.03, "learning_rate": 4.953955915788231e-05, "loss": 0.8818, "step": 2495 }, { "epoch": 0.03, "learning_rate": 4.95386364307438e-05, "loss": 0.93, "step": 2500 }, { "epoch": 0.03, "learning_rate": 4.953771370360528e-05, "loss": 0.9747, "step": 2505 }, { "epoch": 0.03, "learning_rate": 4.953679097646677e-05, "loss": 0.9829, "step": 2510 }, { "epoch": 0.03, "learning_rate": 4.9535868249328255e-05, "loss": 0.9788, "step": 2515 }, { "epoch": 0.03, "learning_rate": 4.953494552218975e-05, "loss": 0.9139, "step": 2520 }, { "epoch": 0.03, "learning_rate": 4.953402279505123e-05, "loss": 0.8658, "step": 2525 }, { "epoch": 0.03, "learning_rate": 4.953310006791272e-05, "loss": 0.9056, "step": 2530 }, { "epoch": 0.03, "learning_rate": 4.9532177340774206e-05, "loss": 0.9618, "step": 2535 }, { "epoch": 0.03, "learning_rate": 4.9531254613635694e-05, "loss": 0.9547, "step": 2540 }, { "epoch": 0.03, "learning_rate": 4.953033188649718e-05, "loss": 0.9248, "step": 2545 }, { "epoch": 0.03, "learning_rate": 4.952940915935867e-05, "loss": 0.9729, "step": 2550 }, { "epoch": 0.03, "learning_rate": 4.952848643222016e-05, "loss": 0.9183, "step": 2555 }, { "epoch": 0.03, "learning_rate": 4.9527563705081645e-05, "loss": 0.9498, "step": 2560 }, { "epoch": 0.03, "learning_rate": 4.952664097794313e-05, "loss": 0.9322, "step": 2565 }, { "epoch": 0.03, "learning_rate": 4.952571825080462e-05, "loss": 0.9511, "step": 2570 }, { "epoch": 0.03, "learning_rate": 4.952479552366611e-05, "loss": 0.8613, "step": 2575 }, { "epoch": 0.03, "learning_rate": 4.95238727965276e-05, "loss": 0.9468, "step": 2580 }, { "epoch": 0.03, "learning_rate": 4.9522950069389085e-05, "loss": 0.9009, "step": 2585 }, { "epoch": 0.03, "learning_rate": 4.952202734225057e-05, "loss": 0.8722, "step": 2590 }, { "epoch": 0.03, "learning_rate": 4.952110461511206e-05, "loss": 0.9285, "step": 2595 }, { "epoch": 0.03, "learning_rate": 4.952018188797354e-05, "loss": 0.9011, "step": 2600 }, { "epoch": 0.03, "learning_rate": 4.9519259160835036e-05, "loss": 0.9072, "step": 2605 }, { "epoch": 0.03, "learning_rate": 4.9518336433696524e-05, "loss": 0.9014, "step": 2610 }, { "epoch": 0.03, "learning_rate": 4.9517413706558005e-05, "loss": 0.9301, "step": 2615 }, { "epoch": 0.03, "learning_rate": 4.951649097941949e-05, "loss": 0.8798, "step": 2620 }, { "epoch": 0.03, "learning_rate": 4.951556825228099e-05, "loss": 0.9559, "step": 2625 }, { "epoch": 0.03, "learning_rate": 4.9514645525142475e-05, "loss": 0.9431, "step": 2630 }, { "epoch": 0.03, "learning_rate": 4.9513722798003956e-05, "loss": 0.9399, "step": 2635 }, { "epoch": 0.03, "learning_rate": 4.9512800070865444e-05, "loss": 0.9535, "step": 2640 }, { "epoch": 0.03, "learning_rate": 4.951187734372694e-05, "loss": 0.9377, "step": 2645 }, { "epoch": 0.03, "learning_rate": 4.951095461658842e-05, "loss": 0.9307, "step": 2650 }, { "epoch": 0.03, "learning_rate": 4.951003188944991e-05, "loss": 0.8665, "step": 2655 }, { "epoch": 0.03, "learning_rate": 4.9509109162311396e-05, "loss": 0.9796, "step": 2660 }, { "epoch": 0.03, "learning_rate": 4.9508186435172883e-05, "loss": 0.9979, "step": 2665 }, { "epoch": 0.03, "learning_rate": 4.950726370803437e-05, "loss": 0.9009, "step": 2670 }, { "epoch": 0.03, "learning_rate": 4.950634098089586e-05, "loss": 0.8862, "step": 2675 }, { "epoch": 0.03, "learning_rate": 4.950541825375735e-05, "loss": 0.912, "step": 2680 }, { "epoch": 0.03, "learning_rate": 4.9504495526618835e-05, "loss": 0.9306, "step": 2685 }, { "epoch": 0.03, "learning_rate": 4.950357279948032e-05, "loss": 0.8745, "step": 2690 }, { "epoch": 0.03, "learning_rate": 4.950265007234181e-05, "loss": 0.9624, "step": 2695 }, { "epoch": 0.03, "learning_rate": 4.95017273452033e-05, "loss": 0.9508, "step": 2700 }, { "epoch": 0.03, "learning_rate": 4.9500804618064786e-05, "loss": 0.9161, "step": 2705 }, { "epoch": 0.03, "learning_rate": 4.9499881890926274e-05, "loss": 0.9256, "step": 2710 }, { "epoch": 0.03, "learning_rate": 4.949895916378776e-05, "loss": 0.8713, "step": 2715 }, { "epoch": 0.03, "learning_rate": 4.949803643664925e-05, "loss": 0.9489, "step": 2720 }, { "epoch": 0.03, "learning_rate": 4.949711370951073e-05, "loss": 0.8723, "step": 2725 }, { "epoch": 0.03, "learning_rate": 4.9496190982372225e-05, "loss": 0.9667, "step": 2730 }, { "epoch": 0.03, "learning_rate": 4.949526825523371e-05, "loss": 0.8552, "step": 2735 }, { "epoch": 0.03, "learning_rate": 4.94943455280952e-05, "loss": 0.9299, "step": 2740 }, { "epoch": 0.03, "learning_rate": 4.949342280095668e-05, "loss": 0.8918, "step": 2745 }, { "epoch": 0.03, "learning_rate": 4.949250007381817e-05, "loss": 0.9717, "step": 2750 }, { "epoch": 0.03, "learning_rate": 4.9491577346679665e-05, "loss": 0.927, "step": 2755 }, { "epoch": 0.03, "learning_rate": 4.9490654619541146e-05, "loss": 0.9151, "step": 2760 }, { "epoch": 0.03, "learning_rate": 4.9489731892402634e-05, "loss": 0.8908, "step": 2765 }, { "epoch": 0.03, "learning_rate": 4.948880916526412e-05, "loss": 0.8946, "step": 2770 }, { "epoch": 0.03, "learning_rate": 4.9487886438125616e-05, "loss": 0.8898, "step": 2775 }, { "epoch": 0.03, "learning_rate": 4.94869637109871e-05, "loss": 0.9873, "step": 2780 }, { "epoch": 0.03, "learning_rate": 4.9486040983848585e-05, "loss": 0.9226, "step": 2785 }, { "epoch": 0.03, "learning_rate": 4.948511825671007e-05, "loss": 0.9297, "step": 2790 }, { "epoch": 0.03, "learning_rate": 4.948419552957157e-05, "loss": 0.8768, "step": 2795 }, { "epoch": 0.03, "learning_rate": 4.948327280243305e-05, "loss": 0.9459, "step": 2800 }, { "epoch": 0.03, "learning_rate": 4.9482350075294536e-05, "loss": 0.8728, "step": 2805 }, { "epoch": 0.03, "learning_rate": 4.9481427348156024e-05, "loss": 0.8828, "step": 2810 }, { "epoch": 0.03, "learning_rate": 4.948050462101751e-05, "loss": 0.9468, "step": 2815 }, { "epoch": 0.03, "learning_rate": 4.9479581893879e-05, "loss": 0.9054, "step": 2820 }, { "epoch": 0.03, "learning_rate": 4.947865916674049e-05, "loss": 0.9677, "step": 2825 }, { "epoch": 0.03, "learning_rate": 4.9477736439601976e-05, "loss": 0.9217, "step": 2830 }, { "epoch": 0.03, "learning_rate": 4.947681371246346e-05, "loss": 0.8946, "step": 2835 }, { "epoch": 0.03, "learning_rate": 4.947589098532495e-05, "loss": 0.9658, "step": 2840 }, { "epoch": 0.03, "learning_rate": 4.947496825818644e-05, "loss": 0.89, "step": 2845 }, { "epoch": 0.03, "learning_rate": 4.947404553104793e-05, "loss": 0.8996, "step": 2850 }, { "epoch": 0.03, "learning_rate": 4.947312280390941e-05, "loss": 0.9191, "step": 2855 }, { "epoch": 0.03, "learning_rate": 4.94722000767709e-05, "loss": 0.9102, "step": 2860 }, { "epoch": 0.03, "learning_rate": 4.947127734963239e-05, "loss": 0.9337, "step": 2865 }, { "epoch": 0.03, "learning_rate": 4.947035462249388e-05, "loss": 0.9256, "step": 2870 }, { "epoch": 0.03, "learning_rate": 4.946943189535536e-05, "loss": 0.9237, "step": 2875 }, { "epoch": 0.03, "learning_rate": 4.9468509168216854e-05, "loss": 0.9636, "step": 2880 }, { "epoch": 0.03, "learning_rate": 4.946758644107834e-05, "loss": 0.9449, "step": 2885 }, { "epoch": 0.03, "learning_rate": 4.946666371393982e-05, "loss": 0.9064, "step": 2890 }, { "epoch": 0.03, "learning_rate": 4.946574098680131e-05, "loss": 0.8846, "step": 2895 }, { "epoch": 0.03, "learning_rate": 4.94648182596628e-05, "loss": 0.9319, "step": 2900 }, { "epoch": 0.03, "learning_rate": 4.9463895532524293e-05, "loss": 0.9126, "step": 2905 }, { "epoch": 0.03, "learning_rate": 4.9462972805385774e-05, "loss": 0.8465, "step": 2910 }, { "epoch": 0.03, "learning_rate": 4.946205007824726e-05, "loss": 0.8605, "step": 2915 }, { "epoch": 0.03, "learning_rate": 4.946112735110875e-05, "loss": 0.8529, "step": 2920 }, { "epoch": 0.03, "learning_rate": 4.946020462397024e-05, "loss": 0.9138, "step": 2925 }, { "epoch": 0.03, "learning_rate": 4.9459281896831726e-05, "loss": 0.8675, "step": 2930 }, { "epoch": 0.03, "learning_rate": 4.9458359169693214e-05, "loss": 0.9355, "step": 2935 }, { "epoch": 0.03, "learning_rate": 4.94574364425547e-05, "loss": 0.9137, "step": 2940 }, { "epoch": 0.03, "learning_rate": 4.945651371541619e-05, "loss": 0.9371, "step": 2945 }, { "epoch": 0.03, "learning_rate": 4.945559098827768e-05, "loss": 0.8604, "step": 2950 }, { "epoch": 0.03, "learning_rate": 4.9454668261139165e-05, "loss": 0.9922, "step": 2955 }, { "epoch": 0.03, "learning_rate": 4.945374553400065e-05, "loss": 0.8959, "step": 2960 }, { "epoch": 0.03, "learning_rate": 4.9452822806862134e-05, "loss": 0.857, "step": 2965 }, { "epoch": 0.03, "learning_rate": 4.945190007972363e-05, "loss": 1.0169, "step": 2970 }, { "epoch": 0.03, "learning_rate": 4.9450977352585117e-05, "loss": 0.9255, "step": 2975 }, { "epoch": 0.03, "learning_rate": 4.9450054625446604e-05, "loss": 0.9289, "step": 2980 }, { "epoch": 0.03, "learning_rate": 4.9449131898308085e-05, "loss": 0.9147, "step": 2985 }, { "epoch": 0.03, "learning_rate": 4.944820917116958e-05, "loss": 0.9258, "step": 2990 }, { "epoch": 0.03, "learning_rate": 4.944728644403107e-05, "loss": 1.0115, "step": 2995 }, { "epoch": 0.03, "learning_rate": 4.944636371689255e-05, "loss": 0.9647, "step": 3000 }, { "epoch": 0.03, "eval_loss": 0.8878881931304932, "eval_runtime": 69.4114, "eval_samples_per_second": 28.814, "eval_steps_per_second": 14.407, "step": 3000 }, { "epoch": 0.03, "learning_rate": 4.944544098975404e-05, "loss": 0.9206, "step": 3005 }, { "epoch": 0.03, "learning_rate": 4.944451826261553e-05, "loss": 0.9414, "step": 3010 }, { "epoch": 0.03, "learning_rate": 4.944359553547702e-05, "loss": 0.936, "step": 3015 }, { "epoch": 0.03, "learning_rate": 4.94426728083385e-05, "loss": 0.931, "step": 3020 }, { "epoch": 0.03, "learning_rate": 4.944175008119999e-05, "loss": 0.9143, "step": 3025 }, { "epoch": 0.03, "learning_rate": 4.944082735406148e-05, "loss": 0.9447, "step": 3030 }, { "epoch": 0.03, "learning_rate": 4.9439904626922964e-05, "loss": 0.9224, "step": 3035 }, { "epoch": 0.03, "learning_rate": 4.943898189978445e-05, "loss": 0.9102, "step": 3040 }, { "epoch": 0.03, "learning_rate": 4.943805917264594e-05, "loss": 0.9447, "step": 3045 }, { "epoch": 0.03, "learning_rate": 4.943713644550743e-05, "loss": 0.9635, "step": 3050 }, { "epoch": 0.03, "learning_rate": 4.9436213718368915e-05, "loss": 0.9947, "step": 3055 }, { "epoch": 0.03, "learning_rate": 4.94352909912304e-05, "loss": 0.9025, "step": 3060 }, { "epoch": 0.03, "learning_rate": 4.943436826409189e-05, "loss": 0.9053, "step": 3065 }, { "epoch": 0.03, "learning_rate": 4.943344553695338e-05, "loss": 0.9683, "step": 3070 }, { "epoch": 0.03, "learning_rate": 4.943252280981487e-05, "loss": 0.8564, "step": 3075 }, { "epoch": 0.03, "learning_rate": 4.9431600082676355e-05, "loss": 0.9131, "step": 3080 }, { "epoch": 0.03, "learning_rate": 4.943067735553784e-05, "loss": 0.8703, "step": 3085 }, { "epoch": 0.03, "learning_rate": 4.942975462839933e-05, "loss": 0.935, "step": 3090 }, { "epoch": 0.03, "learning_rate": 4.942883190126082e-05, "loss": 0.9751, "step": 3095 }, { "epoch": 0.03, "learning_rate": 4.9427909174122306e-05, "loss": 0.9019, "step": 3100 }, { "epoch": 0.03, "learning_rate": 4.9426986446983794e-05, "loss": 0.8947, "step": 3105 }, { "epoch": 0.03, "learning_rate": 4.9426063719845275e-05, "loss": 0.9182, "step": 3110 }, { "epoch": 0.03, "learning_rate": 4.942514099270676e-05, "loss": 0.8858, "step": 3115 }, { "epoch": 0.03, "learning_rate": 4.942421826556826e-05, "loss": 0.8749, "step": 3120 }, { "epoch": 0.03, "learning_rate": 4.9423295538429745e-05, "loss": 0.8493, "step": 3125 }, { "epoch": 0.03, "learning_rate": 4.9422372811291226e-05, "loss": 0.8454, "step": 3130 }, { "epoch": 0.03, "learning_rate": 4.9421450084152714e-05, "loss": 0.8643, "step": 3135 }, { "epoch": 0.03, "learning_rate": 4.942052735701421e-05, "loss": 0.916, "step": 3140 }, { "epoch": 0.03, "learning_rate": 4.941960462987569e-05, "loss": 0.931, "step": 3145 }, { "epoch": 0.03, "learning_rate": 4.941868190273718e-05, "loss": 0.9, "step": 3150 }, { "epoch": 0.03, "learning_rate": 4.9417759175598666e-05, "loss": 0.9073, "step": 3155 }, { "epoch": 0.03, "learning_rate": 4.941683644846016e-05, "loss": 0.8909, "step": 3160 }, { "epoch": 0.04, "learning_rate": 4.941591372132164e-05, "loss": 1.0224, "step": 3165 }, { "epoch": 0.04, "learning_rate": 4.941499099418313e-05, "loss": 0.9275, "step": 3170 }, { "epoch": 0.04, "learning_rate": 4.941406826704462e-05, "loss": 0.947, "step": 3175 }, { "epoch": 0.04, "learning_rate": 4.941314553990611e-05, "loss": 0.9244, "step": 3180 }, { "epoch": 0.04, "learning_rate": 4.941222281276759e-05, "loss": 0.9007, "step": 3185 }, { "epoch": 0.04, "learning_rate": 4.941130008562908e-05, "loss": 0.9296, "step": 3190 }, { "epoch": 0.04, "learning_rate": 4.941037735849057e-05, "loss": 0.8605, "step": 3195 }, { "epoch": 0.04, "learning_rate": 4.9409454631352056e-05, "loss": 0.9032, "step": 3200 }, { "epoch": 0.04, "learning_rate": 4.9408531904213544e-05, "loss": 0.9446, "step": 3205 }, { "epoch": 0.04, "learning_rate": 4.940760917707503e-05, "loss": 0.9181, "step": 3210 }, { "epoch": 0.04, "learning_rate": 4.940668644993652e-05, "loss": 0.9402, "step": 3215 }, { "epoch": 0.04, "learning_rate": 4.9405763722798e-05, "loss": 0.9052, "step": 3220 }, { "epoch": 0.04, "learning_rate": 4.9404840995659495e-05, "loss": 0.9213, "step": 3225 }, { "epoch": 0.04, "learning_rate": 4.940391826852098e-05, "loss": 0.8907, "step": 3230 }, { "epoch": 0.04, "learning_rate": 4.940299554138247e-05, "loss": 0.8399, "step": 3235 }, { "epoch": 0.04, "learning_rate": 4.940207281424395e-05, "loss": 0.9237, "step": 3240 }, { "epoch": 0.04, "learning_rate": 4.940115008710545e-05, "loss": 0.8941, "step": 3245 }, { "epoch": 0.04, "learning_rate": 4.9400227359966935e-05, "loss": 0.9584, "step": 3250 }, { "epoch": 0.04, "learning_rate": 4.939930463282842e-05, "loss": 0.9449, "step": 3255 }, { "epoch": 0.04, "learning_rate": 4.9398381905689904e-05, "loss": 0.8944, "step": 3260 }, { "epoch": 0.04, "learning_rate": 4.939745917855139e-05, "loss": 0.8169, "step": 3265 }, { "epoch": 0.04, "learning_rate": 4.9396536451412886e-05, "loss": 0.9148, "step": 3270 }, { "epoch": 0.04, "learning_rate": 4.939561372427437e-05, "loss": 0.8694, "step": 3275 }, { "epoch": 0.04, "learning_rate": 4.9394690997135855e-05, "loss": 0.9415, "step": 3280 }, { "epoch": 0.04, "learning_rate": 4.939376826999734e-05, "loss": 0.9522, "step": 3285 }, { "epoch": 0.04, "learning_rate": 4.939284554285884e-05, "loss": 0.9003, "step": 3290 }, { "epoch": 0.04, "learning_rate": 4.939192281572032e-05, "loss": 0.905, "step": 3295 }, { "epoch": 0.04, "learning_rate": 4.9391000088581806e-05, "loss": 0.8977, "step": 3300 }, { "epoch": 0.04, "learning_rate": 4.9390077361443294e-05, "loss": 0.9195, "step": 3305 }, { "epoch": 0.04, "learning_rate": 4.938915463430478e-05, "loss": 0.9015, "step": 3310 }, { "epoch": 0.04, "learning_rate": 4.938823190716627e-05, "loss": 0.942, "step": 3315 }, { "epoch": 0.04, "learning_rate": 4.938730918002776e-05, "loss": 0.8916, "step": 3320 }, { "epoch": 0.04, "learning_rate": 4.9386386452889246e-05, "loss": 0.9985, "step": 3325 }, { "epoch": 0.04, "learning_rate": 4.9385463725750733e-05, "loss": 0.946, "step": 3330 }, { "epoch": 0.04, "learning_rate": 4.938454099861222e-05, "loss": 0.9332, "step": 3335 }, { "epoch": 0.04, "learning_rate": 4.938361827147371e-05, "loss": 0.9369, "step": 3340 }, { "epoch": 0.04, "learning_rate": 4.93826955443352e-05, "loss": 0.9286, "step": 3345 }, { "epoch": 0.04, "learning_rate": 4.938177281719668e-05, "loss": 0.9446, "step": 3350 }, { "epoch": 0.04, "learning_rate": 4.938085009005817e-05, "loss": 0.9344, "step": 3355 }, { "epoch": 0.04, "learning_rate": 4.937992736291966e-05, "loss": 0.9041, "step": 3360 }, { "epoch": 0.04, "learning_rate": 4.937900463578115e-05, "loss": 0.9219, "step": 3365 }, { "epoch": 0.04, "learning_rate": 4.937808190864263e-05, "loss": 0.9258, "step": 3370 }, { "epoch": 0.04, "learning_rate": 4.9377159181504124e-05, "loss": 0.8621, "step": 3375 }, { "epoch": 0.04, "learning_rate": 4.937623645436561e-05, "loss": 0.9635, "step": 3380 }, { "epoch": 0.04, "learning_rate": 4.937531372722709e-05, "loss": 0.9072, "step": 3385 }, { "epoch": 0.04, "learning_rate": 4.937439100008858e-05, "loss": 0.8828, "step": 3390 }, { "epoch": 0.04, "learning_rate": 4.9373468272950075e-05, "loss": 0.9347, "step": 3395 }, { "epoch": 0.04, "learning_rate": 4.937254554581156e-05, "loss": 0.9149, "step": 3400 }, { "epoch": 0.04, "learning_rate": 4.9371622818673044e-05, "loss": 0.946, "step": 3405 }, { "epoch": 0.04, "learning_rate": 4.937070009153453e-05, "loss": 0.9505, "step": 3410 }, { "epoch": 0.04, "learning_rate": 4.936977736439603e-05, "loss": 0.8685, "step": 3415 }, { "epoch": 0.04, "learning_rate": 4.936885463725751e-05, "loss": 0.9193, "step": 3420 }, { "epoch": 0.04, "learning_rate": 4.9367931910118996e-05, "loss": 0.9132, "step": 3425 }, { "epoch": 0.04, "learning_rate": 4.9367009182980484e-05, "loss": 1.0033, "step": 3430 }, { "epoch": 0.04, "learning_rate": 4.936608645584197e-05, "loss": 0.8588, "step": 3435 }, { "epoch": 0.04, "learning_rate": 4.936516372870346e-05, "loss": 0.9689, "step": 3440 }, { "epoch": 0.04, "learning_rate": 4.936424100156495e-05, "loss": 0.9201, "step": 3445 }, { "epoch": 0.04, "learning_rate": 4.9363318274426435e-05, "loss": 0.974, "step": 3450 }, { "epoch": 0.04, "learning_rate": 4.936239554728792e-05, "loss": 0.9145, "step": 3455 }, { "epoch": 0.04, "learning_rate": 4.936147282014941e-05, "loss": 0.8997, "step": 3460 }, { "epoch": 0.04, "learning_rate": 4.93605500930109e-05, "loss": 0.9363, "step": 3465 }, { "epoch": 0.04, "learning_rate": 4.9359627365872386e-05, "loss": 0.8798, "step": 3470 }, { "epoch": 0.04, "learning_rate": 4.9358704638733874e-05, "loss": 0.9144, "step": 3475 }, { "epoch": 0.04, "learning_rate": 4.935778191159536e-05, "loss": 0.9493, "step": 3480 }, { "epoch": 0.04, "learning_rate": 4.935685918445685e-05, "loss": 0.9366, "step": 3485 }, { "epoch": 0.04, "learning_rate": 4.935593645731834e-05, "loss": 0.9449, "step": 3490 }, { "epoch": 0.04, "learning_rate": 4.935501373017982e-05, "loss": 0.9473, "step": 3495 }, { "epoch": 0.04, "learning_rate": 4.935409100304131e-05, "loss": 0.8742, "step": 3500 }, { "epoch": 0.04, "learning_rate": 4.93531682759028e-05, "loss": 0.9385, "step": 3505 }, { "epoch": 0.04, "learning_rate": 4.935224554876429e-05, "loss": 0.8604, "step": 3510 }, { "epoch": 0.04, "learning_rate": 4.935132282162577e-05, "loss": 0.8586, "step": 3515 }, { "epoch": 0.04, "learning_rate": 4.935040009448726e-05, "loss": 0.9023, "step": 3520 }, { "epoch": 0.04, "learning_rate": 4.934947736734875e-05, "loss": 0.9443, "step": 3525 }, { "epoch": 0.04, "learning_rate": 4.934855464021024e-05, "loss": 0.8817, "step": 3530 }, { "epoch": 0.04, "learning_rate": 4.934763191307172e-05, "loss": 0.9194, "step": 3535 }, { "epoch": 0.04, "learning_rate": 4.934670918593321e-05, "loss": 0.8862, "step": 3540 }, { "epoch": 0.04, "learning_rate": 4.9345786458794704e-05, "loss": 0.9021, "step": 3545 }, { "epoch": 0.04, "learning_rate": 4.9344863731656185e-05, "loss": 0.9141, "step": 3550 }, { "epoch": 0.04, "learning_rate": 4.934394100451767e-05, "loss": 0.894, "step": 3555 }, { "epoch": 0.04, "learning_rate": 4.934301827737916e-05, "loss": 0.9225, "step": 3560 }, { "epoch": 0.04, "learning_rate": 4.9342095550240656e-05, "loss": 0.9057, "step": 3565 }, { "epoch": 0.04, "learning_rate": 4.934117282310214e-05, "loss": 0.8676, "step": 3570 }, { "epoch": 0.04, "learning_rate": 4.9340250095963624e-05, "loss": 0.9359, "step": 3575 }, { "epoch": 0.04, "learning_rate": 4.933932736882511e-05, "loss": 0.9349, "step": 3580 }, { "epoch": 0.04, "learning_rate": 4.93384046416866e-05, "loss": 0.9099, "step": 3585 }, { "epoch": 0.04, "learning_rate": 4.933748191454809e-05, "loss": 0.9753, "step": 3590 }, { "epoch": 0.04, "learning_rate": 4.9336559187409576e-05, "loss": 0.8898, "step": 3595 }, { "epoch": 0.04, "learning_rate": 4.9335636460271064e-05, "loss": 0.8638, "step": 3600 }, { "epoch": 0.04, "learning_rate": 4.933471373313255e-05, "loss": 0.8624, "step": 3605 }, { "epoch": 0.04, "learning_rate": 4.933379100599404e-05, "loss": 0.9546, "step": 3610 }, { "epoch": 0.04, "learning_rate": 4.933286827885553e-05, "loss": 0.8459, "step": 3615 }, { "epoch": 0.04, "learning_rate": 4.9331945551717015e-05, "loss": 0.9906, "step": 3620 }, { "epoch": 0.04, "learning_rate": 4.9331022824578496e-05, "loss": 0.9427, "step": 3625 }, { "epoch": 0.04, "learning_rate": 4.933010009743999e-05, "loss": 0.9474, "step": 3630 }, { "epoch": 0.04, "learning_rate": 4.932917737030148e-05, "loss": 0.9113, "step": 3635 }, { "epoch": 0.04, "learning_rate": 4.9328254643162966e-05, "loss": 0.8486, "step": 3640 }, { "epoch": 0.04, "learning_rate": 4.932733191602445e-05, "loss": 0.9188, "step": 3645 }, { "epoch": 0.04, "learning_rate": 4.9326409188885935e-05, "loss": 0.892, "step": 3650 }, { "epoch": 0.04, "learning_rate": 4.932548646174743e-05, "loss": 0.885, "step": 3655 }, { "epoch": 0.04, "learning_rate": 4.932456373460891e-05, "loss": 0.9682, "step": 3660 }, { "epoch": 0.04, "learning_rate": 4.93236410074704e-05, "loss": 1.0085, "step": 3665 }, { "epoch": 0.04, "learning_rate": 4.932271828033189e-05, "loss": 0.9219, "step": 3670 }, { "epoch": 0.04, "learning_rate": 4.932179555319338e-05, "loss": 0.9349, "step": 3675 }, { "epoch": 0.04, "learning_rate": 4.932087282605486e-05, "loss": 0.878, "step": 3680 }, { "epoch": 0.04, "learning_rate": 4.931995009891635e-05, "loss": 0.887, "step": 3685 }, { "epoch": 0.04, "learning_rate": 4.931902737177784e-05, "loss": 0.9373, "step": 3690 }, { "epoch": 0.04, "learning_rate": 4.9318104644639326e-05, "loss": 0.9163, "step": 3695 }, { "epoch": 0.04, "learning_rate": 4.9317181917500814e-05, "loss": 0.9528, "step": 3700 }, { "epoch": 0.04, "learning_rate": 4.93162591903623e-05, "loss": 0.9865, "step": 3705 }, { "epoch": 0.04, "learning_rate": 4.931533646322379e-05, "loss": 0.9432, "step": 3710 }, { "epoch": 0.04, "learning_rate": 4.931441373608528e-05, "loss": 0.918, "step": 3715 }, { "epoch": 0.04, "learning_rate": 4.9313491008946765e-05, "loss": 0.8821, "step": 3720 }, { "epoch": 0.04, "learning_rate": 4.931256828180825e-05, "loss": 0.9181, "step": 3725 }, { "epoch": 0.04, "learning_rate": 4.931164555466974e-05, "loss": 0.8864, "step": 3730 }, { "epoch": 0.04, "learning_rate": 4.931072282753122e-05, "loss": 0.8864, "step": 3735 }, { "epoch": 0.04, "learning_rate": 4.930980010039272e-05, "loss": 0.9636, "step": 3740 }, { "epoch": 0.04, "learning_rate": 4.9308877373254205e-05, "loss": 0.9161, "step": 3745 }, { "epoch": 0.04, "learning_rate": 4.930795464611569e-05, "loss": 0.8677, "step": 3750 }, { "epoch": 0.04, "learning_rate": 4.9307031918977173e-05, "loss": 0.9068, "step": 3755 }, { "epoch": 0.04, "learning_rate": 4.930610919183867e-05, "loss": 0.8901, "step": 3760 }, { "epoch": 0.04, "learning_rate": 4.9305186464700156e-05, "loss": 0.9158, "step": 3765 }, { "epoch": 0.04, "learning_rate": 4.930426373756164e-05, "loss": 1.0098, "step": 3770 }, { "epoch": 0.04, "learning_rate": 4.9303341010423125e-05, "loss": 0.9475, "step": 3775 }, { "epoch": 0.04, "learning_rate": 4.930241828328462e-05, "loss": 0.9355, "step": 3780 }, { "epoch": 0.04, "learning_rate": 4.930149555614611e-05, "loss": 0.849, "step": 3785 }, { "epoch": 0.04, "learning_rate": 4.930057282900759e-05, "loss": 0.8965, "step": 3790 }, { "epoch": 0.04, "learning_rate": 4.9299650101869076e-05, "loss": 0.9595, "step": 3795 }, { "epoch": 0.04, "learning_rate": 4.9298727374730564e-05, "loss": 0.8665, "step": 3800 }, { "epoch": 0.04, "learning_rate": 4.929780464759205e-05, "loss": 0.95, "step": 3805 }, { "epoch": 0.04, "learning_rate": 4.929688192045354e-05, "loss": 0.9255, "step": 3810 }, { "epoch": 0.04, "learning_rate": 4.929595919331503e-05, "loss": 0.9162, "step": 3815 }, { "epoch": 0.04, "learning_rate": 4.9295036466176515e-05, "loss": 0.8749, "step": 3820 }, { "epoch": 0.04, "learning_rate": 4.9294113739038e-05, "loss": 0.9489, "step": 3825 }, { "epoch": 0.04, "learning_rate": 4.929319101189949e-05, "loss": 0.8676, "step": 3830 }, { "epoch": 0.04, "learning_rate": 4.929226828476098e-05, "loss": 0.855, "step": 3835 }, { "epoch": 0.04, "learning_rate": 4.929134555762247e-05, "loss": 0.9773, "step": 3840 }, { "epoch": 0.04, "learning_rate": 4.9290422830483955e-05, "loss": 0.9156, "step": 3845 }, { "epoch": 0.04, "learning_rate": 4.928950010334544e-05, "loss": 0.9206, "step": 3850 }, { "epoch": 0.04, "learning_rate": 4.928857737620693e-05, "loss": 0.886, "step": 3855 }, { "epoch": 0.04, "learning_rate": 4.928765464906842e-05, "loss": 0.9565, "step": 3860 }, { "epoch": 0.04, "learning_rate": 4.9286731921929906e-05, "loss": 0.9016, "step": 3865 }, { "epoch": 0.04, "learning_rate": 4.9285809194791394e-05, "loss": 0.9218, "step": 3870 }, { "epoch": 0.04, "learning_rate": 4.928488646765288e-05, "loss": 0.9235, "step": 3875 }, { "epoch": 0.04, "learning_rate": 4.928396374051436e-05, "loss": 0.9043, "step": 3880 }, { "epoch": 0.04, "learning_rate": 4.928304101337585e-05, "loss": 0.8907, "step": 3885 }, { "epoch": 0.04, "learning_rate": 4.9282118286237345e-05, "loss": 0.8849, "step": 3890 }, { "epoch": 0.04, "learning_rate": 4.928119555909883e-05, "loss": 0.9366, "step": 3895 }, { "epoch": 0.04, "learning_rate": 4.9280272831960314e-05, "loss": 0.852, "step": 3900 }, { "epoch": 0.04, "learning_rate": 4.92793501048218e-05, "loss": 0.8295, "step": 3905 }, { "epoch": 0.04, "learning_rate": 4.92784273776833e-05, "loss": 0.8739, "step": 3910 }, { "epoch": 0.04, "learning_rate": 4.9277504650544785e-05, "loss": 0.8592, "step": 3915 }, { "epoch": 0.04, "learning_rate": 4.9276581923406266e-05, "loss": 0.8947, "step": 3920 }, { "epoch": 0.04, "learning_rate": 4.9275659196267754e-05, "loss": 0.9529, "step": 3925 }, { "epoch": 0.04, "learning_rate": 4.927473646912925e-05, "loss": 0.9616, "step": 3930 }, { "epoch": 0.04, "learning_rate": 4.927381374199073e-05, "loss": 0.8985, "step": 3935 }, { "epoch": 0.04, "learning_rate": 4.927289101485222e-05, "loss": 0.8867, "step": 3940 }, { "epoch": 0.04, "learning_rate": 4.9271968287713705e-05, "loss": 0.8909, "step": 3945 }, { "epoch": 0.04, "learning_rate": 4.927104556057519e-05, "loss": 0.9621, "step": 3950 }, { "epoch": 0.04, "learning_rate": 4.927012283343668e-05, "loss": 0.8494, "step": 3955 }, { "epoch": 0.04, "learning_rate": 4.926920010629817e-05, "loss": 0.9895, "step": 3960 }, { "epoch": 0.04, "learning_rate": 4.9268277379159656e-05, "loss": 0.9406, "step": 3965 }, { "epoch": 0.04, "learning_rate": 4.9267354652021144e-05, "loss": 0.9646, "step": 3970 }, { "epoch": 0.04, "learning_rate": 4.926643192488263e-05, "loss": 0.9386, "step": 3975 }, { "epoch": 0.04, "learning_rate": 4.926550919774412e-05, "loss": 0.8959, "step": 3980 }, { "epoch": 0.04, "learning_rate": 4.926458647060561e-05, "loss": 0.9063, "step": 3985 }, { "epoch": 0.04, "learning_rate": 4.9263663743467096e-05, "loss": 0.907, "step": 3990 }, { "epoch": 0.04, "learning_rate": 4.9262741016328583e-05, "loss": 0.8889, "step": 3995 }, { "epoch": 0.04, "learning_rate": 4.926181828919007e-05, "loss": 0.8646, "step": 4000 }, { "epoch": 0.04, "eval_loss": 0.8577195405960083, "eval_runtime": 69.7853, "eval_samples_per_second": 28.659, "eval_steps_per_second": 14.33, "step": 4000 }, { "epoch": 0.04, "learning_rate": 4.926089556205156e-05, "loss": 0.8568, "step": 4005 }, { "epoch": 0.04, "learning_rate": 4.925997283491304e-05, "loss": 0.9226, "step": 4010 }, { "epoch": 0.04, "learning_rate": 4.9259050107774535e-05, "loss": 0.9103, "step": 4015 }, { "epoch": 0.04, "learning_rate": 4.925812738063602e-05, "loss": 0.8572, "step": 4020 }, { "epoch": 0.04, "learning_rate": 4.925720465349751e-05, "loss": 0.9158, "step": 4025 }, { "epoch": 0.04, "learning_rate": 4.925628192635899e-05, "loss": 0.8096, "step": 4030 }, { "epoch": 0.04, "learning_rate": 4.925535919922048e-05, "loss": 0.9034, "step": 4035 }, { "epoch": 0.04, "learning_rate": 4.9254436472081974e-05, "loss": 0.8658, "step": 4040 }, { "epoch": 0.04, "learning_rate": 4.9253513744943455e-05, "loss": 0.8548, "step": 4045 }, { "epoch": 0.04, "learning_rate": 4.925259101780494e-05, "loss": 0.8042, "step": 4050 }, { "epoch": 0.04, "learning_rate": 4.925166829066643e-05, "loss": 0.8745, "step": 4055 }, { "epoch": 0.04, "learning_rate": 4.9250745563527925e-05, "loss": 0.9046, "step": 4060 }, { "epoch": 0.05, "learning_rate": 4.9249822836389407e-05, "loss": 0.8692, "step": 4065 }, { "epoch": 0.05, "learning_rate": 4.9248900109250894e-05, "loss": 0.9026, "step": 4070 }, { "epoch": 0.05, "learning_rate": 4.924797738211238e-05, "loss": 0.9183, "step": 4075 }, { "epoch": 0.05, "learning_rate": 4.924705465497387e-05, "loss": 0.9233, "step": 4080 }, { "epoch": 0.05, "learning_rate": 4.924613192783536e-05, "loss": 0.8721, "step": 4085 }, { "epoch": 0.05, "learning_rate": 4.9245209200696846e-05, "loss": 0.9567, "step": 4090 }, { "epoch": 0.05, "learning_rate": 4.9244286473558334e-05, "loss": 0.925, "step": 4095 }, { "epoch": 0.05, "learning_rate": 4.924336374641982e-05, "loss": 0.9031, "step": 4100 }, { "epoch": 0.05, "learning_rate": 4.924244101928131e-05, "loss": 0.9119, "step": 4105 }, { "epoch": 0.05, "learning_rate": 4.92415182921428e-05, "loss": 0.9015, "step": 4110 }, { "epoch": 0.05, "learning_rate": 4.9240595565004285e-05, "loss": 0.9333, "step": 4115 }, { "epoch": 0.05, "learning_rate": 4.9239672837865766e-05, "loss": 0.928, "step": 4120 }, { "epoch": 0.05, "learning_rate": 4.923875011072726e-05, "loss": 0.8863, "step": 4125 }, { "epoch": 0.05, "learning_rate": 4.923782738358875e-05, "loss": 0.8986, "step": 4130 }, { "epoch": 0.05, "learning_rate": 4.9236904656450236e-05, "loss": 0.9438, "step": 4135 }, { "epoch": 0.05, "learning_rate": 4.923598192931172e-05, "loss": 0.8402, "step": 4140 }, { "epoch": 0.05, "learning_rate": 4.923505920217321e-05, "loss": 0.8973, "step": 4145 }, { "epoch": 0.05, "learning_rate": 4.92341364750347e-05, "loss": 0.9128, "step": 4150 }, { "epoch": 0.05, "learning_rate": 4.923321374789618e-05, "loss": 0.9121, "step": 4155 }, { "epoch": 0.05, "learning_rate": 4.923229102075767e-05, "loss": 0.873, "step": 4160 }, { "epoch": 0.05, "learning_rate": 4.9231368293619164e-05, "loss": 0.9393, "step": 4165 }, { "epoch": 0.05, "learning_rate": 4.923044556648065e-05, "loss": 0.8468, "step": 4170 }, { "epoch": 0.05, "learning_rate": 4.922952283934213e-05, "loss": 0.9111, "step": 4175 }, { "epoch": 0.05, "learning_rate": 4.922860011220362e-05, "loss": 0.8674, "step": 4180 }, { "epoch": 0.05, "learning_rate": 4.922767738506511e-05, "loss": 0.9275, "step": 4185 }, { "epoch": 0.05, "learning_rate": 4.9226754657926596e-05, "loss": 0.8988, "step": 4190 }, { "epoch": 0.05, "learning_rate": 4.9225831930788084e-05, "loss": 0.9209, "step": 4195 }, { "epoch": 0.05, "learning_rate": 4.922490920364957e-05, "loss": 0.9255, "step": 4200 }, { "epoch": 0.05, "learning_rate": 4.922398647651106e-05, "loss": 0.9241, "step": 4205 }, { "epoch": 0.05, "learning_rate": 4.922306374937255e-05, "loss": 0.8953, "step": 4210 }, { "epoch": 0.05, "learning_rate": 4.9222141022234035e-05, "loss": 0.9276, "step": 4215 }, { "epoch": 0.05, "learning_rate": 4.922121829509552e-05, "loss": 0.8984, "step": 4220 }, { "epoch": 0.05, "learning_rate": 4.922029556795701e-05, "loss": 0.9463, "step": 4225 }, { "epoch": 0.05, "learning_rate": 4.92193728408185e-05, "loss": 0.8905, "step": 4230 }, { "epoch": 0.05, "learning_rate": 4.9218450113679987e-05, "loss": 0.9154, "step": 4235 }, { "epoch": 0.05, "learning_rate": 4.9217527386541474e-05, "loss": 0.9414, "step": 4240 }, { "epoch": 0.05, "learning_rate": 4.921660465940296e-05, "loss": 0.914, "step": 4245 }, { "epoch": 0.05, "learning_rate": 4.921568193226445e-05, "loss": 0.9525, "step": 4250 }, { "epoch": 0.05, "learning_rate": 4.921475920512594e-05, "loss": 0.9168, "step": 4255 }, { "epoch": 0.05, "learning_rate": 4.9213836477987426e-05, "loss": 0.9019, "step": 4260 }, { "epoch": 0.05, "learning_rate": 4.921291375084891e-05, "loss": 0.8421, "step": 4265 }, { "epoch": 0.05, "learning_rate": 4.9211991023710395e-05, "loss": 0.9307, "step": 4270 }, { "epoch": 0.05, "learning_rate": 4.921106829657189e-05, "loss": 0.8447, "step": 4275 }, { "epoch": 0.05, "learning_rate": 4.921014556943338e-05, "loss": 0.8986, "step": 4280 }, { "epoch": 0.05, "learning_rate": 4.920922284229486e-05, "loss": 0.9216, "step": 4285 }, { "epoch": 0.05, "learning_rate": 4.9208300115156346e-05, "loss": 0.95, "step": 4290 }, { "epoch": 0.05, "learning_rate": 4.920737738801784e-05, "loss": 0.9433, "step": 4295 }, { "epoch": 0.05, "learning_rate": 4.920645466087933e-05, "loss": 0.8963, "step": 4300 }, { "epoch": 0.05, "learning_rate": 4.920553193374081e-05, "loss": 0.8338, "step": 4305 }, { "epoch": 0.05, "learning_rate": 4.92046092066023e-05, "loss": 0.8721, "step": 4310 }, { "epoch": 0.05, "learning_rate": 4.920368647946379e-05, "loss": 0.9364, "step": 4315 }, { "epoch": 0.05, "learning_rate": 4.920276375232527e-05, "loss": 0.8902, "step": 4320 }, { "epoch": 0.05, "learning_rate": 4.920184102518676e-05, "loss": 0.8903, "step": 4325 }, { "epoch": 0.05, "learning_rate": 4.920091829804825e-05, "loss": 0.8839, "step": 4330 }, { "epoch": 0.05, "learning_rate": 4.919999557090974e-05, "loss": 0.8548, "step": 4335 }, { "epoch": 0.05, "learning_rate": 4.9199072843771225e-05, "loss": 0.9143, "step": 4340 }, { "epoch": 0.05, "learning_rate": 4.919815011663271e-05, "loss": 0.9469, "step": 4345 }, { "epoch": 0.05, "learning_rate": 4.91972273894942e-05, "loss": 0.8857, "step": 4350 }, { "epoch": 0.05, "learning_rate": 4.919630466235569e-05, "loss": 0.9475, "step": 4355 }, { "epoch": 0.05, "learning_rate": 4.9195381935217176e-05, "loss": 0.8497, "step": 4360 }, { "epoch": 0.05, "learning_rate": 4.9194459208078664e-05, "loss": 0.9024, "step": 4365 }, { "epoch": 0.05, "learning_rate": 4.919353648094015e-05, "loss": 0.8851, "step": 4370 }, { "epoch": 0.05, "learning_rate": 4.919261375380164e-05, "loss": 0.831, "step": 4375 }, { "epoch": 0.05, "learning_rate": 4.919169102666313e-05, "loss": 0.9525, "step": 4380 }, { "epoch": 0.05, "learning_rate": 4.9190768299524615e-05, "loss": 0.9037, "step": 4385 }, { "epoch": 0.05, "learning_rate": 4.91898455723861e-05, "loss": 0.8417, "step": 4390 }, { "epoch": 0.05, "learning_rate": 4.9188922845247584e-05, "loss": 0.9139, "step": 4395 }, { "epoch": 0.05, "learning_rate": 4.918800011810908e-05, "loss": 0.8051, "step": 4400 }, { "epoch": 0.05, "learning_rate": 4.918707739097057e-05, "loss": 0.8691, "step": 4405 }, { "epoch": 0.05, "learning_rate": 4.9186154663832055e-05, "loss": 0.8924, "step": 4410 }, { "epoch": 0.05, "learning_rate": 4.9185231936693536e-05, "loss": 0.9019, "step": 4415 }, { "epoch": 0.05, "learning_rate": 4.9184309209555023e-05, "loss": 0.9192, "step": 4420 }, { "epoch": 0.05, "learning_rate": 4.918338648241652e-05, "loss": 0.8668, "step": 4425 }, { "epoch": 0.05, "learning_rate": 4.9182463755278e-05, "loss": 0.8636, "step": 4430 }, { "epoch": 0.05, "learning_rate": 4.918154102813949e-05, "loss": 0.9244, "step": 4435 }, { "epoch": 0.05, "learning_rate": 4.9180618301000975e-05, "loss": 0.8391, "step": 4440 }, { "epoch": 0.05, "learning_rate": 4.917969557386247e-05, "loss": 0.9749, "step": 4445 }, { "epoch": 0.05, "learning_rate": 4.917877284672395e-05, "loss": 0.9495, "step": 4450 }, { "epoch": 0.05, "learning_rate": 4.917785011958544e-05, "loss": 0.9161, "step": 4455 }, { "epoch": 0.05, "learning_rate": 4.9176927392446926e-05, "loss": 0.9223, "step": 4460 }, { "epoch": 0.05, "learning_rate": 4.9176004665308414e-05, "loss": 0.8887, "step": 4465 }, { "epoch": 0.05, "learning_rate": 4.91750819381699e-05, "loss": 0.8713, "step": 4470 }, { "epoch": 0.05, "learning_rate": 4.917415921103139e-05, "loss": 0.933, "step": 4475 }, { "epoch": 0.05, "learning_rate": 4.917323648389288e-05, "loss": 0.9107, "step": 4480 }, { "epoch": 0.05, "learning_rate": 4.9172313756754365e-05, "loss": 0.9194, "step": 4485 }, { "epoch": 0.05, "learning_rate": 4.917139102961585e-05, "loss": 0.8207, "step": 4490 }, { "epoch": 0.05, "learning_rate": 4.917046830247734e-05, "loss": 0.9246, "step": 4495 }, { "epoch": 0.05, "learning_rate": 4.916954557533883e-05, "loss": 0.8913, "step": 4500 }, { "epoch": 0.05, "learning_rate": 4.916862284820031e-05, "loss": 0.8452, "step": 4505 }, { "epoch": 0.05, "learning_rate": 4.9167700121061805e-05, "loss": 0.897, "step": 4510 }, { "epoch": 0.05, "learning_rate": 4.916677739392329e-05, "loss": 0.9501, "step": 4515 }, { "epoch": 0.05, "learning_rate": 4.916585466678478e-05, "loss": 0.9465, "step": 4520 }, { "epoch": 0.05, "learning_rate": 4.916493193964626e-05, "loss": 0.8903, "step": 4525 }, { "epoch": 0.05, "learning_rate": 4.9164009212507756e-05, "loss": 0.8569, "step": 4530 }, { "epoch": 0.05, "learning_rate": 4.9163086485369244e-05, "loss": 0.8621, "step": 4535 }, { "epoch": 0.05, "learning_rate": 4.9162163758230725e-05, "loss": 0.9104, "step": 4540 }, { "epoch": 0.05, "learning_rate": 4.916124103109221e-05, "loss": 0.898, "step": 4545 }, { "epoch": 0.05, "learning_rate": 4.916031830395371e-05, "loss": 0.868, "step": 4550 }, { "epoch": 0.05, "learning_rate": 4.9159395576815195e-05, "loss": 0.9086, "step": 4555 }, { "epoch": 0.05, "learning_rate": 4.9158472849676676e-05, "loss": 0.8674, "step": 4560 }, { "epoch": 0.05, "learning_rate": 4.9157550122538164e-05, "loss": 0.9012, "step": 4565 }, { "epoch": 0.05, "learning_rate": 4.915662739539965e-05, "loss": 0.8388, "step": 4570 }, { "epoch": 0.05, "learning_rate": 4.915570466826114e-05, "loss": 0.9155, "step": 4575 }, { "epoch": 0.05, "learning_rate": 4.915478194112263e-05, "loss": 0.9473, "step": 4580 }, { "epoch": 0.05, "learning_rate": 4.9153859213984116e-05, "loss": 0.8897, "step": 4585 }, { "epoch": 0.05, "learning_rate": 4.9152936486845604e-05, "loss": 0.9659, "step": 4590 }, { "epoch": 0.05, "learning_rate": 4.915201375970709e-05, "loss": 0.8951, "step": 4595 }, { "epoch": 0.05, "learning_rate": 4.915109103256858e-05, "loss": 0.9147, "step": 4600 }, { "epoch": 0.05, "learning_rate": 4.915016830543007e-05, "loss": 0.8829, "step": 4605 }, { "epoch": 0.05, "learning_rate": 4.9149245578291555e-05, "loss": 0.8725, "step": 4610 }, { "epoch": 0.05, "learning_rate": 4.914832285115304e-05, "loss": 0.9521, "step": 4615 }, { "epoch": 0.05, "learning_rate": 4.914740012401453e-05, "loss": 0.9295, "step": 4620 }, { "epoch": 0.05, "learning_rate": 4.914647739687602e-05, "loss": 0.8391, "step": 4625 }, { "epoch": 0.05, "learning_rate": 4.9145554669737506e-05, "loss": 0.9845, "step": 4630 }, { "epoch": 0.05, "learning_rate": 4.914463194259899e-05, "loss": 0.953, "step": 4635 }, { "epoch": 0.05, "learning_rate": 4.914370921546048e-05, "loss": 0.931, "step": 4640 }, { "epoch": 0.05, "learning_rate": 4.914278648832197e-05, "loss": 0.9133, "step": 4645 }, { "epoch": 0.05, "learning_rate": 4.914186376118345e-05, "loss": 0.9325, "step": 4650 }, { "epoch": 0.05, "learning_rate": 4.914094103404494e-05, "loss": 0.8796, "step": 4655 }, { "epoch": 0.05, "learning_rate": 4.9140018306906433e-05, "loss": 0.9353, "step": 4660 }, { "epoch": 0.05, "learning_rate": 4.913909557976792e-05, "loss": 0.9229, "step": 4665 }, { "epoch": 0.05, "learning_rate": 4.91381728526294e-05, "loss": 0.9239, "step": 4670 }, { "epoch": 0.05, "learning_rate": 4.913725012549089e-05, "loss": 0.9062, "step": 4675 }, { "epoch": 0.05, "learning_rate": 4.9136327398352385e-05, "loss": 0.9215, "step": 4680 }, { "epoch": 0.05, "learning_rate": 4.913540467121387e-05, "loss": 0.8646, "step": 4685 }, { "epoch": 0.05, "learning_rate": 4.9134481944075354e-05, "loss": 0.9118, "step": 4690 }, { "epoch": 0.05, "learning_rate": 4.913355921693684e-05, "loss": 0.9291, "step": 4695 }, { "epoch": 0.05, "learning_rate": 4.9132636489798336e-05, "loss": 0.8173, "step": 4700 }, { "epoch": 0.05, "learning_rate": 4.913171376265982e-05, "loss": 0.8652, "step": 4705 }, { "epoch": 0.05, "learning_rate": 4.9130791035521305e-05, "loss": 0.9364, "step": 4710 }, { "epoch": 0.05, "learning_rate": 4.912986830838279e-05, "loss": 0.9168, "step": 4715 }, { "epoch": 0.05, "learning_rate": 4.912894558124428e-05, "loss": 0.8967, "step": 4720 }, { "epoch": 0.05, "learning_rate": 4.912802285410577e-05, "loss": 0.9192, "step": 4725 }, { "epoch": 0.05, "learning_rate": 4.9127100126967257e-05, "loss": 0.9142, "step": 4730 }, { "epoch": 0.05, "learning_rate": 4.9126177399828744e-05, "loss": 0.9651, "step": 4735 }, { "epoch": 0.05, "learning_rate": 4.912525467269023e-05, "loss": 0.88, "step": 4740 }, { "epoch": 0.05, "learning_rate": 4.912433194555172e-05, "loss": 0.8746, "step": 4745 }, { "epoch": 0.05, "learning_rate": 4.912340921841321e-05, "loss": 0.914, "step": 4750 }, { "epoch": 0.05, "learning_rate": 4.9122486491274696e-05, "loss": 0.8777, "step": 4755 }, { "epoch": 0.05, "learning_rate": 4.9121563764136184e-05, "loss": 0.9138, "step": 4760 }, { "epoch": 0.05, "learning_rate": 4.912064103699767e-05, "loss": 0.8665, "step": 4765 }, { "epoch": 0.05, "learning_rate": 4.911971830985916e-05, "loss": 0.9611, "step": 4770 }, { "epoch": 0.05, "learning_rate": 4.911879558272065e-05, "loss": 0.9297, "step": 4775 }, { "epoch": 0.05, "learning_rate": 4.911787285558213e-05, "loss": 0.902, "step": 4780 }, { "epoch": 0.05, "learning_rate": 4.9116950128443616e-05, "loss": 0.8878, "step": 4785 }, { "epoch": 0.05, "learning_rate": 4.911602740130511e-05, "loss": 0.9101, "step": 4790 }, { "epoch": 0.05, "learning_rate": 4.91151046741666e-05, "loss": 0.9499, "step": 4795 }, { "epoch": 0.05, "learning_rate": 4.911418194702808e-05, "loss": 0.8658, "step": 4800 }, { "epoch": 0.05, "learning_rate": 4.911325921988957e-05, "loss": 0.8824, "step": 4805 }, { "epoch": 0.05, "learning_rate": 4.911233649275106e-05, "loss": 0.8422, "step": 4810 }, { "epoch": 0.05, "learning_rate": 4.911141376561254e-05, "loss": 0.8384, "step": 4815 }, { "epoch": 0.05, "learning_rate": 4.911049103847403e-05, "loss": 0.9536, "step": 4820 }, { "epoch": 0.05, "learning_rate": 4.910956831133552e-05, "loss": 0.8223, "step": 4825 }, { "epoch": 0.05, "learning_rate": 4.9108645584197013e-05, "loss": 0.8974, "step": 4830 }, { "epoch": 0.05, "learning_rate": 4.9107722857058495e-05, "loss": 0.9405, "step": 4835 }, { "epoch": 0.05, "learning_rate": 4.910680012991998e-05, "loss": 0.8851, "step": 4840 }, { "epoch": 0.05, "learning_rate": 4.910587740278147e-05, "loss": 0.8739, "step": 4845 }, { "epoch": 0.05, "learning_rate": 4.910495467564296e-05, "loss": 0.9191, "step": 4850 }, { "epoch": 0.05, "learning_rate": 4.9104031948504446e-05, "loss": 0.9058, "step": 4855 }, { "epoch": 0.05, "learning_rate": 4.9103109221365934e-05, "loss": 0.9776, "step": 4860 }, { "epoch": 0.05, "learning_rate": 4.910218649422742e-05, "loss": 0.9014, "step": 4865 }, { "epoch": 0.05, "learning_rate": 4.910126376708891e-05, "loss": 0.9945, "step": 4870 }, { "epoch": 0.05, "learning_rate": 4.91003410399504e-05, "loss": 0.919, "step": 4875 }, { "epoch": 0.05, "learning_rate": 4.9099418312811885e-05, "loss": 0.871, "step": 4880 }, { "epoch": 0.05, "learning_rate": 4.909849558567337e-05, "loss": 0.8719, "step": 4885 }, { "epoch": 0.05, "learning_rate": 4.9097572858534854e-05, "loss": 0.9385, "step": 4890 }, { "epoch": 0.05, "learning_rate": 4.909665013139635e-05, "loss": 0.9252, "step": 4895 }, { "epoch": 0.05, "learning_rate": 4.9095727404257837e-05, "loss": 0.8877, "step": 4900 }, { "epoch": 0.05, "learning_rate": 4.9094804677119324e-05, "loss": 0.8871, "step": 4905 }, { "epoch": 0.05, "learning_rate": 4.9093881949980806e-05, "loss": 0.898, "step": 4910 }, { "epoch": 0.05, "learning_rate": 4.90929592228423e-05, "loss": 0.8886, "step": 4915 }, { "epoch": 0.05, "learning_rate": 4.909203649570379e-05, "loss": 0.865, "step": 4920 }, { "epoch": 0.05, "learning_rate": 4.909111376856527e-05, "loss": 0.9467, "step": 4925 }, { "epoch": 0.05, "learning_rate": 4.909019104142676e-05, "loss": 0.853, "step": 4930 }, { "epoch": 0.05, "learning_rate": 4.9089268314288245e-05, "loss": 0.9482, "step": 4935 }, { "epoch": 0.05, "learning_rate": 4.908834558714974e-05, "loss": 0.98, "step": 4940 }, { "epoch": 0.05, "learning_rate": 4.908742286001122e-05, "loss": 0.8538, "step": 4945 }, { "epoch": 0.05, "learning_rate": 4.908650013287271e-05, "loss": 0.8743, "step": 4950 }, { "epoch": 0.05, "learning_rate": 4.9085577405734196e-05, "loss": 0.8533, "step": 4955 }, { "epoch": 0.05, "learning_rate": 4.9084654678595684e-05, "loss": 0.8933, "step": 4960 }, { "epoch": 0.05, "learning_rate": 4.908373195145717e-05, "loss": 0.8638, "step": 4965 }, { "epoch": 0.06, "learning_rate": 4.908280922431866e-05, "loss": 0.9064, "step": 4970 }, { "epoch": 0.06, "learning_rate": 4.908188649718015e-05, "loss": 0.8768, "step": 4975 }, { "epoch": 0.06, "learning_rate": 4.9080963770041635e-05, "loss": 0.9263, "step": 4980 }, { "epoch": 0.06, "learning_rate": 4.908004104290312e-05, "loss": 0.8987, "step": 4985 }, { "epoch": 0.06, "learning_rate": 4.907911831576461e-05, "loss": 0.9102, "step": 4990 }, { "epoch": 0.06, "learning_rate": 4.90781955886261e-05, "loss": 0.8767, "step": 4995 }, { "epoch": 0.06, "learning_rate": 4.907727286148759e-05, "loss": 0.9159, "step": 5000 }, { "epoch": 0.06, "eval_loss": 0.8677482008934021, "eval_runtime": 69.4572, "eval_samples_per_second": 28.795, "eval_steps_per_second": 14.397, "step": 5000 }, { "epoch": 0.06, "learning_rate": 4.9076350134349075e-05, "loss": 0.9645, "step": 5005 }, { "epoch": 0.06, "learning_rate": 4.907542740721056e-05, "loss": 0.9532, "step": 5010 }, { "epoch": 0.06, "learning_rate": 4.907450468007205e-05, "loss": 0.86, "step": 5015 }, { "epoch": 0.06, "learning_rate": 4.907358195293353e-05, "loss": 0.9351, "step": 5020 }, { "epoch": 0.06, "learning_rate": 4.9072659225795026e-05, "loss": 0.8463, "step": 5025 }, { "epoch": 0.06, "learning_rate": 4.9071736498656514e-05, "loss": 0.8583, "step": 5030 }, { "epoch": 0.06, "learning_rate": 4.9070813771517995e-05, "loss": 0.9163, "step": 5035 }, { "epoch": 0.06, "learning_rate": 4.906989104437948e-05, "loss": 0.9264, "step": 5040 }, { "epoch": 0.06, "learning_rate": 4.906896831724098e-05, "loss": 0.8812, "step": 5045 }, { "epoch": 0.06, "learning_rate": 4.9068045590102465e-05, "loss": 0.9091, "step": 5050 }, { "epoch": 0.06, "learning_rate": 4.9067122862963946e-05, "loss": 0.85, "step": 5055 }, { "epoch": 0.06, "learning_rate": 4.9066200135825434e-05, "loss": 0.869, "step": 5060 }, { "epoch": 0.06, "learning_rate": 4.906527740868693e-05, "loss": 0.9259, "step": 5065 }, { "epoch": 0.06, "learning_rate": 4.906435468154842e-05, "loss": 0.8891, "step": 5070 }, { "epoch": 0.06, "learning_rate": 4.90634319544099e-05, "loss": 0.8776, "step": 5075 }, { "epoch": 0.06, "learning_rate": 4.9062509227271386e-05, "loss": 0.8894, "step": 5080 }, { "epoch": 0.06, "learning_rate": 4.906158650013288e-05, "loss": 0.8995, "step": 5085 }, { "epoch": 0.06, "learning_rate": 4.906066377299436e-05, "loss": 0.9118, "step": 5090 }, { "epoch": 0.06, "learning_rate": 4.905974104585585e-05, "loss": 0.874, "step": 5095 }, { "epoch": 0.06, "learning_rate": 4.905881831871734e-05, "loss": 0.8758, "step": 5100 }, { "epoch": 0.06, "learning_rate": 4.9057895591578825e-05, "loss": 0.8754, "step": 5105 }, { "epoch": 0.06, "learning_rate": 4.905697286444031e-05, "loss": 0.8942, "step": 5110 }, { "epoch": 0.06, "learning_rate": 4.90560501373018e-05, "loss": 0.837, "step": 5115 }, { "epoch": 0.06, "learning_rate": 4.905512741016329e-05, "loss": 0.9043, "step": 5120 }, { "epoch": 0.06, "learning_rate": 4.9054204683024776e-05, "loss": 0.8308, "step": 5125 }, { "epoch": 0.06, "learning_rate": 4.9053281955886264e-05, "loss": 0.917, "step": 5130 }, { "epoch": 0.06, "learning_rate": 4.905235922874775e-05, "loss": 0.8623, "step": 5135 }, { "epoch": 0.06, "learning_rate": 4.905143650160924e-05, "loss": 0.9524, "step": 5140 }, { "epoch": 0.06, "learning_rate": 4.905051377447073e-05, "loss": 0.9111, "step": 5145 }, { "epoch": 0.06, "learning_rate": 4.9049591047332215e-05, "loss": 0.9023, "step": 5150 }, { "epoch": 0.06, "learning_rate": 4.90486683201937e-05, "loss": 0.8753, "step": 5155 }, { "epoch": 0.06, "learning_rate": 4.904774559305519e-05, "loss": 0.8417, "step": 5160 }, { "epoch": 0.06, "learning_rate": 4.904682286591667e-05, "loss": 0.9399, "step": 5165 }, { "epoch": 0.06, "learning_rate": 4.904590013877816e-05, "loss": 0.8879, "step": 5170 }, { "epoch": 0.06, "learning_rate": 4.9044977411639655e-05, "loss": 0.8833, "step": 5175 }, { "epoch": 0.06, "learning_rate": 4.904405468450114e-05, "loss": 0.9489, "step": 5180 }, { "epoch": 0.06, "learning_rate": 4.9043131957362624e-05, "loss": 0.8584, "step": 5185 }, { "epoch": 0.06, "learning_rate": 4.904220923022411e-05, "loss": 0.8872, "step": 5190 }, { "epoch": 0.06, "learning_rate": 4.9041286503085606e-05, "loss": 0.8696, "step": 5195 }, { "epoch": 0.06, "learning_rate": 4.904036377594709e-05, "loss": 0.8934, "step": 5200 }, { "epoch": 0.06, "learning_rate": 4.9039441048808575e-05, "loss": 0.9433, "step": 5205 }, { "epoch": 0.06, "learning_rate": 4.903851832167006e-05, "loss": 0.8438, "step": 5210 }, { "epoch": 0.06, "learning_rate": 4.903759559453156e-05, "loss": 0.8957, "step": 5215 }, { "epoch": 0.06, "learning_rate": 4.903667286739304e-05, "loss": 0.8354, "step": 5220 }, { "epoch": 0.06, "learning_rate": 4.9035750140254526e-05, "loss": 0.9566, "step": 5225 }, { "epoch": 0.06, "learning_rate": 4.9034827413116014e-05, "loss": 0.9191, "step": 5230 }, { "epoch": 0.06, "learning_rate": 4.90339046859775e-05, "loss": 0.9117, "step": 5235 }, { "epoch": 0.06, "learning_rate": 4.903298195883899e-05, "loss": 0.9091, "step": 5240 }, { "epoch": 0.06, "learning_rate": 4.903205923170048e-05, "loss": 0.8842, "step": 5245 }, { "epoch": 0.06, "learning_rate": 4.9031136504561966e-05, "loss": 0.9353, "step": 5250 }, { "epoch": 0.06, "learning_rate": 4.9030213777423454e-05, "loss": 0.8853, "step": 5255 }, { "epoch": 0.06, "learning_rate": 4.902929105028494e-05, "loss": 0.9281, "step": 5260 }, { "epoch": 0.06, "learning_rate": 4.902836832314643e-05, "loss": 0.9995, "step": 5265 }, { "epoch": 0.06, "learning_rate": 4.902744559600792e-05, "loss": 0.8843, "step": 5270 }, { "epoch": 0.06, "learning_rate": 4.90265228688694e-05, "loss": 0.893, "step": 5275 }, { "epoch": 0.06, "learning_rate": 4.902560014173089e-05, "loss": 0.9171, "step": 5280 }, { "epoch": 0.06, "learning_rate": 4.902467741459238e-05, "loss": 0.9369, "step": 5285 }, { "epoch": 0.06, "learning_rate": 4.902375468745387e-05, "loss": 0.8235, "step": 5290 }, { "epoch": 0.06, "learning_rate": 4.902283196031535e-05, "loss": 0.8694, "step": 5295 }, { "epoch": 0.06, "learning_rate": 4.9021909233176844e-05, "loss": 0.9253, "step": 5300 }, { "epoch": 0.06, "learning_rate": 4.902098650603833e-05, "loss": 0.8753, "step": 5305 }, { "epoch": 0.06, "learning_rate": 4.902006377889981e-05, "loss": 0.9217, "step": 5310 }, { "epoch": 0.06, "learning_rate": 4.90191410517613e-05, "loss": 0.8962, "step": 5315 }, { "epoch": 0.06, "learning_rate": 4.901821832462279e-05, "loss": 0.9004, "step": 5320 }, { "epoch": 0.06, "learning_rate": 4.9017295597484283e-05, "loss": 0.8533, "step": 5325 }, { "epoch": 0.06, "learning_rate": 4.9016372870345764e-05, "loss": 0.8576, "step": 5330 }, { "epoch": 0.06, "learning_rate": 4.901545014320725e-05, "loss": 0.9384, "step": 5335 }, { "epoch": 0.06, "learning_rate": 4.901452741606874e-05, "loss": 0.9076, "step": 5340 }, { "epoch": 0.06, "learning_rate": 4.901360468893023e-05, "loss": 0.8385, "step": 5345 }, { "epoch": 0.06, "learning_rate": 4.9012681961791716e-05, "loss": 0.895, "step": 5350 }, { "epoch": 0.06, "learning_rate": 4.9011759234653204e-05, "loss": 0.7924, "step": 5355 }, { "epoch": 0.06, "learning_rate": 4.901083650751469e-05, "loss": 0.8523, "step": 5360 }, { "epoch": 0.06, "learning_rate": 4.900991378037618e-05, "loss": 0.8446, "step": 5365 }, { "epoch": 0.06, "learning_rate": 4.900899105323767e-05, "loss": 0.8917, "step": 5370 }, { "epoch": 0.06, "learning_rate": 4.9008068326099155e-05, "loss": 0.9306, "step": 5375 }, { "epoch": 0.06, "learning_rate": 4.900714559896064e-05, "loss": 0.9382, "step": 5380 }, { "epoch": 0.06, "learning_rate": 4.900622287182213e-05, "loss": 0.9086, "step": 5385 }, { "epoch": 0.06, "learning_rate": 4.900530014468362e-05, "loss": 0.9176, "step": 5390 }, { "epoch": 0.06, "learning_rate": 4.9004377417545107e-05, "loss": 0.9228, "step": 5395 }, { "epoch": 0.06, "learning_rate": 4.9003454690406594e-05, "loss": 0.9057, "step": 5400 }, { "epoch": 0.06, "learning_rate": 4.9002531963268075e-05, "loss": 0.8586, "step": 5405 }, { "epoch": 0.06, "learning_rate": 4.900160923612957e-05, "loss": 0.9493, "step": 5410 }, { "epoch": 0.06, "learning_rate": 4.900068650899106e-05, "loss": 0.945, "step": 5415 }, { "epoch": 0.06, "learning_rate": 4.899976378185254e-05, "loss": 0.855, "step": 5420 }, { "epoch": 0.06, "learning_rate": 4.899884105471403e-05, "loss": 0.9014, "step": 5425 }, { "epoch": 0.06, "learning_rate": 4.899791832757552e-05, "loss": 0.8397, "step": 5430 }, { "epoch": 0.06, "learning_rate": 4.899699560043701e-05, "loss": 0.8587, "step": 5435 }, { "epoch": 0.06, "learning_rate": 4.899607287329849e-05, "loss": 0.8607, "step": 5440 }, { "epoch": 0.06, "learning_rate": 4.899515014615998e-05, "loss": 0.8714, "step": 5445 }, { "epoch": 0.06, "learning_rate": 4.899422741902147e-05, "loss": 0.912, "step": 5450 }, { "epoch": 0.06, "learning_rate": 4.899330469188296e-05, "loss": 0.8631, "step": 5455 }, { "epoch": 0.06, "learning_rate": 4.899238196474444e-05, "loss": 0.9048, "step": 5460 }, { "epoch": 0.06, "learning_rate": 4.899145923760593e-05, "loss": 0.8813, "step": 5465 }, { "epoch": 0.06, "learning_rate": 4.899053651046742e-05, "loss": 0.9489, "step": 5470 }, { "epoch": 0.06, "learning_rate": 4.8989613783328905e-05, "loss": 0.9079, "step": 5475 }, { "epoch": 0.06, "learning_rate": 4.898869105619039e-05, "loss": 0.8502, "step": 5480 }, { "epoch": 0.06, "learning_rate": 4.898776832905188e-05, "loss": 0.9507, "step": 5485 }, { "epoch": 0.06, "learning_rate": 4.898684560191337e-05, "loss": 0.9237, "step": 5490 }, { "epoch": 0.06, "learning_rate": 4.898592287477486e-05, "loss": 0.914, "step": 5495 }, { "epoch": 0.06, "learning_rate": 4.8985000147636345e-05, "loss": 0.9429, "step": 5500 }, { "epoch": 0.06, "learning_rate": 4.898407742049783e-05, "loss": 0.8818, "step": 5505 }, { "epoch": 0.06, "learning_rate": 4.898315469335932e-05, "loss": 0.9211, "step": 5510 }, { "epoch": 0.06, "learning_rate": 4.898223196622081e-05, "loss": 0.85, "step": 5515 }, { "epoch": 0.06, "learning_rate": 4.8981309239082296e-05, "loss": 0.9483, "step": 5520 }, { "epoch": 0.06, "learning_rate": 4.8980386511943784e-05, "loss": 0.9342, "step": 5525 }, { "epoch": 0.06, "learning_rate": 4.897946378480527e-05, "loss": 0.9223, "step": 5530 }, { "epoch": 0.06, "learning_rate": 4.897854105766676e-05, "loss": 0.8875, "step": 5535 }, { "epoch": 0.06, "learning_rate": 4.897761833052825e-05, "loss": 0.9304, "step": 5540 }, { "epoch": 0.06, "learning_rate": 4.8976695603389735e-05, "loss": 0.8226, "step": 5545 }, { "epoch": 0.06, "learning_rate": 4.8975772876251216e-05, "loss": 0.8934, "step": 5550 }, { "epoch": 0.06, "learning_rate": 4.8974850149112704e-05, "loss": 0.8758, "step": 5555 }, { "epoch": 0.06, "learning_rate": 4.89739274219742e-05, "loss": 0.9143, "step": 5560 }, { "epoch": 0.06, "learning_rate": 4.8973004694835687e-05, "loss": 0.8704, "step": 5565 }, { "epoch": 0.06, "learning_rate": 4.897208196769717e-05, "loss": 0.908, "step": 5570 }, { "epoch": 0.06, "learning_rate": 4.8971159240558656e-05, "loss": 0.8742, "step": 5575 }, { "epoch": 0.06, "learning_rate": 4.897023651342015e-05, "loss": 0.8953, "step": 5580 }, { "epoch": 0.06, "learning_rate": 4.896931378628163e-05, "loss": 0.8883, "step": 5585 }, { "epoch": 0.06, "learning_rate": 4.896839105914312e-05, "loss": 0.8217, "step": 5590 }, { "epoch": 0.06, "learning_rate": 4.896746833200461e-05, "loss": 0.8839, "step": 5595 }, { "epoch": 0.06, "learning_rate": 4.89665456048661e-05, "loss": 0.9427, "step": 5600 }, { "epoch": 0.06, "learning_rate": 4.896562287772758e-05, "loss": 0.8216, "step": 5605 }, { "epoch": 0.06, "learning_rate": 4.896470015058907e-05, "loss": 0.8622, "step": 5610 }, { "epoch": 0.06, "learning_rate": 4.896377742345056e-05, "loss": 0.8695, "step": 5615 }, { "epoch": 0.06, "learning_rate": 4.8962854696312046e-05, "loss": 0.8294, "step": 5620 }, { "epoch": 0.06, "learning_rate": 4.8961931969173534e-05, "loss": 0.8606, "step": 5625 }, { "epoch": 0.06, "learning_rate": 4.896100924203502e-05, "loss": 0.8064, "step": 5630 }, { "epoch": 0.06, "learning_rate": 4.896008651489651e-05, "loss": 0.8911, "step": 5635 }, { "epoch": 0.06, "learning_rate": 4.8959163787758e-05, "loss": 0.8633, "step": 5640 }, { "epoch": 0.06, "learning_rate": 4.8958241060619485e-05, "loss": 0.8751, "step": 5645 }, { "epoch": 0.06, "learning_rate": 4.895731833348097e-05, "loss": 0.9051, "step": 5650 }, { "epoch": 0.06, "learning_rate": 4.895639560634246e-05, "loss": 0.8696, "step": 5655 }, { "epoch": 0.06, "learning_rate": 4.895547287920394e-05, "loss": 0.8723, "step": 5660 }, { "epoch": 0.06, "learning_rate": 4.895455015206544e-05, "loss": 0.8922, "step": 5665 }, { "epoch": 0.06, "learning_rate": 4.8953627424926925e-05, "loss": 0.8894, "step": 5670 }, { "epoch": 0.06, "learning_rate": 4.895270469778841e-05, "loss": 0.875, "step": 5675 }, { "epoch": 0.06, "learning_rate": 4.8951781970649894e-05, "loss": 0.9329, "step": 5680 }, { "epoch": 0.06, "learning_rate": 4.895085924351139e-05, "loss": 0.8825, "step": 5685 }, { "epoch": 0.06, "learning_rate": 4.8949936516372876e-05, "loss": 0.8506, "step": 5690 }, { "epoch": 0.06, "learning_rate": 4.894901378923436e-05, "loss": 0.8837, "step": 5695 }, { "epoch": 0.06, "learning_rate": 4.8948091062095845e-05, "loss": 0.9642, "step": 5700 }, { "epoch": 0.06, "learning_rate": 4.894716833495733e-05, "loss": 0.9518, "step": 5705 }, { "epoch": 0.06, "learning_rate": 4.894624560781883e-05, "loss": 0.871, "step": 5710 }, { "epoch": 0.06, "learning_rate": 4.894532288068031e-05, "loss": 0.9208, "step": 5715 }, { "epoch": 0.06, "learning_rate": 4.8944400153541796e-05, "loss": 0.9369, "step": 5720 }, { "epoch": 0.06, "learning_rate": 4.8943477426403284e-05, "loss": 0.8998, "step": 5725 }, { "epoch": 0.06, "learning_rate": 4.894255469926477e-05, "loss": 0.9389, "step": 5730 }, { "epoch": 0.06, "learning_rate": 4.894163197212626e-05, "loss": 0.8206, "step": 5735 }, { "epoch": 0.06, "learning_rate": 4.894070924498775e-05, "loss": 0.8567, "step": 5740 }, { "epoch": 0.06, "learning_rate": 4.8939786517849236e-05, "loss": 0.8547, "step": 5745 }, { "epoch": 0.06, "learning_rate": 4.8938863790710723e-05, "loss": 0.8552, "step": 5750 }, { "epoch": 0.06, "learning_rate": 4.893794106357221e-05, "loss": 0.8923, "step": 5755 }, { "epoch": 0.06, "learning_rate": 4.89370183364337e-05, "loss": 0.8786, "step": 5760 }, { "epoch": 0.06, "learning_rate": 4.893609560929519e-05, "loss": 0.8718, "step": 5765 }, { "epoch": 0.06, "learning_rate": 4.893517288215667e-05, "loss": 0.8663, "step": 5770 }, { "epoch": 0.06, "learning_rate": 4.893425015501816e-05, "loss": 0.9305, "step": 5775 }, { "epoch": 0.06, "learning_rate": 4.893332742787965e-05, "loss": 0.9031, "step": 5780 }, { "epoch": 0.06, "learning_rate": 4.893240470074114e-05, "loss": 0.8963, "step": 5785 }, { "epoch": 0.06, "learning_rate": 4.893148197360262e-05, "loss": 0.8904, "step": 5790 }, { "epoch": 0.06, "learning_rate": 4.8930559246464114e-05, "loss": 0.8394, "step": 5795 }, { "epoch": 0.06, "learning_rate": 4.89296365193256e-05, "loss": 0.9017, "step": 5800 }, { "epoch": 0.06, "learning_rate": 4.892871379218709e-05, "loss": 0.8014, "step": 5805 }, { "epoch": 0.06, "learning_rate": 4.892779106504857e-05, "loss": 0.8585, "step": 5810 }, { "epoch": 0.06, "learning_rate": 4.8926868337910065e-05, "loss": 0.9223, "step": 5815 }, { "epoch": 0.06, "learning_rate": 4.892594561077155e-05, "loss": 0.9248, "step": 5820 }, { "epoch": 0.06, "learning_rate": 4.8925022883633034e-05, "loss": 0.8802, "step": 5825 }, { "epoch": 0.06, "learning_rate": 4.892410015649452e-05, "loss": 0.9418, "step": 5830 }, { "epoch": 0.06, "learning_rate": 4.892317742935602e-05, "loss": 0.803, "step": 5835 }, { "epoch": 0.06, "learning_rate": 4.8922254702217505e-05, "loss": 0.8821, "step": 5840 }, { "epoch": 0.06, "learning_rate": 4.8921331975078986e-05, "loss": 0.8328, "step": 5845 }, { "epoch": 0.06, "learning_rate": 4.8920409247940474e-05, "loss": 0.9353, "step": 5850 }, { "epoch": 0.06, "learning_rate": 4.891948652080196e-05, "loss": 0.8448, "step": 5855 }, { "epoch": 0.06, "learning_rate": 4.891856379366345e-05, "loss": 0.8947, "step": 5860 }, { "epoch": 0.06, "learning_rate": 4.891764106652494e-05, "loss": 0.8289, "step": 5865 }, { "epoch": 0.06, "learning_rate": 4.8916718339386425e-05, "loss": 0.8812, "step": 5870 }, { "epoch": 0.07, "learning_rate": 4.891579561224791e-05, "loss": 0.8658, "step": 5875 }, { "epoch": 0.07, "learning_rate": 4.89148728851094e-05, "loss": 0.9587, "step": 5880 }, { "epoch": 0.07, "learning_rate": 4.891395015797089e-05, "loss": 0.9796, "step": 5885 }, { "epoch": 0.07, "learning_rate": 4.8913027430832376e-05, "loss": 0.9171, "step": 5890 }, { "epoch": 0.07, "learning_rate": 4.8912104703693864e-05, "loss": 0.9035, "step": 5895 }, { "epoch": 0.07, "learning_rate": 4.891118197655535e-05, "loss": 0.8348, "step": 5900 }, { "epoch": 0.07, "learning_rate": 4.891025924941684e-05, "loss": 0.8706, "step": 5905 }, { "epoch": 0.07, "learning_rate": 4.890933652227833e-05, "loss": 0.9028, "step": 5910 }, { "epoch": 0.07, "learning_rate": 4.8908413795139816e-05, "loss": 0.8798, "step": 5915 }, { "epoch": 0.07, "learning_rate": 4.8907491068001304e-05, "loss": 0.8907, "step": 5920 }, { "epoch": 0.07, "learning_rate": 4.890656834086279e-05, "loss": 0.9193, "step": 5925 }, { "epoch": 0.07, "learning_rate": 4.890564561372428e-05, "loss": 0.8838, "step": 5930 }, { "epoch": 0.07, "learning_rate": 4.890472288658576e-05, "loss": 0.839, "step": 5935 }, { "epoch": 0.07, "learning_rate": 4.890380015944725e-05, "loss": 0.867, "step": 5940 }, { "epoch": 0.07, "learning_rate": 4.890287743230874e-05, "loss": 0.8543, "step": 5945 }, { "epoch": 0.07, "learning_rate": 4.890195470517023e-05, "loss": 0.9233, "step": 5950 }, { "epoch": 0.07, "learning_rate": 4.890103197803171e-05, "loss": 0.8635, "step": 5955 }, { "epoch": 0.07, "learning_rate": 4.89001092508932e-05, "loss": 0.8592, "step": 5960 }, { "epoch": 0.07, "learning_rate": 4.8899186523754694e-05, "loss": 0.8889, "step": 5965 }, { "epoch": 0.07, "learning_rate": 4.8898263796616175e-05, "loss": 0.8379, "step": 5970 }, { "epoch": 0.07, "learning_rate": 4.889734106947766e-05, "loss": 0.9616, "step": 5975 }, { "epoch": 0.07, "learning_rate": 4.889641834233915e-05, "loss": 0.9359, "step": 5980 }, { "epoch": 0.07, "learning_rate": 4.8895495615200646e-05, "loss": 0.9672, "step": 5985 }, { "epoch": 0.07, "learning_rate": 4.889457288806213e-05, "loss": 0.9274, "step": 5990 }, { "epoch": 0.07, "learning_rate": 4.8893650160923614e-05, "loss": 0.915, "step": 5995 }, { "epoch": 0.07, "learning_rate": 4.88927274337851e-05, "loss": 0.8449, "step": 6000 }, { "epoch": 0.07, "eval_loss": 0.8220890164375305, "eval_runtime": 70.1144, "eval_samples_per_second": 28.525, "eval_steps_per_second": 14.262, "step": 6000 }, { "epoch": 0.07, "learning_rate": 4.889180470664659e-05, "loss": 0.8242, "step": 6005 }, { "epoch": 0.07, "learning_rate": 4.889088197950808e-05, "loss": 0.8429, "step": 6010 }, { "epoch": 0.07, "learning_rate": 4.8889959252369566e-05, "loss": 0.9295, "step": 6015 }, { "epoch": 0.07, "learning_rate": 4.8889036525231054e-05, "loss": 0.8572, "step": 6020 }, { "epoch": 0.07, "learning_rate": 4.888811379809254e-05, "loss": 0.9067, "step": 6025 }, { "epoch": 0.07, "learning_rate": 4.888719107095403e-05, "loss": 0.9513, "step": 6030 }, { "epoch": 0.07, "learning_rate": 4.888626834381552e-05, "loss": 0.8168, "step": 6035 }, { "epoch": 0.07, "learning_rate": 4.8885345616677005e-05, "loss": 0.8507, "step": 6040 }, { "epoch": 0.07, "learning_rate": 4.8884422889538486e-05, "loss": 0.8629, "step": 6045 }, { "epoch": 0.07, "learning_rate": 4.888350016239998e-05, "loss": 0.8118, "step": 6050 }, { "epoch": 0.07, "learning_rate": 4.888257743526147e-05, "loss": 0.8864, "step": 6055 }, { "epoch": 0.07, "learning_rate": 4.8881654708122957e-05, "loss": 0.8684, "step": 6060 }, { "epoch": 0.07, "learning_rate": 4.888073198098444e-05, "loss": 0.8074, "step": 6065 }, { "epoch": 0.07, "learning_rate": 4.887980925384593e-05, "loss": 0.8784, "step": 6070 }, { "epoch": 0.07, "learning_rate": 4.887888652670742e-05, "loss": 0.9298, "step": 6075 }, { "epoch": 0.07, "learning_rate": 4.88779637995689e-05, "loss": 0.8719, "step": 6080 }, { "epoch": 0.07, "learning_rate": 4.887704107243039e-05, "loss": 0.8743, "step": 6085 }, { "epoch": 0.07, "learning_rate": 4.887611834529188e-05, "loss": 1.0018, "step": 6090 }, { "epoch": 0.07, "learning_rate": 4.887519561815337e-05, "loss": 0.8464, "step": 6095 }, { "epoch": 0.07, "learning_rate": 4.887427289101485e-05, "loss": 0.9178, "step": 6100 }, { "epoch": 0.07, "learning_rate": 4.887335016387634e-05, "loss": 0.8929, "step": 6105 }, { "epoch": 0.07, "learning_rate": 4.887242743673783e-05, "loss": 0.9007, "step": 6110 }, { "epoch": 0.07, "learning_rate": 4.887150470959932e-05, "loss": 0.8701, "step": 6115 }, { "epoch": 0.07, "learning_rate": 4.8870581982460804e-05, "loss": 0.9428, "step": 6120 }, { "epoch": 0.07, "learning_rate": 4.886965925532229e-05, "loss": 0.8407, "step": 6125 }, { "epoch": 0.07, "learning_rate": 4.886873652818378e-05, "loss": 0.9924, "step": 6130 }, { "epoch": 0.07, "learning_rate": 4.886781380104527e-05, "loss": 0.8561, "step": 6135 }, { "epoch": 0.07, "learning_rate": 4.8866891073906755e-05, "loss": 0.8397, "step": 6140 }, { "epoch": 0.07, "learning_rate": 4.886596834676824e-05, "loss": 0.8991, "step": 6145 }, { "epoch": 0.07, "learning_rate": 4.886504561962973e-05, "loss": 0.9015, "step": 6150 }, { "epoch": 0.07, "learning_rate": 4.886412289249121e-05, "loss": 0.8914, "step": 6155 }, { "epoch": 0.07, "learning_rate": 4.886320016535271e-05, "loss": 0.892, "step": 6160 }, { "epoch": 0.07, "learning_rate": 4.8862277438214195e-05, "loss": 0.8885, "step": 6165 }, { "epoch": 0.07, "learning_rate": 4.886135471107568e-05, "loss": 0.9135, "step": 6170 }, { "epoch": 0.07, "learning_rate": 4.8860431983937163e-05, "loss": 0.9048, "step": 6175 }, { "epoch": 0.07, "learning_rate": 4.885950925679866e-05, "loss": 0.8672, "step": 6180 }, { "epoch": 0.07, "learning_rate": 4.8858586529660146e-05, "loss": 0.9286, "step": 6185 }, { "epoch": 0.07, "learning_rate": 4.8857663802521634e-05, "loss": 0.8873, "step": 6190 }, { "epoch": 0.07, "learning_rate": 4.8856741075383115e-05, "loss": 0.9098, "step": 6195 }, { "epoch": 0.07, "learning_rate": 4.885581834824461e-05, "loss": 0.8618, "step": 6200 }, { "epoch": 0.07, "learning_rate": 4.88548956211061e-05, "loss": 0.8678, "step": 6205 }, { "epoch": 0.07, "learning_rate": 4.885397289396758e-05, "loss": 0.9239, "step": 6210 }, { "epoch": 0.07, "learning_rate": 4.8853050166829066e-05, "loss": 0.861, "step": 6215 }, { "epoch": 0.07, "learning_rate": 4.885212743969056e-05, "loss": 0.8855, "step": 6220 }, { "epoch": 0.07, "learning_rate": 4.885120471255205e-05, "loss": 0.8748, "step": 6225 }, { "epoch": 0.07, "learning_rate": 4.885028198541353e-05, "loss": 0.9315, "step": 6230 }, { "epoch": 0.07, "learning_rate": 4.884935925827502e-05, "loss": 0.8764, "step": 6235 }, { "epoch": 0.07, "learning_rate": 4.8848436531136506e-05, "loss": 0.8874, "step": 6240 }, { "epoch": 0.07, "learning_rate": 4.884751380399799e-05, "loss": 0.8482, "step": 6245 }, { "epoch": 0.07, "learning_rate": 4.884659107685948e-05, "loss": 0.9488, "step": 6250 }, { "epoch": 0.07, "learning_rate": 4.884566834972097e-05, "loss": 0.8245, "step": 6255 }, { "epoch": 0.07, "learning_rate": 4.884474562258246e-05, "loss": 0.8865, "step": 6260 }, { "epoch": 0.07, "learning_rate": 4.8843822895443945e-05, "loss": 0.8748, "step": 6265 }, { "epoch": 0.07, "learning_rate": 4.884290016830543e-05, "loss": 0.9228, "step": 6270 }, { "epoch": 0.07, "learning_rate": 4.884197744116692e-05, "loss": 0.9124, "step": 6275 }, { "epoch": 0.07, "learning_rate": 4.884105471402841e-05, "loss": 0.9303, "step": 6280 }, { "epoch": 0.07, "learning_rate": 4.8840131986889896e-05, "loss": 0.8691, "step": 6285 }, { "epoch": 0.07, "learning_rate": 4.8839209259751384e-05, "loss": 0.9423, "step": 6290 }, { "epoch": 0.07, "learning_rate": 4.883828653261287e-05, "loss": 0.8706, "step": 6295 }, { "epoch": 0.07, "learning_rate": 4.883736380547436e-05, "loss": 0.8561, "step": 6300 }, { "epoch": 0.07, "learning_rate": 4.883644107833584e-05, "loss": 0.901, "step": 6305 }, { "epoch": 0.07, "learning_rate": 4.8835518351197335e-05, "loss": 0.9594, "step": 6310 }, { "epoch": 0.07, "learning_rate": 4.883459562405882e-05, "loss": 0.8881, "step": 6315 }, { "epoch": 0.07, "learning_rate": 4.8833672896920304e-05, "loss": 0.9071, "step": 6320 }, { "epoch": 0.07, "learning_rate": 4.883275016978179e-05, "loss": 0.7464, "step": 6325 }, { "epoch": 0.07, "learning_rate": 4.883182744264329e-05, "loss": 0.8703, "step": 6330 }, { "epoch": 0.07, "learning_rate": 4.8830904715504775e-05, "loss": 0.8583, "step": 6335 }, { "epoch": 0.07, "learning_rate": 4.8829981988366256e-05, "loss": 0.8819, "step": 6340 }, { "epoch": 0.07, "learning_rate": 4.8829059261227744e-05, "loss": 0.8322, "step": 6345 }, { "epoch": 0.07, "learning_rate": 4.882813653408924e-05, "loss": 0.8545, "step": 6350 }, { "epoch": 0.07, "learning_rate": 4.882721380695072e-05, "loss": 0.8724, "step": 6355 }, { "epoch": 0.07, "learning_rate": 4.882629107981221e-05, "loss": 0.8431, "step": 6360 }, { "epoch": 0.07, "learning_rate": 4.8825368352673695e-05, "loss": 0.9254, "step": 6365 }, { "epoch": 0.07, "learning_rate": 4.882444562553519e-05, "loss": 0.9122, "step": 6370 }, { "epoch": 0.07, "learning_rate": 4.882352289839667e-05, "loss": 0.9283, "step": 6375 }, { "epoch": 0.07, "learning_rate": 4.882260017125816e-05, "loss": 0.8667, "step": 6380 }, { "epoch": 0.07, "learning_rate": 4.8821677444119646e-05, "loss": 0.8908, "step": 6385 }, { "epoch": 0.07, "learning_rate": 4.8820754716981134e-05, "loss": 0.9203, "step": 6390 }, { "epoch": 0.07, "learning_rate": 4.881983198984262e-05, "loss": 0.8968, "step": 6395 }, { "epoch": 0.07, "learning_rate": 4.881890926270411e-05, "loss": 0.9064, "step": 6400 }, { "epoch": 0.07, "learning_rate": 4.88179865355656e-05, "loss": 0.9444, "step": 6405 }, { "epoch": 0.07, "learning_rate": 4.8817063808427086e-05, "loss": 0.8666, "step": 6410 }, { "epoch": 0.07, "learning_rate": 4.8816141081288573e-05, "loss": 0.8777, "step": 6415 }, { "epoch": 0.07, "learning_rate": 4.881521835415006e-05, "loss": 0.8392, "step": 6420 }, { "epoch": 0.07, "learning_rate": 4.881429562701155e-05, "loss": 0.8658, "step": 6425 }, { "epoch": 0.07, "learning_rate": 4.881337289987303e-05, "loss": 0.863, "step": 6430 }, { "epoch": 0.07, "learning_rate": 4.8812450172734525e-05, "loss": 0.8874, "step": 6435 }, { "epoch": 0.07, "learning_rate": 4.881152744559601e-05, "loss": 0.874, "step": 6440 }, { "epoch": 0.07, "learning_rate": 4.88106047184575e-05, "loss": 0.8783, "step": 6445 }, { "epoch": 0.07, "learning_rate": 4.880968199131898e-05, "loss": 0.9069, "step": 6450 }, { "epoch": 0.07, "learning_rate": 4.880875926418047e-05, "loss": 0.8905, "step": 6455 }, { "epoch": 0.07, "learning_rate": 4.8807836537041964e-05, "loss": 0.8575, "step": 6460 }, { "epoch": 0.07, "learning_rate": 4.8806913809903445e-05, "loss": 0.8568, "step": 6465 }, { "epoch": 0.07, "learning_rate": 4.880599108276493e-05, "loss": 0.8689, "step": 6470 }, { "epoch": 0.07, "learning_rate": 4.880506835562642e-05, "loss": 0.8513, "step": 6475 }, { "epoch": 0.07, "learning_rate": 4.8804145628487915e-05, "loss": 0.9118, "step": 6480 }, { "epoch": 0.07, "learning_rate": 4.8803222901349397e-05, "loss": 0.836, "step": 6485 }, { "epoch": 0.07, "learning_rate": 4.8802300174210884e-05, "loss": 0.9286, "step": 6490 }, { "epoch": 0.07, "learning_rate": 4.880137744707237e-05, "loss": 0.9158, "step": 6495 }, { "epoch": 0.07, "learning_rate": 4.880045471993387e-05, "loss": 0.9136, "step": 6500 }, { "epoch": 0.07, "learning_rate": 4.879953199279535e-05, "loss": 0.806, "step": 6505 }, { "epoch": 0.07, "learning_rate": 4.8798609265656836e-05, "loss": 0.8461, "step": 6510 }, { "epoch": 0.07, "learning_rate": 4.8797686538518324e-05, "loss": 0.8565, "step": 6515 }, { "epoch": 0.07, "learning_rate": 4.879676381137981e-05, "loss": 0.9234, "step": 6520 }, { "epoch": 0.07, "learning_rate": 4.87958410842413e-05, "loss": 0.8545, "step": 6525 }, { "epoch": 0.07, "learning_rate": 4.879491835710279e-05, "loss": 0.8505, "step": 6530 }, { "epoch": 0.07, "learning_rate": 4.8793995629964275e-05, "loss": 0.9127, "step": 6535 }, { "epoch": 0.07, "learning_rate": 4.8793072902825756e-05, "loss": 0.859, "step": 6540 }, { "epoch": 0.07, "learning_rate": 4.879215017568725e-05, "loss": 0.8167, "step": 6545 }, { "epoch": 0.07, "learning_rate": 4.879122744854874e-05, "loss": 0.8899, "step": 6550 }, { "epoch": 0.07, "learning_rate": 4.8790304721410226e-05, "loss": 0.868, "step": 6555 }, { "epoch": 0.07, "learning_rate": 4.878938199427171e-05, "loss": 0.9013, "step": 6560 }, { "epoch": 0.07, "learning_rate": 4.87884592671332e-05, "loss": 0.7993, "step": 6565 }, { "epoch": 0.07, "learning_rate": 4.878753653999469e-05, "loss": 0.8382, "step": 6570 }, { "epoch": 0.07, "learning_rate": 4.878661381285618e-05, "loss": 0.8813, "step": 6575 }, { "epoch": 0.07, "learning_rate": 4.878569108571766e-05, "loss": 0.9037, "step": 6580 }, { "epoch": 0.07, "learning_rate": 4.8784768358579154e-05, "loss": 0.8981, "step": 6585 }, { "epoch": 0.07, "learning_rate": 4.878384563144064e-05, "loss": 0.8845, "step": 6590 }, { "epoch": 0.07, "learning_rate": 4.878292290430212e-05, "loss": 0.918, "step": 6595 }, { "epoch": 0.07, "learning_rate": 4.878200017716361e-05, "loss": 0.8705, "step": 6600 }, { "epoch": 0.07, "learning_rate": 4.87810774500251e-05, "loss": 0.8368, "step": 6605 }, { "epoch": 0.07, "learning_rate": 4.878015472288659e-05, "loss": 0.8554, "step": 6610 }, { "epoch": 0.07, "learning_rate": 4.8779231995748074e-05, "loss": 0.8437, "step": 6615 }, { "epoch": 0.07, "learning_rate": 4.877830926860956e-05, "loss": 0.9008, "step": 6620 }, { "epoch": 0.07, "learning_rate": 4.877738654147105e-05, "loss": 0.8659, "step": 6625 }, { "epoch": 0.07, "learning_rate": 4.877646381433254e-05, "loss": 0.8846, "step": 6630 }, { "epoch": 0.07, "learning_rate": 4.8775541087194025e-05, "loss": 0.9219, "step": 6635 }, { "epoch": 0.07, "learning_rate": 4.877461836005551e-05, "loss": 0.8847, "step": 6640 }, { "epoch": 0.07, "learning_rate": 4.8773695632917e-05, "loss": 0.8711, "step": 6645 }, { "epoch": 0.07, "learning_rate": 4.877277290577849e-05, "loss": 0.9222, "step": 6650 }, { "epoch": 0.07, "learning_rate": 4.877185017863998e-05, "loss": 0.8318, "step": 6655 }, { "epoch": 0.07, "learning_rate": 4.8770927451501464e-05, "loss": 0.8844, "step": 6660 }, { "epoch": 0.07, "learning_rate": 4.877000472436295e-05, "loss": 0.8859, "step": 6665 }, { "epoch": 0.07, "learning_rate": 4.876908199722444e-05, "loss": 0.8697, "step": 6670 }, { "epoch": 0.07, "learning_rate": 4.876815927008593e-05, "loss": 0.9011, "step": 6675 }, { "epoch": 0.07, "learning_rate": 4.8767236542947416e-05, "loss": 0.879, "step": 6680 }, { "epoch": 0.07, "learning_rate": 4.8766313815808904e-05, "loss": 0.8562, "step": 6685 }, { "epoch": 0.07, "learning_rate": 4.8765391088670385e-05, "loss": 0.9107, "step": 6690 }, { "epoch": 0.07, "learning_rate": 4.876446836153188e-05, "loss": 0.8331, "step": 6695 }, { "epoch": 0.07, "learning_rate": 4.876354563439337e-05, "loss": 0.9326, "step": 6700 }, { "epoch": 0.07, "learning_rate": 4.876262290725485e-05, "loss": 0.9311, "step": 6705 }, { "epoch": 0.07, "learning_rate": 4.8761700180116336e-05, "loss": 0.8443, "step": 6710 }, { "epoch": 0.07, "learning_rate": 4.876077745297783e-05, "loss": 0.8656, "step": 6715 }, { "epoch": 0.07, "learning_rate": 4.875985472583932e-05, "loss": 0.9524, "step": 6720 }, { "epoch": 0.07, "learning_rate": 4.87589319987008e-05, "loss": 0.8529, "step": 6725 }, { "epoch": 0.07, "learning_rate": 4.875800927156229e-05, "loss": 0.9744, "step": 6730 }, { "epoch": 0.07, "learning_rate": 4.875708654442378e-05, "loss": 0.8598, "step": 6735 }, { "epoch": 0.07, "learning_rate": 4.875616381728526e-05, "loss": 0.8624, "step": 6740 }, { "epoch": 0.07, "learning_rate": 4.875524109014675e-05, "loss": 0.853, "step": 6745 }, { "epoch": 0.07, "learning_rate": 4.875431836300824e-05, "loss": 0.8434, "step": 6750 }, { "epoch": 0.07, "learning_rate": 4.8753395635869734e-05, "loss": 0.8418, "step": 6755 }, { "epoch": 0.07, "learning_rate": 4.8752472908731215e-05, "loss": 0.9171, "step": 6760 }, { "epoch": 0.07, "learning_rate": 4.87515501815927e-05, "loss": 0.8112, "step": 6765 }, { "epoch": 0.07, "learning_rate": 4.875062745445419e-05, "loss": 0.8232, "step": 6770 }, { "epoch": 0.08, "learning_rate": 4.874970472731568e-05, "loss": 0.8444, "step": 6775 }, { "epoch": 0.08, "learning_rate": 4.8748782000177166e-05, "loss": 0.8826, "step": 6780 }, { "epoch": 0.08, "learning_rate": 4.8747859273038654e-05, "loss": 0.8881, "step": 6785 }, { "epoch": 0.08, "learning_rate": 4.874693654590014e-05, "loss": 0.8685, "step": 6790 }, { "epoch": 0.08, "learning_rate": 4.874601381876163e-05, "loss": 0.841, "step": 6795 }, { "epoch": 0.08, "learning_rate": 4.874509109162312e-05, "loss": 0.8076, "step": 6800 }, { "epoch": 0.08, "learning_rate": 4.8744168364484605e-05, "loss": 0.8707, "step": 6805 }, { "epoch": 0.08, "learning_rate": 4.874324563734609e-05, "loss": 0.9236, "step": 6810 }, { "epoch": 0.08, "learning_rate": 4.8742322910207574e-05, "loss": 0.8941, "step": 6815 }, { "epoch": 0.08, "learning_rate": 4.874140018306907e-05, "loss": 0.8494, "step": 6820 }, { "epoch": 0.08, "learning_rate": 4.874047745593056e-05, "loss": 0.9204, "step": 6825 }, { "epoch": 0.08, "learning_rate": 4.8739554728792045e-05, "loss": 0.8384, "step": 6830 }, { "epoch": 0.08, "learning_rate": 4.8738632001653526e-05, "loss": 0.885, "step": 6835 }, { "epoch": 0.08, "learning_rate": 4.8737709274515013e-05, "loss": 0.9168, "step": 6840 }, { "epoch": 0.08, "learning_rate": 4.873678654737651e-05, "loss": 0.9201, "step": 6845 }, { "epoch": 0.08, "learning_rate": 4.873586382023799e-05, "loss": 0.8758, "step": 6850 }, { "epoch": 0.08, "learning_rate": 4.873494109309948e-05, "loss": 0.8967, "step": 6855 }, { "epoch": 0.08, "learning_rate": 4.8734018365960965e-05, "loss": 0.7955, "step": 6860 }, { "epoch": 0.08, "learning_rate": 4.873309563882246e-05, "loss": 0.8962, "step": 6865 }, { "epoch": 0.08, "learning_rate": 4.873217291168394e-05, "loss": 0.9588, "step": 6870 }, { "epoch": 0.08, "learning_rate": 4.873125018454543e-05, "loss": 0.8768, "step": 6875 }, { "epoch": 0.08, "learning_rate": 4.8730327457406916e-05, "loss": 0.8436, "step": 6880 }, { "epoch": 0.08, "learning_rate": 4.872940473026841e-05, "loss": 0.8063, "step": 6885 }, { "epoch": 0.08, "learning_rate": 4.872848200312989e-05, "loss": 0.8386, "step": 6890 }, { "epoch": 0.08, "learning_rate": 4.872755927599138e-05, "loss": 0.9176, "step": 6895 }, { "epoch": 0.08, "learning_rate": 4.872663654885287e-05, "loss": 0.8911, "step": 6900 }, { "epoch": 0.08, "learning_rate": 4.8725713821714355e-05, "loss": 0.8488, "step": 6905 }, { "epoch": 0.08, "learning_rate": 4.872479109457584e-05, "loss": 0.8909, "step": 6910 }, { "epoch": 0.08, "learning_rate": 4.872386836743733e-05, "loss": 0.9231, "step": 6915 }, { "epoch": 0.08, "learning_rate": 4.872294564029882e-05, "loss": 0.8596, "step": 6920 }, { "epoch": 0.08, "learning_rate": 4.87220229131603e-05, "loss": 0.8431, "step": 6925 }, { "epoch": 0.08, "learning_rate": 4.8721100186021795e-05, "loss": 0.8771, "step": 6930 }, { "epoch": 0.08, "learning_rate": 4.872017745888328e-05, "loss": 0.8405, "step": 6935 }, { "epoch": 0.08, "learning_rate": 4.871925473174477e-05, "loss": 0.8209, "step": 6940 }, { "epoch": 0.08, "learning_rate": 4.871833200460625e-05, "loss": 0.862, "step": 6945 }, { "epoch": 0.08, "learning_rate": 4.8717409277467746e-05, "loss": 0.7882, "step": 6950 }, { "epoch": 0.08, "learning_rate": 4.8716486550329234e-05, "loss": 0.8678, "step": 6955 }, { "epoch": 0.08, "learning_rate": 4.871556382319072e-05, "loss": 0.875, "step": 6960 }, { "epoch": 0.08, "learning_rate": 4.87146410960522e-05, "loss": 0.8558, "step": 6965 }, { "epoch": 0.08, "learning_rate": 4.87137183689137e-05, "loss": 0.8716, "step": 6970 }, { "epoch": 0.08, "learning_rate": 4.8712795641775185e-05, "loss": 0.8993, "step": 6975 }, { "epoch": 0.08, "learning_rate": 4.8711872914636666e-05, "loss": 0.8781, "step": 6980 }, { "epoch": 0.08, "learning_rate": 4.8710950187498154e-05, "loss": 0.8145, "step": 6985 }, { "epoch": 0.08, "learning_rate": 4.871002746035964e-05, "loss": 0.8453, "step": 6990 }, { "epoch": 0.08, "learning_rate": 4.870910473322114e-05, "loss": 0.8615, "step": 6995 }, { "epoch": 0.08, "learning_rate": 4.870818200608262e-05, "loss": 0.8681, "step": 7000 }, { "epoch": 0.08, "eval_loss": 0.8331711292266846, "eval_runtime": 70.3468, "eval_samples_per_second": 28.431, "eval_steps_per_second": 14.215, "step": 7000 }, { "epoch": 0.08, "learning_rate": 4.8707259278944106e-05, "loss": 0.8834, "step": 7005 }, { "epoch": 0.08, "learning_rate": 4.8706336551805594e-05, "loss": 0.8261, "step": 7010 }, { "epoch": 0.08, "learning_rate": 4.870541382466708e-05, "loss": 0.8425, "step": 7015 }, { "epoch": 0.08, "learning_rate": 4.870449109752857e-05, "loss": 0.8738, "step": 7020 }, { "epoch": 0.08, "learning_rate": 4.870356837039006e-05, "loss": 0.9181, "step": 7025 }, { "epoch": 0.08, "learning_rate": 4.8702645643251545e-05, "loss": 0.8509, "step": 7030 }, { "epoch": 0.08, "learning_rate": 4.870172291611303e-05, "loss": 0.8368, "step": 7035 }, { "epoch": 0.08, "learning_rate": 4.870080018897452e-05, "loss": 0.8992, "step": 7040 }, { "epoch": 0.08, "learning_rate": 4.869987746183601e-05, "loss": 0.8105, "step": 7045 }, { "epoch": 0.08, "learning_rate": 4.8698954734697496e-05, "loss": 0.8822, "step": 7050 }, { "epoch": 0.08, "learning_rate": 4.8698032007558984e-05, "loss": 0.8576, "step": 7055 }, { "epoch": 0.08, "learning_rate": 4.869710928042047e-05, "loss": 0.8531, "step": 7060 }, { "epoch": 0.08, "learning_rate": 4.869618655328196e-05, "loss": 0.9155, "step": 7065 }, { "epoch": 0.08, "learning_rate": 4.869526382614345e-05, "loss": 0.8972, "step": 7070 }, { "epoch": 0.08, "learning_rate": 4.869434109900493e-05, "loss": 0.8734, "step": 7075 }, { "epoch": 0.08, "learning_rate": 4.8693418371866423e-05, "loss": 0.8533, "step": 7080 }, { "epoch": 0.08, "learning_rate": 4.869249564472791e-05, "loss": 0.8704, "step": 7085 }, { "epoch": 0.08, "learning_rate": 4.869157291758939e-05, "loss": 0.847, "step": 7090 }, { "epoch": 0.08, "learning_rate": 4.869065019045088e-05, "loss": 0.9486, "step": 7095 }, { "epoch": 0.08, "learning_rate": 4.8689727463312375e-05, "loss": 0.8842, "step": 7100 }, { "epoch": 0.08, "learning_rate": 4.868880473617386e-05, "loss": 0.8878, "step": 7105 }, { "epoch": 0.08, "learning_rate": 4.8687882009035344e-05, "loss": 0.8889, "step": 7110 }, { "epoch": 0.08, "learning_rate": 4.868695928189683e-05, "loss": 0.8683, "step": 7115 }, { "epoch": 0.08, "learning_rate": 4.8686036554758326e-05, "loss": 0.882, "step": 7120 }, { "epoch": 0.08, "learning_rate": 4.868511382761981e-05, "loss": 0.8221, "step": 7125 }, { "epoch": 0.08, "learning_rate": 4.8684191100481295e-05, "loss": 0.8227, "step": 7130 }, { "epoch": 0.08, "learning_rate": 4.868326837334278e-05, "loss": 0.9101, "step": 7135 }, { "epoch": 0.08, "learning_rate": 4.868234564620427e-05, "loss": 0.8734, "step": 7140 }, { "epoch": 0.08, "learning_rate": 4.868142291906576e-05, "loss": 0.9104, "step": 7145 }, { "epoch": 0.08, "learning_rate": 4.8680500191927247e-05, "loss": 0.8033, "step": 7150 }, { "epoch": 0.08, "learning_rate": 4.8679577464788734e-05, "loss": 0.8666, "step": 7155 }, { "epoch": 0.08, "learning_rate": 4.867865473765022e-05, "loss": 0.8947, "step": 7160 }, { "epoch": 0.08, "learning_rate": 4.867773201051171e-05, "loss": 0.8393, "step": 7165 }, { "epoch": 0.08, "learning_rate": 4.86768092833732e-05, "loss": 0.9448, "step": 7170 }, { "epoch": 0.08, "learning_rate": 4.8675886556234686e-05, "loss": 0.9058, "step": 7175 }, { "epoch": 0.08, "learning_rate": 4.8674963829096174e-05, "loss": 0.8163, "step": 7180 }, { "epoch": 0.08, "learning_rate": 4.867404110195766e-05, "loss": 0.8427, "step": 7185 }, { "epoch": 0.08, "learning_rate": 4.867311837481915e-05, "loss": 0.8959, "step": 7190 }, { "epoch": 0.08, "learning_rate": 4.867219564768064e-05, "loss": 0.8856, "step": 7195 }, { "epoch": 0.08, "learning_rate": 4.867127292054212e-05, "loss": 0.8617, "step": 7200 }, { "epoch": 0.08, "learning_rate": 4.867035019340361e-05, "loss": 0.9349, "step": 7205 }, { "epoch": 0.08, "learning_rate": 4.86694274662651e-05, "loss": 0.8842, "step": 7210 }, { "epoch": 0.08, "learning_rate": 4.866850473912659e-05, "loss": 0.8867, "step": 7215 }, { "epoch": 0.08, "learning_rate": 4.866758201198807e-05, "loss": 0.9067, "step": 7220 }, { "epoch": 0.08, "learning_rate": 4.866665928484956e-05, "loss": 0.8915, "step": 7225 }, { "epoch": 0.08, "learning_rate": 4.866573655771105e-05, "loss": 0.8906, "step": 7230 }, { "epoch": 0.08, "learning_rate": 4.866481383057253e-05, "loss": 0.879, "step": 7235 }, { "epoch": 0.08, "learning_rate": 4.866389110343402e-05, "loss": 0.8796, "step": 7240 }, { "epoch": 0.08, "learning_rate": 4.866296837629551e-05, "loss": 0.9174, "step": 7245 }, { "epoch": 0.08, "learning_rate": 4.8662045649157004e-05, "loss": 0.8704, "step": 7250 }, { "epoch": 0.08, "learning_rate": 4.8661122922018485e-05, "loss": 0.8661, "step": 7255 }, { "epoch": 0.08, "learning_rate": 4.866020019487997e-05, "loss": 0.9271, "step": 7260 }, { "epoch": 0.08, "learning_rate": 4.865927746774146e-05, "loss": 0.8648, "step": 7265 }, { "epoch": 0.08, "learning_rate": 4.8658354740602955e-05, "loss": 0.8309, "step": 7270 }, { "epoch": 0.08, "learning_rate": 4.8657432013464436e-05, "loss": 0.8794, "step": 7275 }, { "epoch": 0.08, "learning_rate": 4.8656509286325924e-05, "loss": 0.8686, "step": 7280 }, { "epoch": 0.08, "learning_rate": 4.865558655918741e-05, "loss": 0.915, "step": 7285 }, { "epoch": 0.08, "learning_rate": 4.86546638320489e-05, "loss": 0.8311, "step": 7290 }, { "epoch": 0.08, "learning_rate": 4.865374110491039e-05, "loss": 0.909, "step": 7295 }, { "epoch": 0.08, "learning_rate": 4.8652818377771875e-05, "loss": 0.824, "step": 7300 }, { "epoch": 0.08, "learning_rate": 4.865189565063336e-05, "loss": 0.8677, "step": 7305 }, { "epoch": 0.08, "learning_rate": 4.8650972923494844e-05, "loss": 0.9275, "step": 7310 }, { "epoch": 0.08, "learning_rate": 4.865005019635634e-05, "loss": 0.8371, "step": 7315 }, { "epoch": 0.08, "learning_rate": 4.8649127469217827e-05, "loss": 0.9153, "step": 7320 }, { "epoch": 0.08, "learning_rate": 4.8648204742079314e-05, "loss": 0.8453, "step": 7325 }, { "epoch": 0.08, "learning_rate": 4.8647282014940796e-05, "loss": 0.9143, "step": 7330 }, { "epoch": 0.08, "learning_rate": 4.864635928780229e-05, "loss": 0.8618, "step": 7335 }, { "epoch": 0.08, "learning_rate": 4.864543656066378e-05, "loss": 0.8367, "step": 7340 }, { "epoch": 0.08, "learning_rate": 4.8644513833525266e-05, "loss": 0.8529, "step": 7345 }, { "epoch": 0.08, "learning_rate": 4.864359110638675e-05, "loss": 0.8681, "step": 7350 }, { "epoch": 0.08, "learning_rate": 4.864266837924824e-05, "loss": 0.8794, "step": 7355 }, { "epoch": 0.08, "learning_rate": 4.864174565210973e-05, "loss": 0.8872, "step": 7360 }, { "epoch": 0.08, "learning_rate": 4.864082292497121e-05, "loss": 0.8851, "step": 7365 }, { "epoch": 0.08, "learning_rate": 4.86399001978327e-05, "loss": 0.909, "step": 7370 }, { "epoch": 0.08, "learning_rate": 4.8638977470694186e-05, "loss": 0.8663, "step": 7375 }, { "epoch": 0.08, "learning_rate": 4.863805474355568e-05, "loss": 0.826, "step": 7380 }, { "epoch": 0.08, "learning_rate": 4.863713201641716e-05, "loss": 0.8494, "step": 7385 }, { "epoch": 0.08, "learning_rate": 4.863620928927865e-05, "loss": 0.9306, "step": 7390 }, { "epoch": 0.08, "learning_rate": 4.863528656214014e-05, "loss": 0.8247, "step": 7395 }, { "epoch": 0.08, "learning_rate": 4.8634363835001625e-05, "loss": 0.8838, "step": 7400 }, { "epoch": 0.08, "learning_rate": 4.863344110786311e-05, "loss": 0.9126, "step": 7405 }, { "epoch": 0.08, "learning_rate": 4.86325183807246e-05, "loss": 0.8488, "step": 7410 }, { "epoch": 0.08, "learning_rate": 4.863159565358609e-05, "loss": 0.8305, "step": 7415 }, { "epoch": 0.08, "learning_rate": 4.863067292644758e-05, "loss": 0.9138, "step": 7420 }, { "epoch": 0.08, "learning_rate": 4.8629750199309065e-05, "loss": 0.8243, "step": 7425 }, { "epoch": 0.08, "learning_rate": 4.862882747217055e-05, "loss": 0.8719, "step": 7430 }, { "epoch": 0.08, "learning_rate": 4.862790474503204e-05, "loss": 0.8921, "step": 7435 }, { "epoch": 0.08, "learning_rate": 4.862698201789352e-05, "loss": 0.7919, "step": 7440 }, { "epoch": 0.08, "learning_rate": 4.8626059290755016e-05, "loss": 0.8612, "step": 7445 }, { "epoch": 0.08, "learning_rate": 4.8625136563616504e-05, "loss": 0.8456, "step": 7450 }, { "epoch": 0.08, "learning_rate": 4.862421383647799e-05, "loss": 0.8039, "step": 7455 }, { "epoch": 0.08, "learning_rate": 4.862329110933947e-05, "loss": 0.8794, "step": 7460 }, { "epoch": 0.08, "learning_rate": 4.862236838220097e-05, "loss": 0.8681, "step": 7465 }, { "epoch": 0.08, "learning_rate": 4.8621445655062455e-05, "loss": 0.8424, "step": 7470 }, { "epoch": 0.08, "learning_rate": 4.8620522927923936e-05, "loss": 0.8908, "step": 7475 }, { "epoch": 0.08, "learning_rate": 4.8619600200785424e-05, "loss": 0.9161, "step": 7480 }, { "epoch": 0.08, "learning_rate": 4.861867747364692e-05, "loss": 0.8703, "step": 7485 }, { "epoch": 0.08, "learning_rate": 4.861775474650841e-05, "loss": 0.8839, "step": 7490 }, { "epoch": 0.08, "learning_rate": 4.861683201936989e-05, "loss": 0.8909, "step": 7495 }, { "epoch": 0.08, "learning_rate": 4.8615909292231376e-05, "loss": 0.8592, "step": 7500 }, { "epoch": 0.08, "learning_rate": 4.861498656509287e-05, "loss": 0.8329, "step": 7505 }, { "epoch": 0.08, "learning_rate": 4.861406383795435e-05, "loss": 0.8665, "step": 7510 }, { "epoch": 0.08, "learning_rate": 4.861314111081584e-05, "loss": 0.8377, "step": 7515 }, { "epoch": 0.08, "learning_rate": 4.861221838367733e-05, "loss": 0.9308, "step": 7520 }, { "epoch": 0.08, "learning_rate": 4.8611295656538815e-05, "loss": 0.8839, "step": 7525 }, { "epoch": 0.08, "learning_rate": 4.86103729294003e-05, "loss": 0.8947, "step": 7530 }, { "epoch": 0.08, "learning_rate": 4.860945020226179e-05, "loss": 0.897, "step": 7535 }, { "epoch": 0.08, "learning_rate": 4.860852747512328e-05, "loss": 0.8913, "step": 7540 }, { "epoch": 0.08, "learning_rate": 4.8607604747984766e-05, "loss": 0.8096, "step": 7545 }, { "epoch": 0.08, "learning_rate": 4.8606682020846254e-05, "loss": 0.8968, "step": 7550 }, { "epoch": 0.08, "learning_rate": 4.860575929370774e-05, "loss": 0.8175, "step": 7555 }, { "epoch": 0.08, "learning_rate": 4.860483656656923e-05, "loss": 0.8559, "step": 7560 }, { "epoch": 0.08, "learning_rate": 4.860391383943072e-05, "loss": 0.8899, "step": 7565 }, { "epoch": 0.08, "learning_rate": 4.8602991112292205e-05, "loss": 0.8425, "step": 7570 }, { "epoch": 0.08, "learning_rate": 4.860206838515369e-05, "loss": 0.7898, "step": 7575 }, { "epoch": 0.08, "learning_rate": 4.860114565801518e-05, "loss": 0.8743, "step": 7580 }, { "epoch": 0.08, "learning_rate": 4.860022293087666e-05, "loss": 0.8821, "step": 7585 }, { "epoch": 0.08, "learning_rate": 4.859930020373816e-05, "loss": 0.8573, "step": 7590 }, { "epoch": 0.08, "learning_rate": 4.8598377476599645e-05, "loss": 0.86, "step": 7595 }, { "epoch": 0.08, "learning_rate": 4.859745474946113e-05, "loss": 0.8952, "step": 7600 }, { "epoch": 0.08, "learning_rate": 4.8596532022322614e-05, "loss": 0.8394, "step": 7605 }, { "epoch": 0.08, "learning_rate": 4.85956092951841e-05, "loss": 0.8656, "step": 7610 }, { "epoch": 0.08, "learning_rate": 4.8594686568045596e-05, "loss": 0.8805, "step": 7615 }, { "epoch": 0.08, "learning_rate": 4.859376384090708e-05, "loss": 0.8333, "step": 7620 }, { "epoch": 0.08, "learning_rate": 4.8592841113768565e-05, "loss": 0.8526, "step": 7625 }, { "epoch": 0.08, "learning_rate": 4.859191838663005e-05, "loss": 0.8777, "step": 7630 }, { "epoch": 0.08, "learning_rate": 4.859099565949155e-05, "loss": 0.8628, "step": 7635 }, { "epoch": 0.08, "learning_rate": 4.859007293235303e-05, "loss": 0.8457, "step": 7640 }, { "epoch": 0.08, "learning_rate": 4.8589150205214516e-05, "loss": 0.8377, "step": 7645 }, { "epoch": 0.08, "learning_rate": 4.8588227478076004e-05, "loss": 0.8381, "step": 7650 }, { "epoch": 0.08, "learning_rate": 4.85873047509375e-05, "loss": 0.9209, "step": 7655 }, { "epoch": 0.08, "learning_rate": 4.858638202379898e-05, "loss": 0.8585, "step": 7660 }, { "epoch": 0.08, "learning_rate": 4.858545929666047e-05, "loss": 0.8911, "step": 7665 }, { "epoch": 0.08, "learning_rate": 4.8584536569521956e-05, "loss": 0.9524, "step": 7670 }, { "epoch": 0.08, "learning_rate": 4.8583613842383444e-05, "loss": 0.9095, "step": 7675 }, { "epoch": 0.09, "learning_rate": 4.858269111524493e-05, "loss": 0.8723, "step": 7680 }, { "epoch": 0.09, "learning_rate": 4.858176838810642e-05, "loss": 0.8849, "step": 7685 }, { "epoch": 0.09, "learning_rate": 4.858084566096791e-05, "loss": 0.8568, "step": 7690 }, { "epoch": 0.09, "learning_rate": 4.857992293382939e-05, "loss": 0.83, "step": 7695 }, { "epoch": 0.09, "learning_rate": 4.857900020669088e-05, "loss": 0.8659, "step": 7700 }, { "epoch": 0.09, "learning_rate": 4.857807747955237e-05, "loss": 0.8316, "step": 7705 }, { "epoch": 0.09, "learning_rate": 4.857715475241386e-05, "loss": 0.8551, "step": 7710 }, { "epoch": 0.09, "learning_rate": 4.857623202527534e-05, "loss": 0.8474, "step": 7715 }, { "epoch": 0.09, "learning_rate": 4.8575309298136834e-05, "loss": 0.8201, "step": 7720 }, { "epoch": 0.09, "learning_rate": 4.857438657099832e-05, "loss": 0.9316, "step": 7725 }, { "epoch": 0.09, "learning_rate": 4.857346384385981e-05, "loss": 0.9019, "step": 7730 }, { "epoch": 0.09, "learning_rate": 4.857254111672129e-05, "loss": 0.8488, "step": 7735 }, { "epoch": 0.09, "learning_rate": 4.8571618389582786e-05, "loss": 0.8135, "step": 7740 }, { "epoch": 0.09, "learning_rate": 4.8570695662444273e-05, "loss": 0.9059, "step": 7745 }, { "epoch": 0.09, "learning_rate": 4.8569772935305754e-05, "loss": 0.8784, "step": 7750 }, { "epoch": 0.09, "learning_rate": 4.856885020816724e-05, "loss": 0.9275, "step": 7755 }, { "epoch": 0.09, "learning_rate": 4.856792748102873e-05, "loss": 0.8627, "step": 7760 }, { "epoch": 0.09, "learning_rate": 4.8567004753890225e-05, "loss": 0.9071, "step": 7765 }, { "epoch": 0.09, "learning_rate": 4.8566082026751706e-05, "loss": 0.8586, "step": 7770 }, { "epoch": 0.09, "learning_rate": 4.8565159299613194e-05, "loss": 0.874, "step": 7775 }, { "epoch": 0.09, "learning_rate": 4.856423657247468e-05, "loss": 0.84, "step": 7780 }, { "epoch": 0.09, "learning_rate": 4.856331384533617e-05, "loss": 0.8518, "step": 7785 }, { "epoch": 0.09, "learning_rate": 4.856239111819766e-05, "loss": 0.8733, "step": 7790 }, { "epoch": 0.09, "learning_rate": 4.8561468391059145e-05, "loss": 0.8746, "step": 7795 }, { "epoch": 0.09, "learning_rate": 4.856054566392063e-05, "loss": 0.8675, "step": 7800 }, { "epoch": 0.09, "learning_rate": 4.855962293678212e-05, "loss": 0.832, "step": 7805 }, { "epoch": 0.09, "learning_rate": 4.855870020964361e-05, "loss": 0.8761, "step": 7810 }, { "epoch": 0.09, "learning_rate": 4.8557777482505097e-05, "loss": 0.8582, "step": 7815 }, { "epoch": 0.09, "learning_rate": 4.8556854755366584e-05, "loss": 0.9458, "step": 7820 }, { "epoch": 0.09, "learning_rate": 4.8555932028228065e-05, "loss": 0.8704, "step": 7825 }, { "epoch": 0.09, "learning_rate": 4.855500930108956e-05, "loss": 0.823, "step": 7830 }, { "epoch": 0.09, "learning_rate": 4.855408657395105e-05, "loss": 0.91, "step": 7835 }, { "epoch": 0.09, "learning_rate": 4.8553163846812536e-05, "loss": 0.8403, "step": 7840 }, { "epoch": 0.09, "learning_rate": 4.855224111967402e-05, "loss": 0.9303, "step": 7845 }, { "epoch": 0.09, "learning_rate": 4.855131839253551e-05, "loss": 0.7529, "step": 7850 }, { "epoch": 0.09, "learning_rate": 4.8550395665397e-05, "loss": 0.831, "step": 7855 }, { "epoch": 0.09, "learning_rate": 4.854947293825848e-05, "loss": 0.8747, "step": 7860 }, { "epoch": 0.09, "learning_rate": 4.854855021111997e-05, "loss": 0.8344, "step": 7865 }, { "epoch": 0.09, "learning_rate": 4.854762748398146e-05, "loss": 0.8743, "step": 7870 }, { "epoch": 0.09, "learning_rate": 4.854670475684295e-05, "loss": 0.8735, "step": 7875 }, { "epoch": 0.09, "learning_rate": 4.854578202970443e-05, "loss": 0.8571, "step": 7880 }, { "epoch": 0.09, "learning_rate": 4.854485930256592e-05, "loss": 0.8625, "step": 7885 }, { "epoch": 0.09, "learning_rate": 4.8543936575427414e-05, "loss": 0.8236, "step": 7890 }, { "epoch": 0.09, "learning_rate": 4.8543013848288895e-05, "loss": 0.8653, "step": 7895 }, { "epoch": 0.09, "learning_rate": 4.854209112115038e-05, "loss": 0.9045, "step": 7900 }, { "epoch": 0.09, "learning_rate": 4.854116839401187e-05, "loss": 0.9092, "step": 7905 }, { "epoch": 0.09, "learning_rate": 4.854024566687336e-05, "loss": 0.9015, "step": 7910 }, { "epoch": 0.09, "learning_rate": 4.853932293973485e-05, "loss": 0.8397, "step": 7915 }, { "epoch": 0.09, "learning_rate": 4.8538400212596335e-05, "loss": 0.8784, "step": 7920 }, { "epoch": 0.09, "learning_rate": 4.853747748545782e-05, "loss": 0.8461, "step": 7925 }, { "epoch": 0.09, "learning_rate": 4.853655475831931e-05, "loss": 0.948, "step": 7930 }, { "epoch": 0.09, "learning_rate": 4.85356320311808e-05, "loss": 0.8208, "step": 7935 }, { "epoch": 0.09, "learning_rate": 4.8534709304042286e-05, "loss": 0.8602, "step": 7940 }, { "epoch": 0.09, "learning_rate": 4.8533786576903774e-05, "loss": 0.8743, "step": 7945 }, { "epoch": 0.09, "learning_rate": 4.853286384976526e-05, "loss": 0.8863, "step": 7950 }, { "epoch": 0.09, "learning_rate": 4.853194112262675e-05, "loss": 0.8462, "step": 7955 }, { "epoch": 0.09, "learning_rate": 4.853101839548824e-05, "loss": 0.8721, "step": 7960 }, { "epoch": 0.09, "learning_rate": 4.8530095668349725e-05, "loss": 0.8638, "step": 7965 }, { "epoch": 0.09, "learning_rate": 4.8529172941211206e-05, "loss": 0.8585, "step": 7970 }, { "epoch": 0.09, "learning_rate": 4.8528250214072694e-05, "loss": 0.871, "step": 7975 }, { "epoch": 0.09, "learning_rate": 4.852732748693419e-05, "loss": 0.958, "step": 7980 }, { "epoch": 0.09, "learning_rate": 4.8526404759795677e-05, "loss": 0.8879, "step": 7985 }, { "epoch": 0.09, "learning_rate": 4.852548203265716e-05, "loss": 0.8845, "step": 7990 }, { "epoch": 0.09, "learning_rate": 4.8524559305518646e-05, "loss": 0.8656, "step": 7995 }, { "epoch": 0.09, "learning_rate": 4.852363657838014e-05, "loss": 0.8738, "step": 8000 }, { "epoch": 0.09, "eval_loss": 0.8334123492240906, "eval_runtime": 69.8344, "eval_samples_per_second": 28.639, "eval_steps_per_second": 14.32, "step": 8000 }, { "epoch": 0.09, "learning_rate": 4.852271385124162e-05, "loss": 0.8191, "step": 8005 }, { "epoch": 0.09, "learning_rate": 4.852179112410311e-05, "loss": 0.8904, "step": 8010 }, { "epoch": 0.09, "learning_rate": 4.85208683969646e-05, "loss": 0.8757, "step": 8015 }, { "epoch": 0.09, "learning_rate": 4.851994566982609e-05, "loss": 0.843, "step": 8020 }, { "epoch": 0.09, "learning_rate": 4.851902294268757e-05, "loss": 0.8642, "step": 8025 }, { "epoch": 0.09, "learning_rate": 4.851810021554906e-05, "loss": 0.8341, "step": 8030 }, { "epoch": 0.09, "learning_rate": 4.851717748841055e-05, "loss": 0.8043, "step": 8035 }, { "epoch": 0.09, "learning_rate": 4.851625476127204e-05, "loss": 0.8113, "step": 8040 }, { "epoch": 0.09, "learning_rate": 4.8515332034133524e-05, "loss": 0.8471, "step": 8045 }, { "epoch": 0.09, "learning_rate": 4.851440930699501e-05, "loss": 0.8748, "step": 8050 }, { "epoch": 0.09, "learning_rate": 4.85134865798565e-05, "loss": 0.8995, "step": 8055 }, { "epoch": 0.09, "learning_rate": 4.851256385271799e-05, "loss": 0.8625, "step": 8060 }, { "epoch": 0.09, "learning_rate": 4.8511641125579475e-05, "loss": 0.9011, "step": 8065 }, { "epoch": 0.09, "learning_rate": 4.851071839844096e-05, "loss": 0.8737, "step": 8070 }, { "epoch": 0.09, "learning_rate": 4.850979567130245e-05, "loss": 0.835, "step": 8075 }, { "epoch": 0.09, "learning_rate": 4.850887294416394e-05, "loss": 0.8703, "step": 8080 }, { "epoch": 0.09, "learning_rate": 4.850795021702543e-05, "loss": 0.8075, "step": 8085 }, { "epoch": 0.09, "learning_rate": 4.8507027489886915e-05, "loss": 0.8919, "step": 8090 }, { "epoch": 0.09, "learning_rate": 4.85061047627484e-05, "loss": 0.9024, "step": 8095 }, { "epoch": 0.09, "learning_rate": 4.8505182035609884e-05, "loss": 0.8001, "step": 8100 }, { "epoch": 0.09, "learning_rate": 4.850425930847138e-05, "loss": 0.8429, "step": 8105 }, { "epoch": 0.09, "learning_rate": 4.8503336581332866e-05, "loss": 0.8585, "step": 8110 }, { "epoch": 0.09, "learning_rate": 4.8502413854194354e-05, "loss": 0.8088, "step": 8115 }, { "epoch": 0.09, "learning_rate": 4.8501491127055835e-05, "loss": 0.9076, "step": 8120 }, { "epoch": 0.09, "learning_rate": 4.850056839991732e-05, "loss": 0.8696, "step": 8125 }, { "epoch": 0.09, "learning_rate": 4.849964567277882e-05, "loss": 0.8617, "step": 8130 }, { "epoch": 0.09, "learning_rate": 4.84987229456403e-05, "loss": 0.858, "step": 8135 }, { "epoch": 0.09, "learning_rate": 4.8497800218501786e-05, "loss": 0.9107, "step": 8140 }, { "epoch": 0.09, "learning_rate": 4.8496877491363274e-05, "loss": 0.897, "step": 8145 }, { "epoch": 0.09, "learning_rate": 4.849595476422477e-05, "loss": 0.8126, "step": 8150 }, { "epoch": 0.09, "learning_rate": 4.849503203708625e-05, "loss": 0.8633, "step": 8155 }, { "epoch": 0.09, "learning_rate": 4.849410930994774e-05, "loss": 0.8927, "step": 8160 }, { "epoch": 0.09, "learning_rate": 4.8493186582809226e-05, "loss": 0.8459, "step": 8165 }, { "epoch": 0.09, "learning_rate": 4.8492263855670713e-05, "loss": 0.8605, "step": 8170 }, { "epoch": 0.09, "learning_rate": 4.84913411285322e-05, "loss": 0.8453, "step": 8175 }, { "epoch": 0.09, "learning_rate": 4.849041840139369e-05, "loss": 0.8972, "step": 8180 }, { "epoch": 0.09, "learning_rate": 4.848949567425518e-05, "loss": 0.8192, "step": 8185 }, { "epoch": 0.09, "learning_rate": 4.8488572947116665e-05, "loss": 0.8777, "step": 8190 }, { "epoch": 0.09, "learning_rate": 4.848765021997815e-05, "loss": 0.8852, "step": 8195 }, { "epoch": 0.09, "learning_rate": 4.848672749283964e-05, "loss": 0.8524, "step": 8200 }, { "epoch": 0.09, "learning_rate": 4.848580476570113e-05, "loss": 0.902, "step": 8205 }, { "epoch": 0.09, "learning_rate": 4.848488203856261e-05, "loss": 0.9132, "step": 8210 }, { "epoch": 0.09, "learning_rate": 4.8483959311424104e-05, "loss": 0.8649, "step": 8215 }, { "epoch": 0.09, "learning_rate": 4.848303658428559e-05, "loss": 0.8598, "step": 8220 }, { "epoch": 0.09, "learning_rate": 4.848211385714708e-05, "loss": 0.8778, "step": 8225 }, { "epoch": 0.09, "learning_rate": 4.848119113000856e-05, "loss": 0.8623, "step": 8230 }, { "epoch": 0.09, "learning_rate": 4.8480268402870055e-05, "loss": 0.8694, "step": 8235 }, { "epoch": 0.09, "learning_rate": 4.847934567573154e-05, "loss": 0.8534, "step": 8240 }, { "epoch": 0.09, "learning_rate": 4.8478422948593024e-05, "loss": 0.8404, "step": 8245 }, { "epoch": 0.09, "learning_rate": 4.847750022145451e-05, "loss": 0.7846, "step": 8250 }, { "epoch": 0.09, "learning_rate": 4.847657749431601e-05, "loss": 0.8682, "step": 8255 }, { "epoch": 0.09, "learning_rate": 4.8475654767177495e-05, "loss": 0.8671, "step": 8260 }, { "epoch": 0.09, "learning_rate": 4.8474732040038976e-05, "loss": 0.9045, "step": 8265 }, { "epoch": 0.09, "learning_rate": 4.8473809312900464e-05, "loss": 0.882, "step": 8270 }, { "epoch": 0.09, "learning_rate": 4.847288658576195e-05, "loss": 0.8825, "step": 8275 }, { "epoch": 0.09, "learning_rate": 4.847196385862344e-05, "loss": 0.812, "step": 8280 }, { "epoch": 0.09, "learning_rate": 4.847104113148493e-05, "loss": 0.8437, "step": 8285 }, { "epoch": 0.09, "learning_rate": 4.8470118404346415e-05, "loss": 0.8196, "step": 8290 }, { "epoch": 0.09, "learning_rate": 4.84691956772079e-05, "loss": 0.8268, "step": 8295 }, { "epoch": 0.09, "learning_rate": 4.846827295006939e-05, "loss": 0.8331, "step": 8300 }, { "epoch": 0.09, "learning_rate": 4.846735022293088e-05, "loss": 0.8437, "step": 8305 }, { "epoch": 0.09, "learning_rate": 4.8466427495792366e-05, "loss": 0.8544, "step": 8310 }, { "epoch": 0.09, "learning_rate": 4.8465504768653854e-05, "loss": 0.9288, "step": 8315 }, { "epoch": 0.09, "learning_rate": 4.846458204151534e-05, "loss": 0.8895, "step": 8320 }, { "epoch": 0.09, "learning_rate": 4.846365931437683e-05, "loss": 0.885, "step": 8325 }, { "epoch": 0.09, "learning_rate": 4.846273658723832e-05, "loss": 0.8196, "step": 8330 }, { "epoch": 0.09, "learning_rate": 4.8461813860099806e-05, "loss": 0.8466, "step": 8335 }, { "epoch": 0.09, "learning_rate": 4.8460891132961294e-05, "loss": 0.8737, "step": 8340 }, { "epoch": 0.09, "learning_rate": 4.845996840582278e-05, "loss": 0.9115, "step": 8345 }, { "epoch": 0.09, "learning_rate": 4.845904567868427e-05, "loss": 0.9199, "step": 8350 }, { "epoch": 0.09, "learning_rate": 4.845812295154575e-05, "loss": 0.8946, "step": 8355 }, { "epoch": 0.09, "learning_rate": 4.845720022440724e-05, "loss": 0.8981, "step": 8360 }, { "epoch": 0.09, "learning_rate": 4.845627749726873e-05, "loss": 0.8363, "step": 8365 }, { "epoch": 0.09, "learning_rate": 4.845535477013022e-05, "loss": 0.9024, "step": 8370 }, { "epoch": 0.09, "learning_rate": 4.84544320429917e-05, "loss": 0.9234, "step": 8375 }, { "epoch": 0.09, "learning_rate": 4.845350931585319e-05, "loss": 0.9124, "step": 8380 }, { "epoch": 0.09, "learning_rate": 4.8452586588714684e-05, "loss": 0.844, "step": 8385 }, { "epoch": 0.09, "learning_rate": 4.845166386157617e-05, "loss": 0.9083, "step": 8390 }, { "epoch": 0.09, "learning_rate": 4.845074113443765e-05, "loss": 0.8187, "step": 8395 }, { "epoch": 0.09, "learning_rate": 4.844981840729914e-05, "loss": 0.8343, "step": 8400 }, { "epoch": 0.09, "learning_rate": 4.8448895680160636e-05, "loss": 0.8264, "step": 8405 }, { "epoch": 0.09, "learning_rate": 4.844797295302212e-05, "loss": 0.8432, "step": 8410 }, { "epoch": 0.09, "learning_rate": 4.8447050225883604e-05, "loss": 0.9513, "step": 8415 }, { "epoch": 0.09, "learning_rate": 4.844612749874509e-05, "loss": 0.8037, "step": 8420 }, { "epoch": 0.09, "learning_rate": 4.844520477160659e-05, "loss": 0.8372, "step": 8425 }, { "epoch": 0.09, "learning_rate": 4.844428204446807e-05, "loss": 0.888, "step": 8430 }, { "epoch": 0.09, "learning_rate": 4.8443359317329556e-05, "loss": 0.8206, "step": 8435 }, { "epoch": 0.09, "learning_rate": 4.8442436590191044e-05, "loss": 0.8487, "step": 8440 }, { "epoch": 0.09, "learning_rate": 4.844151386305253e-05, "loss": 0.8767, "step": 8445 }, { "epoch": 0.09, "learning_rate": 4.844059113591402e-05, "loss": 0.9158, "step": 8450 }, { "epoch": 0.09, "learning_rate": 4.843966840877551e-05, "loss": 0.812, "step": 8455 }, { "epoch": 0.09, "learning_rate": 4.8438745681636995e-05, "loss": 0.9699, "step": 8460 }, { "epoch": 0.09, "learning_rate": 4.843782295449848e-05, "loss": 0.8418, "step": 8465 }, { "epoch": 0.09, "learning_rate": 4.843690022735997e-05, "loss": 0.8719, "step": 8470 }, { "epoch": 0.09, "learning_rate": 4.843597750022146e-05, "loss": 0.9053, "step": 8475 }, { "epoch": 0.09, "learning_rate": 4.8435054773082947e-05, "loss": 0.8709, "step": 8480 }, { "epoch": 0.09, "learning_rate": 4.843413204594443e-05, "loss": 0.8797, "step": 8485 }, { "epoch": 0.09, "learning_rate": 4.843320931880592e-05, "loss": 0.8004, "step": 8490 }, { "epoch": 0.09, "learning_rate": 4.843228659166741e-05, "loss": 0.8489, "step": 8495 }, { "epoch": 0.09, "learning_rate": 4.84313638645289e-05, "loss": 0.8317, "step": 8500 }, { "epoch": 0.09, "learning_rate": 4.843044113739038e-05, "loss": 0.9145, "step": 8505 }, { "epoch": 0.09, "learning_rate": 4.842951841025187e-05, "loss": 0.8867, "step": 8510 }, { "epoch": 0.09, "learning_rate": 4.842859568311336e-05, "loss": 0.8283, "step": 8515 }, { "epoch": 0.09, "learning_rate": 4.842767295597484e-05, "loss": 0.8911, "step": 8520 }, { "epoch": 0.09, "learning_rate": 4.842675022883633e-05, "loss": 0.9225, "step": 8525 }, { "epoch": 0.09, "learning_rate": 4.842582750169782e-05, "loss": 0.8493, "step": 8530 }, { "epoch": 0.09, "learning_rate": 4.842490477455931e-05, "loss": 0.9013, "step": 8535 }, { "epoch": 0.09, "learning_rate": 4.8423982047420794e-05, "loss": 0.8225, "step": 8540 }, { "epoch": 0.09, "learning_rate": 4.842305932028228e-05, "loss": 0.8434, "step": 8545 }, { "epoch": 0.09, "learning_rate": 4.842213659314377e-05, "loss": 0.826, "step": 8550 }, { "epoch": 0.09, "learning_rate": 4.842121386600526e-05, "loss": 0.857, "step": 8555 }, { "epoch": 0.09, "learning_rate": 4.8420291138866745e-05, "loss": 0.9195, "step": 8560 }, { "epoch": 0.09, "learning_rate": 4.841936841172823e-05, "loss": 0.8268, "step": 8565 }, { "epoch": 0.09, "learning_rate": 4.841844568458972e-05, "loss": 0.8991, "step": 8570 }, { "epoch": 0.09, "learning_rate": 4.841752295745121e-05, "loss": 0.8367, "step": 8575 }, { "epoch": 0.1, "learning_rate": 4.84166002303127e-05, "loss": 0.8579, "step": 8580 }, { "epoch": 0.1, "learning_rate": 4.8415677503174185e-05, "loss": 0.8514, "step": 8585 }, { "epoch": 0.1, "learning_rate": 4.841475477603567e-05, "loss": 0.8437, "step": 8590 }, { "epoch": 0.1, "learning_rate": 4.8413832048897153e-05, "loss": 0.9021, "step": 8595 }, { "epoch": 0.1, "learning_rate": 4.841290932175865e-05, "loss": 0.839, "step": 8600 }, { "epoch": 0.1, "learning_rate": 4.8411986594620136e-05, "loss": 0.8698, "step": 8605 }, { "epoch": 0.1, "learning_rate": 4.8411063867481624e-05, "loss": 0.8219, "step": 8610 }, { "epoch": 0.1, "learning_rate": 4.8410141140343105e-05, "loss": 0.8937, "step": 8615 }, { "epoch": 0.1, "learning_rate": 4.84092184132046e-05, "loss": 0.925, "step": 8620 }, { "epoch": 0.1, "learning_rate": 4.840829568606609e-05, "loss": 0.8324, "step": 8625 }, { "epoch": 0.1, "learning_rate": 4.840737295892757e-05, "loss": 0.9647, "step": 8630 }, { "epoch": 0.1, "learning_rate": 4.8406450231789056e-05, "loss": 0.8739, "step": 8635 }, { "epoch": 0.1, "learning_rate": 4.840552750465055e-05, "loss": 0.9016, "step": 8640 }, { "epoch": 0.1, "learning_rate": 4.840460477751204e-05, "loss": 0.8966, "step": 8645 }, { "epoch": 0.1, "learning_rate": 4.840368205037352e-05, "loss": 0.8674, "step": 8650 }, { "epoch": 0.1, "learning_rate": 4.840275932323501e-05, "loss": 0.8649, "step": 8655 }, { "epoch": 0.1, "learning_rate": 4.8401836596096496e-05, "loss": 0.8259, "step": 8660 }, { "epoch": 0.1, "learning_rate": 4.840091386895798e-05, "loss": 0.8792, "step": 8665 }, { "epoch": 0.1, "learning_rate": 4.839999114181947e-05, "loss": 0.8983, "step": 8670 }, { "epoch": 0.1, "learning_rate": 4.839906841468096e-05, "loss": 0.8926, "step": 8675 }, { "epoch": 0.1, "learning_rate": 4.839814568754245e-05, "loss": 0.8807, "step": 8680 }, { "epoch": 0.1, "learning_rate": 4.8397222960403935e-05, "loss": 0.8693, "step": 8685 }, { "epoch": 0.1, "learning_rate": 4.839630023326542e-05, "loss": 0.8892, "step": 8690 }, { "epoch": 0.1, "learning_rate": 4.839537750612691e-05, "loss": 0.9199, "step": 8695 }, { "epoch": 0.1, "learning_rate": 4.83944547789884e-05, "loss": 0.8627, "step": 8700 }, { "epoch": 0.1, "learning_rate": 4.8393532051849886e-05, "loss": 0.8705, "step": 8705 }, { "epoch": 0.1, "learning_rate": 4.8392609324711374e-05, "loss": 0.8448, "step": 8710 }, { "epoch": 0.1, "learning_rate": 4.839168659757286e-05, "loss": 0.9701, "step": 8715 }, { "epoch": 0.1, "learning_rate": 4.839076387043435e-05, "loss": 0.9222, "step": 8720 }, { "epoch": 0.1, "learning_rate": 4.838984114329584e-05, "loss": 0.8786, "step": 8725 }, { "epoch": 0.1, "learning_rate": 4.8388918416157325e-05, "loss": 0.817, "step": 8730 }, { "epoch": 0.1, "learning_rate": 4.838799568901881e-05, "loss": 0.7931, "step": 8735 }, { "epoch": 0.1, "learning_rate": 4.8387072961880294e-05, "loss": 0.9306, "step": 8740 }, { "epoch": 0.1, "learning_rate": 4.838615023474178e-05, "loss": 0.8711, "step": 8745 }, { "epoch": 0.1, "learning_rate": 4.838522750760328e-05, "loss": 0.8742, "step": 8750 }, { "epoch": 0.1, "learning_rate": 4.8384304780464765e-05, "loss": 0.8671, "step": 8755 }, { "epoch": 0.1, "learning_rate": 4.8383382053326246e-05, "loss": 0.929, "step": 8760 }, { "epoch": 0.1, "learning_rate": 4.8382459326187734e-05, "loss": 0.8516, "step": 8765 }, { "epoch": 0.1, "learning_rate": 4.838153659904923e-05, "loss": 0.895, "step": 8770 }, { "epoch": 0.1, "learning_rate": 4.8380613871910716e-05, "loss": 0.8361, "step": 8775 }, { "epoch": 0.1, "learning_rate": 4.83796911447722e-05, "loss": 0.8573, "step": 8780 }, { "epoch": 0.1, "learning_rate": 4.8378768417633685e-05, "loss": 0.8862, "step": 8785 }, { "epoch": 0.1, "learning_rate": 4.837784569049518e-05, "loss": 0.8908, "step": 8790 }, { "epoch": 0.1, "learning_rate": 4.837692296335666e-05, "loss": 0.8306, "step": 8795 }, { "epoch": 0.1, "learning_rate": 4.837600023621815e-05, "loss": 0.8247, "step": 8800 }, { "epoch": 0.1, "learning_rate": 4.8375077509079636e-05, "loss": 0.8713, "step": 8805 }, { "epoch": 0.1, "learning_rate": 4.8374154781941124e-05, "loss": 0.891, "step": 8810 }, { "epoch": 0.1, "learning_rate": 4.837323205480261e-05, "loss": 0.8303, "step": 8815 }, { "epoch": 0.1, "learning_rate": 4.83723093276641e-05, "loss": 0.852, "step": 8820 }, { "epoch": 0.1, "learning_rate": 4.837138660052559e-05, "loss": 0.8651, "step": 8825 }, { "epoch": 0.1, "learning_rate": 4.8370463873387076e-05, "loss": 0.9162, "step": 8830 }, { "epoch": 0.1, "learning_rate": 4.8369541146248563e-05, "loss": 0.8528, "step": 8835 }, { "epoch": 0.1, "learning_rate": 4.836861841911005e-05, "loss": 0.8169, "step": 8840 }, { "epoch": 0.1, "learning_rate": 4.836769569197154e-05, "loss": 0.8231, "step": 8845 }, { "epoch": 0.1, "learning_rate": 4.836677296483303e-05, "loss": 0.8882, "step": 8850 }, { "epoch": 0.1, "learning_rate": 4.8365850237694515e-05, "loss": 0.8269, "step": 8855 }, { "epoch": 0.1, "learning_rate": 4.8364927510556e-05, "loss": 0.8865, "step": 8860 }, { "epoch": 0.1, "learning_rate": 4.836400478341749e-05, "loss": 0.8089, "step": 8865 }, { "epoch": 0.1, "learning_rate": 4.836308205627897e-05, "loss": 0.8636, "step": 8870 }, { "epoch": 0.1, "learning_rate": 4.8362159329140466e-05, "loss": 0.8974, "step": 8875 }, { "epoch": 0.1, "learning_rate": 4.8361236602001954e-05, "loss": 0.8903, "step": 8880 }, { "epoch": 0.1, "learning_rate": 4.836031387486344e-05, "loss": 0.8577, "step": 8885 }, { "epoch": 0.1, "learning_rate": 4.835939114772492e-05, "loss": 0.9526, "step": 8890 }, { "epoch": 0.1, "learning_rate": 4.835846842058641e-05, "loss": 0.8116, "step": 8895 }, { "epoch": 0.1, "learning_rate": 4.8357545693447905e-05, "loss": 0.8266, "step": 8900 }, { "epoch": 0.1, "learning_rate": 4.8356622966309387e-05, "loss": 0.8515, "step": 8905 }, { "epoch": 0.1, "learning_rate": 4.8355700239170874e-05, "loss": 0.9028, "step": 8910 }, { "epoch": 0.1, "learning_rate": 4.835477751203236e-05, "loss": 0.8411, "step": 8915 }, { "epoch": 0.1, "learning_rate": 4.835385478489386e-05, "loss": 0.8773, "step": 8920 }, { "epoch": 0.1, "learning_rate": 4.835293205775534e-05, "loss": 0.9081, "step": 8925 }, { "epoch": 0.1, "learning_rate": 4.8352009330616826e-05, "loss": 0.8599, "step": 8930 }, { "epoch": 0.1, "learning_rate": 4.8351086603478314e-05, "loss": 0.924, "step": 8935 }, { "epoch": 0.1, "learning_rate": 4.83501638763398e-05, "loss": 0.8986, "step": 8940 }, { "epoch": 0.1, "learning_rate": 4.834924114920129e-05, "loss": 0.7997, "step": 8945 }, { "epoch": 0.1, "learning_rate": 4.834831842206278e-05, "loss": 0.8387, "step": 8950 }, { "epoch": 0.1, "learning_rate": 4.8347395694924265e-05, "loss": 0.8899, "step": 8955 }, { "epoch": 0.1, "learning_rate": 4.834647296778575e-05, "loss": 0.8444, "step": 8960 }, { "epoch": 0.1, "learning_rate": 4.834555024064724e-05, "loss": 0.9105, "step": 8965 }, { "epoch": 0.1, "learning_rate": 4.834462751350873e-05, "loss": 0.8331, "step": 8970 }, { "epoch": 0.1, "learning_rate": 4.8343704786370216e-05, "loss": 0.7952, "step": 8975 }, { "epoch": 0.1, "learning_rate": 4.83427820592317e-05, "loss": 0.8484, "step": 8980 }, { "epoch": 0.1, "learning_rate": 4.834185933209319e-05, "loss": 0.8063, "step": 8985 }, { "epoch": 0.1, "learning_rate": 4.834093660495468e-05, "loss": 0.8419, "step": 8990 }, { "epoch": 0.1, "learning_rate": 4.834001387781617e-05, "loss": 0.8435, "step": 8995 }, { "epoch": 0.1, "learning_rate": 4.833909115067765e-05, "loss": 0.8638, "step": 9000 }, { "epoch": 0.1, "eval_loss": 0.8236207962036133, "eval_runtime": 69.9496, "eval_samples_per_second": 28.592, "eval_steps_per_second": 14.296, "step": 9000 }, { "epoch": 0.1, "learning_rate": 4.8338168423539144e-05, "loss": 0.7816, "step": 9005 }, { "epoch": 0.1, "learning_rate": 4.833724569640063e-05, "loss": 0.8489, "step": 9010 }, { "epoch": 0.1, "learning_rate": 4.833632296926211e-05, "loss": 0.8396, "step": 9015 }, { "epoch": 0.1, "learning_rate": 4.83354002421236e-05, "loss": 0.8226, "step": 9020 }, { "epoch": 0.1, "learning_rate": 4.8334477514985095e-05, "loss": 0.8374, "step": 9025 }, { "epoch": 0.1, "learning_rate": 4.833355478784658e-05, "loss": 0.8984, "step": 9030 }, { "epoch": 0.1, "learning_rate": 4.8332632060708064e-05, "loss": 0.8808, "step": 9035 }, { "epoch": 0.1, "learning_rate": 4.833170933356955e-05, "loss": 0.8815, "step": 9040 }, { "epoch": 0.1, "learning_rate": 4.833078660643104e-05, "loss": 0.8629, "step": 9045 }, { "epoch": 0.1, "learning_rate": 4.832986387929253e-05, "loss": 0.8135, "step": 9050 }, { "epoch": 0.1, "learning_rate": 4.8328941152154015e-05, "loss": 0.8801, "step": 9055 }, { "epoch": 0.1, "learning_rate": 4.83280184250155e-05, "loss": 0.8583, "step": 9060 }, { "epoch": 0.1, "learning_rate": 4.832709569787699e-05, "loss": 0.8904, "step": 9065 }, { "epoch": 0.1, "learning_rate": 4.832617297073848e-05, "loss": 0.8349, "step": 9070 }, { "epoch": 0.1, "learning_rate": 4.832525024359997e-05, "loss": 0.849, "step": 9075 }, { "epoch": 0.1, "learning_rate": 4.8324327516461454e-05, "loss": 0.8212, "step": 9080 }, { "epoch": 0.1, "learning_rate": 4.832340478932294e-05, "loss": 0.8368, "step": 9085 }, { "epoch": 0.1, "learning_rate": 4.832248206218443e-05, "loss": 0.869, "step": 9090 }, { "epoch": 0.1, "learning_rate": 4.832155933504592e-05, "loss": 0.8789, "step": 9095 }, { "epoch": 0.1, "learning_rate": 4.8320636607907406e-05, "loss": 0.8643, "step": 9100 }, { "epoch": 0.1, "learning_rate": 4.8319713880768894e-05, "loss": 0.8217, "step": 9105 }, { "epoch": 0.1, "learning_rate": 4.8318791153630375e-05, "loss": 0.8857, "step": 9110 }, { "epoch": 0.1, "learning_rate": 4.831786842649187e-05, "loss": 0.8236, "step": 9115 }, { "epoch": 0.1, "learning_rate": 4.831694569935336e-05, "loss": 0.9136, "step": 9120 }, { "epoch": 0.1, "learning_rate": 4.831602297221484e-05, "loss": 0.8349, "step": 9125 }, { "epoch": 0.1, "learning_rate": 4.8315100245076326e-05, "loss": 0.8581, "step": 9130 }, { "epoch": 0.1, "learning_rate": 4.831417751793782e-05, "loss": 0.8472, "step": 9135 }, { "epoch": 0.1, "learning_rate": 4.831325479079931e-05, "loss": 0.8136, "step": 9140 }, { "epoch": 0.1, "learning_rate": 4.831233206366079e-05, "loss": 0.7901, "step": 9145 }, { "epoch": 0.1, "learning_rate": 4.831140933652228e-05, "loss": 0.8526, "step": 9150 }, { "epoch": 0.1, "learning_rate": 4.831048660938377e-05, "loss": 0.836, "step": 9155 }, { "epoch": 0.1, "learning_rate": 4.830956388224526e-05, "loss": 0.9097, "step": 9160 }, { "epoch": 0.1, "learning_rate": 4.830864115510674e-05, "loss": 0.9117, "step": 9165 }, { "epoch": 0.1, "learning_rate": 4.830771842796823e-05, "loss": 0.8428, "step": 9170 }, { "epoch": 0.1, "learning_rate": 4.8306795700829724e-05, "loss": 0.8866, "step": 9175 }, { "epoch": 0.1, "learning_rate": 4.8305872973691205e-05, "loss": 0.8505, "step": 9180 }, { "epoch": 0.1, "learning_rate": 4.830495024655269e-05, "loss": 0.8205, "step": 9185 }, { "epoch": 0.1, "learning_rate": 4.830402751941418e-05, "loss": 0.9704, "step": 9190 }, { "epoch": 0.1, "learning_rate": 4.830310479227567e-05, "loss": 0.8031, "step": 9195 }, { "epoch": 0.1, "learning_rate": 4.8302182065137156e-05, "loss": 0.8188, "step": 9200 }, { "epoch": 0.1, "learning_rate": 4.8301259337998644e-05, "loss": 0.8265, "step": 9205 }, { "epoch": 0.1, "learning_rate": 4.830033661086013e-05, "loss": 0.9357, "step": 9210 }, { "epoch": 0.1, "learning_rate": 4.829941388372162e-05, "loss": 0.7814, "step": 9215 }, { "epoch": 0.1, "learning_rate": 4.829849115658311e-05, "loss": 0.8102, "step": 9220 }, { "epoch": 0.1, "learning_rate": 4.8297568429444595e-05, "loss": 0.8742, "step": 9225 }, { "epoch": 0.1, "learning_rate": 4.829664570230608e-05, "loss": 0.8286, "step": 9230 }, { "epoch": 0.1, "learning_rate": 4.829572297516757e-05, "loss": 0.8055, "step": 9235 }, { "epoch": 0.1, "learning_rate": 4.829480024802906e-05, "loss": 0.8257, "step": 9240 }, { "epoch": 0.1, "learning_rate": 4.829387752089055e-05, "loss": 0.8212, "step": 9245 }, { "epoch": 0.1, "learning_rate": 4.8292954793752035e-05, "loss": 0.8558, "step": 9250 }, { "epoch": 0.1, "learning_rate": 4.8292032066613516e-05, "loss": 0.8911, "step": 9255 }, { "epoch": 0.1, "learning_rate": 4.8291109339475003e-05, "loss": 0.8382, "step": 9260 }, { "epoch": 0.1, "learning_rate": 4.82901866123365e-05, "loss": 0.8958, "step": 9265 }, { "epoch": 0.1, "learning_rate": 4.8289263885197986e-05, "loss": 0.8163, "step": 9270 }, { "epoch": 0.1, "learning_rate": 4.828834115805947e-05, "loss": 0.8649, "step": 9275 }, { "epoch": 0.1, "learning_rate": 4.8287418430920955e-05, "loss": 0.7381, "step": 9280 }, { "epoch": 0.1, "learning_rate": 4.828649570378245e-05, "loss": 0.8389, "step": 9285 }, { "epoch": 0.1, "learning_rate": 4.828557297664393e-05, "loss": 0.8522, "step": 9290 }, { "epoch": 0.1, "learning_rate": 4.828465024950542e-05, "loss": 0.8227, "step": 9295 }, { "epoch": 0.1, "learning_rate": 4.8283727522366906e-05, "loss": 0.8077, "step": 9300 }, { "epoch": 0.1, "learning_rate": 4.82828047952284e-05, "loss": 0.8638, "step": 9305 }, { "epoch": 0.1, "learning_rate": 4.828188206808988e-05, "loss": 0.7982, "step": 9310 }, { "epoch": 0.1, "learning_rate": 4.828095934095137e-05, "loss": 0.9315, "step": 9315 }, { "epoch": 0.1, "learning_rate": 4.828003661381286e-05, "loss": 0.8895, "step": 9320 }, { "epoch": 0.1, "learning_rate": 4.8279113886674346e-05, "loss": 0.8372, "step": 9325 }, { "epoch": 0.1, "learning_rate": 4.827819115953583e-05, "loss": 0.8683, "step": 9330 }, { "epoch": 0.1, "learning_rate": 4.827726843239732e-05, "loss": 0.8614, "step": 9335 }, { "epoch": 0.1, "learning_rate": 4.827634570525881e-05, "loss": 0.8758, "step": 9340 }, { "epoch": 0.1, "learning_rate": 4.82754229781203e-05, "loss": 0.858, "step": 9345 }, { "epoch": 0.1, "learning_rate": 4.8274500250981785e-05, "loss": 0.8397, "step": 9350 }, { "epoch": 0.1, "learning_rate": 4.827357752384327e-05, "loss": 0.8441, "step": 9355 }, { "epoch": 0.1, "learning_rate": 4.827265479670476e-05, "loss": 0.826, "step": 9360 }, { "epoch": 0.1, "learning_rate": 4.827173206956624e-05, "loss": 0.8497, "step": 9365 }, { "epoch": 0.1, "learning_rate": 4.8270809342427736e-05, "loss": 0.9176, "step": 9370 }, { "epoch": 0.1, "learning_rate": 4.8269886615289224e-05, "loss": 0.8906, "step": 9375 }, { "epoch": 0.1, "learning_rate": 4.826896388815071e-05, "loss": 0.8601, "step": 9380 }, { "epoch": 0.1, "learning_rate": 4.826804116101219e-05, "loss": 0.8532, "step": 9385 }, { "epoch": 0.1, "learning_rate": 4.826711843387369e-05, "loss": 0.8482, "step": 9390 }, { "epoch": 0.1, "learning_rate": 4.8266195706735175e-05, "loss": 0.8643, "step": 9395 }, { "epoch": 0.1, "learning_rate": 4.8265272979596656e-05, "loss": 0.9241, "step": 9400 }, { "epoch": 0.1, "learning_rate": 4.8264350252458144e-05, "loss": 0.7951, "step": 9405 }, { "epoch": 0.1, "learning_rate": 4.826342752531964e-05, "loss": 0.8206, "step": 9410 }, { "epoch": 0.1, "learning_rate": 4.826250479818113e-05, "loss": 0.902, "step": 9415 }, { "epoch": 0.1, "learning_rate": 4.826158207104261e-05, "loss": 0.8012, "step": 9420 }, { "epoch": 0.1, "learning_rate": 4.8260659343904096e-05, "loss": 0.9244, "step": 9425 }, { "epoch": 0.1, "learning_rate": 4.8259736616765584e-05, "loss": 0.886, "step": 9430 }, { "epoch": 0.1, "learning_rate": 4.825881388962707e-05, "loss": 0.8627, "step": 9435 }, { "epoch": 0.1, "learning_rate": 4.825789116248856e-05, "loss": 0.9017, "step": 9440 }, { "epoch": 0.1, "learning_rate": 4.825696843535005e-05, "loss": 0.8034, "step": 9445 }, { "epoch": 0.1, "learning_rate": 4.8256045708211535e-05, "loss": 0.8312, "step": 9450 }, { "epoch": 0.1, "learning_rate": 4.825512298107302e-05, "loss": 0.8269, "step": 9455 }, { "epoch": 0.1, "learning_rate": 4.825420025393451e-05, "loss": 0.8662, "step": 9460 }, { "epoch": 0.1, "learning_rate": 4.8253277526796e-05, "loss": 0.8902, "step": 9465 }, { "epoch": 0.1, "learning_rate": 4.8252354799657486e-05, "loss": 0.8561, "step": 9470 }, { "epoch": 0.1, "learning_rate": 4.8251432072518974e-05, "loss": 0.8647, "step": 9475 }, { "epoch": 0.1, "learning_rate": 4.825050934538046e-05, "loss": 0.8142, "step": 9480 }, { "epoch": 0.11, "learning_rate": 4.824958661824195e-05, "loss": 0.8588, "step": 9485 }, { "epoch": 0.11, "learning_rate": 4.824866389110344e-05, "loss": 0.8547, "step": 9490 }, { "epoch": 0.11, "learning_rate": 4.824774116396492e-05, "loss": 0.8398, "step": 9495 }, { "epoch": 0.11, "learning_rate": 4.8246818436826413e-05, "loss": 0.8727, "step": 9500 }, { "epoch": 0.11, "learning_rate": 4.82458957096879e-05, "loss": 0.8655, "step": 9505 }, { "epoch": 0.11, "learning_rate": 4.824497298254938e-05, "loss": 0.8233, "step": 9510 }, { "epoch": 0.11, "learning_rate": 4.824405025541087e-05, "loss": 0.8517, "step": 9515 }, { "epoch": 0.11, "learning_rate": 4.8243127528272365e-05, "loss": 0.797, "step": 9520 }, { "epoch": 0.11, "learning_rate": 4.824220480113385e-05, "loss": 0.7644, "step": 9525 }, { "epoch": 0.11, "learning_rate": 4.8241282073995334e-05, "loss": 0.8472, "step": 9530 }, { "epoch": 0.11, "learning_rate": 4.824035934685682e-05, "loss": 0.833, "step": 9535 }, { "epoch": 0.11, "learning_rate": 4.8239436619718316e-05, "loss": 0.8906, "step": 9540 }, { "epoch": 0.11, "learning_rate": 4.8238513892579804e-05, "loss": 0.849, "step": 9545 }, { "epoch": 0.11, "learning_rate": 4.8237591165441285e-05, "loss": 0.839, "step": 9550 }, { "epoch": 0.11, "learning_rate": 4.823666843830277e-05, "loss": 0.8351, "step": 9555 }, { "epoch": 0.11, "learning_rate": 4.823574571116427e-05, "loss": 0.8903, "step": 9560 }, { "epoch": 0.11, "learning_rate": 4.823482298402575e-05, "loss": 0.7985, "step": 9565 }, { "epoch": 0.11, "learning_rate": 4.8233900256887237e-05, "loss": 0.7803, "step": 9570 }, { "epoch": 0.11, "learning_rate": 4.8232977529748724e-05, "loss": 0.8686, "step": 9575 }, { "epoch": 0.11, "learning_rate": 4.823205480261021e-05, "loss": 0.825, "step": 9580 }, { "epoch": 0.11, "learning_rate": 4.82311320754717e-05, "loss": 0.7723, "step": 9585 }, { "epoch": 0.11, "learning_rate": 4.823020934833319e-05, "loss": 0.8222, "step": 9590 }, { "epoch": 0.11, "learning_rate": 4.8229286621194676e-05, "loss": 0.7893, "step": 9595 }, { "epoch": 0.11, "learning_rate": 4.8228363894056164e-05, "loss": 0.8888, "step": 9600 }, { "epoch": 0.11, "learning_rate": 4.822744116691765e-05, "loss": 0.9143, "step": 9605 }, { "epoch": 0.11, "learning_rate": 4.822651843977914e-05, "loss": 0.918, "step": 9610 }, { "epoch": 0.11, "learning_rate": 4.822559571264063e-05, "loss": 0.8686, "step": 9615 }, { "epoch": 0.11, "learning_rate": 4.8224672985502115e-05, "loss": 0.8685, "step": 9620 }, { "epoch": 0.11, "learning_rate": 4.82237502583636e-05, "loss": 0.9176, "step": 9625 }, { "epoch": 0.11, "learning_rate": 4.822282753122509e-05, "loss": 0.847, "step": 9630 }, { "epoch": 0.11, "learning_rate": 4.822190480408658e-05, "loss": 0.8608, "step": 9635 }, { "epoch": 0.11, "learning_rate": 4.822098207694806e-05, "loss": 0.9103, "step": 9640 }, { "epoch": 0.11, "learning_rate": 4.822005934980955e-05, "loss": 0.9361, "step": 9645 }, { "epoch": 0.11, "learning_rate": 4.821913662267104e-05, "loss": 0.799, "step": 9650 }, { "epoch": 0.11, "learning_rate": 4.821821389553253e-05, "loss": 0.8327, "step": 9655 }, { "epoch": 0.11, "learning_rate": 4.821729116839401e-05, "loss": 0.8488, "step": 9660 }, { "epoch": 0.11, "learning_rate": 4.82163684412555e-05, "loss": 0.8591, "step": 9665 }, { "epoch": 0.11, "learning_rate": 4.8215445714116994e-05, "loss": 0.9117, "step": 9670 }, { "epoch": 0.11, "learning_rate": 4.8214522986978475e-05, "loss": 0.7956, "step": 9675 }, { "epoch": 0.11, "learning_rate": 4.821360025983996e-05, "loss": 0.831, "step": 9680 }, { "epoch": 0.11, "learning_rate": 4.821267753270145e-05, "loss": 0.8357, "step": 9685 }, { "epoch": 0.11, "learning_rate": 4.8211754805562945e-05, "loss": 0.8589, "step": 9690 }, { "epoch": 0.11, "learning_rate": 4.8210832078424426e-05, "loss": 0.8246, "step": 9695 }, { "epoch": 0.11, "learning_rate": 4.8209909351285914e-05, "loss": 0.8451, "step": 9700 }, { "epoch": 0.11, "learning_rate": 4.82089866241474e-05, "loss": 0.8871, "step": 9705 }, { "epoch": 0.11, "learning_rate": 4.820806389700889e-05, "loss": 0.8425, "step": 9710 }, { "epoch": 0.11, "learning_rate": 4.820714116987038e-05, "loss": 0.8611, "step": 9715 }, { "epoch": 0.11, "learning_rate": 4.8206218442731865e-05, "loss": 0.8299, "step": 9720 }, { "epoch": 0.11, "learning_rate": 4.820529571559335e-05, "loss": 0.8788, "step": 9725 }, { "epoch": 0.11, "learning_rate": 4.820437298845484e-05, "loss": 0.8206, "step": 9730 }, { "epoch": 0.11, "learning_rate": 4.820345026131633e-05, "loss": 0.828, "step": 9735 }, { "epoch": 0.11, "learning_rate": 4.820252753417782e-05, "loss": 0.809, "step": 9740 }, { "epoch": 0.11, "learning_rate": 4.8201604807039304e-05, "loss": 0.8863, "step": 9745 }, { "epoch": 0.11, "learning_rate": 4.8200682079900786e-05, "loss": 0.8399, "step": 9750 }, { "epoch": 0.11, "learning_rate": 4.819975935276228e-05, "loss": 0.8294, "step": 9755 }, { "epoch": 0.11, "learning_rate": 4.819883662562377e-05, "loss": 0.8176, "step": 9760 }, { "epoch": 0.11, "learning_rate": 4.8197913898485256e-05, "loss": 0.8422, "step": 9765 }, { "epoch": 0.11, "learning_rate": 4.819699117134674e-05, "loss": 0.8404, "step": 9770 }, { "epoch": 0.11, "learning_rate": 4.819606844420823e-05, "loss": 0.9064, "step": 9775 }, { "epoch": 0.11, "learning_rate": 4.819514571706972e-05, "loss": 0.873, "step": 9780 }, { "epoch": 0.11, "learning_rate": 4.81942229899312e-05, "loss": 0.8785, "step": 9785 }, { "epoch": 0.11, "learning_rate": 4.819330026279269e-05, "loss": 0.8863, "step": 9790 }, { "epoch": 0.11, "learning_rate": 4.8192377535654176e-05, "loss": 0.8921, "step": 9795 }, { "epoch": 0.11, "learning_rate": 4.819145480851567e-05, "loss": 0.8526, "step": 9800 }, { "epoch": 0.11, "learning_rate": 4.819053208137715e-05, "loss": 0.8544, "step": 9805 }, { "epoch": 0.11, "learning_rate": 4.818960935423864e-05, "loss": 0.8496, "step": 9810 }, { "epoch": 0.11, "learning_rate": 4.818868662710013e-05, "loss": 0.8827, "step": 9815 }, { "epoch": 0.11, "learning_rate": 4.8187763899961615e-05, "loss": 0.9024, "step": 9820 }, { "epoch": 0.11, "learning_rate": 4.81868411728231e-05, "loss": 0.9233, "step": 9825 }, { "epoch": 0.11, "learning_rate": 4.818591844568459e-05, "loss": 0.8424, "step": 9830 }, { "epoch": 0.11, "learning_rate": 4.818499571854608e-05, "loss": 0.8205, "step": 9835 }, { "epoch": 0.11, "learning_rate": 4.818407299140757e-05, "loss": 0.8614, "step": 9840 }, { "epoch": 0.11, "learning_rate": 4.8183150264269055e-05, "loss": 0.8291, "step": 9845 }, { "epoch": 0.11, "learning_rate": 4.818222753713054e-05, "loss": 0.8933, "step": 9850 }, { "epoch": 0.11, "learning_rate": 4.818130480999203e-05, "loss": 0.8541, "step": 9855 }, { "epoch": 0.11, "learning_rate": 4.818038208285352e-05, "loss": 0.8595, "step": 9860 }, { "epoch": 0.11, "learning_rate": 4.8179459355715006e-05, "loss": 0.8835, "step": 9865 }, { "epoch": 0.11, "learning_rate": 4.8178536628576494e-05, "loss": 0.8925, "step": 9870 }, { "epoch": 0.11, "learning_rate": 4.817761390143798e-05, "loss": 0.888, "step": 9875 }, { "epoch": 0.11, "learning_rate": 4.817669117429946e-05, "loss": 0.8532, "step": 9880 }, { "epoch": 0.11, "learning_rate": 4.817576844716096e-05, "loss": 0.9049, "step": 9885 }, { "epoch": 0.11, "learning_rate": 4.8174845720022445e-05, "loss": 0.8618, "step": 9890 }, { "epoch": 0.11, "learning_rate": 4.8173922992883926e-05, "loss": 0.9277, "step": 9895 }, { "epoch": 0.11, "learning_rate": 4.8173000265745414e-05, "loss": 0.7682, "step": 9900 }, { "epoch": 0.11, "learning_rate": 4.817207753860691e-05, "loss": 0.7921, "step": 9905 }, { "epoch": 0.11, "learning_rate": 4.81711548114684e-05, "loss": 0.8593, "step": 9910 }, { "epoch": 0.11, "learning_rate": 4.817023208432988e-05, "loss": 0.8393, "step": 9915 }, { "epoch": 0.11, "learning_rate": 4.8169309357191366e-05, "loss": 0.8188, "step": 9920 }, { "epoch": 0.11, "learning_rate": 4.816838663005286e-05, "loss": 0.8963, "step": 9925 }, { "epoch": 0.11, "learning_rate": 4.816746390291435e-05, "loss": 0.8361, "step": 9930 }, { "epoch": 0.11, "learning_rate": 4.816654117577583e-05, "loss": 0.8226, "step": 9935 }, { "epoch": 0.11, "learning_rate": 4.816561844863732e-05, "loss": 0.8781, "step": 9940 }, { "epoch": 0.11, "learning_rate": 4.8164695721498805e-05, "loss": 0.8027, "step": 9945 }, { "epoch": 0.11, "learning_rate": 4.816377299436029e-05, "loss": 0.8915, "step": 9950 }, { "epoch": 0.11, "learning_rate": 4.816285026722178e-05, "loss": 0.8588, "step": 9955 }, { "epoch": 0.11, "learning_rate": 4.816192754008327e-05, "loss": 0.8587, "step": 9960 }, { "epoch": 0.11, "learning_rate": 4.8161004812944756e-05, "loss": 0.8304, "step": 9965 }, { "epoch": 0.11, "learning_rate": 4.8160082085806244e-05, "loss": 0.8832, "step": 9970 }, { "epoch": 0.11, "learning_rate": 4.815915935866773e-05, "loss": 0.8422, "step": 9975 }, { "epoch": 0.11, "learning_rate": 4.815823663152922e-05, "loss": 0.8622, "step": 9980 }, { "epoch": 0.11, "learning_rate": 4.815731390439071e-05, "loss": 0.8131, "step": 9985 }, { "epoch": 0.11, "learning_rate": 4.8156391177252196e-05, "loss": 0.861, "step": 9990 }, { "epoch": 0.11, "learning_rate": 4.815546845011368e-05, "loss": 0.8121, "step": 9995 }, { "epoch": 0.11, "learning_rate": 4.815454572297517e-05, "loss": 0.9066, "step": 10000 }, { "epoch": 0.11, "eval_loss": 0.8200355172157288, "eval_runtime": 70.5969, "eval_samples_per_second": 28.33, "eval_steps_per_second": 14.165, "step": 10000 }, { "epoch": 0.11, "learning_rate": 4.815362299583666e-05, "loss": 0.8012, "step": 10005 }, { "epoch": 0.11, "learning_rate": 4.815270026869815e-05, "loss": 0.8735, "step": 10010 }, { "epoch": 0.11, "learning_rate": 4.8151777541559635e-05, "loss": 0.8995, "step": 10015 }, { "epoch": 0.11, "learning_rate": 4.815085481442112e-05, "loss": 0.8366, "step": 10020 }, { "epoch": 0.11, "learning_rate": 4.8149932087282604e-05, "loss": 0.8432, "step": 10025 }, { "epoch": 0.11, "learning_rate": 4.814900936014409e-05, "loss": 0.8298, "step": 10030 }, { "epoch": 0.11, "learning_rate": 4.8148086633005586e-05, "loss": 0.8142, "step": 10035 }, { "epoch": 0.11, "learning_rate": 4.8147163905867074e-05, "loss": 0.9071, "step": 10040 }, { "epoch": 0.11, "learning_rate": 4.8146241178728555e-05, "loss": 0.8734, "step": 10045 }, { "epoch": 0.11, "learning_rate": 4.814531845159004e-05, "loss": 0.8658, "step": 10050 }, { "epoch": 0.11, "learning_rate": 4.814439572445154e-05, "loss": 0.9285, "step": 10055 }, { "epoch": 0.11, "learning_rate": 4.814347299731302e-05, "loss": 0.8421, "step": 10060 }, { "epoch": 0.11, "learning_rate": 4.8142550270174506e-05, "loss": 0.8308, "step": 10065 }, { "epoch": 0.11, "learning_rate": 4.8141627543035994e-05, "loss": 0.8022, "step": 10070 }, { "epoch": 0.11, "learning_rate": 4.814070481589749e-05, "loss": 0.831, "step": 10075 }, { "epoch": 0.11, "learning_rate": 4.813978208875897e-05, "loss": 0.9028, "step": 10080 }, { "epoch": 0.11, "learning_rate": 4.813885936162046e-05, "loss": 0.8651, "step": 10085 }, { "epoch": 0.11, "learning_rate": 4.8137936634481946e-05, "loss": 0.8354, "step": 10090 }, { "epoch": 0.11, "learning_rate": 4.8137013907343434e-05, "loss": 0.8653, "step": 10095 }, { "epoch": 0.11, "learning_rate": 4.813609118020492e-05, "loss": 0.8228, "step": 10100 }, { "epoch": 0.11, "learning_rate": 4.813516845306641e-05, "loss": 0.8509, "step": 10105 }, { "epoch": 0.11, "learning_rate": 4.81342457259279e-05, "loss": 0.8978, "step": 10110 }, { "epoch": 0.11, "learning_rate": 4.8133322998789385e-05, "loss": 0.8586, "step": 10115 }, { "epoch": 0.11, "learning_rate": 4.813240027165087e-05, "loss": 0.8512, "step": 10120 }, { "epoch": 0.11, "learning_rate": 4.813147754451236e-05, "loss": 0.8239, "step": 10125 }, { "epoch": 0.11, "learning_rate": 4.813055481737385e-05, "loss": 0.8011, "step": 10130 }, { "epoch": 0.11, "learning_rate": 4.812963209023533e-05, "loss": 0.83, "step": 10135 }, { "epoch": 0.11, "learning_rate": 4.8128709363096824e-05, "loss": 0.8649, "step": 10140 }, { "epoch": 0.11, "learning_rate": 4.812778663595831e-05, "loss": 0.7978, "step": 10145 }, { "epoch": 0.11, "learning_rate": 4.81268639088198e-05, "loss": 0.8981, "step": 10150 }, { "epoch": 0.11, "learning_rate": 4.812594118168128e-05, "loss": 0.8195, "step": 10155 }, { "epoch": 0.11, "learning_rate": 4.8125018454542776e-05, "loss": 0.9154, "step": 10160 }, { "epoch": 0.11, "learning_rate": 4.8124095727404263e-05, "loss": 0.8357, "step": 10165 }, { "epoch": 0.11, "learning_rate": 4.8123173000265745e-05, "loss": 0.865, "step": 10170 }, { "epoch": 0.11, "learning_rate": 4.812225027312723e-05, "loss": 0.8506, "step": 10175 }, { "epoch": 0.11, "learning_rate": 4.812132754598872e-05, "loss": 0.8133, "step": 10180 }, { "epoch": 0.11, "learning_rate": 4.8120404818850215e-05, "loss": 0.9066, "step": 10185 }, { "epoch": 0.11, "learning_rate": 4.8119482091711696e-05, "loss": 0.8518, "step": 10190 }, { "epoch": 0.11, "learning_rate": 4.8118559364573184e-05, "loss": 0.8757, "step": 10195 }, { "epoch": 0.11, "learning_rate": 4.811763663743467e-05, "loss": 0.8892, "step": 10200 }, { "epoch": 0.11, "learning_rate": 4.811671391029616e-05, "loss": 0.791, "step": 10205 }, { "epoch": 0.11, "learning_rate": 4.811579118315765e-05, "loss": 0.8427, "step": 10210 }, { "epoch": 0.11, "learning_rate": 4.8114868456019135e-05, "loss": 0.8628, "step": 10215 }, { "epoch": 0.11, "learning_rate": 4.811394572888062e-05, "loss": 0.8323, "step": 10220 }, { "epoch": 0.11, "learning_rate": 4.811302300174211e-05, "loss": 0.7862, "step": 10225 }, { "epoch": 0.11, "learning_rate": 4.81121002746036e-05, "loss": 0.8189, "step": 10230 }, { "epoch": 0.11, "learning_rate": 4.8111177547465087e-05, "loss": 0.847, "step": 10235 }, { "epoch": 0.11, "learning_rate": 4.8110254820326574e-05, "loss": 0.8719, "step": 10240 }, { "epoch": 0.11, "learning_rate": 4.810933209318806e-05, "loss": 0.8345, "step": 10245 }, { "epoch": 0.11, "learning_rate": 4.810840936604955e-05, "loss": 0.8209, "step": 10250 }, { "epoch": 0.11, "learning_rate": 4.810748663891104e-05, "loss": 0.8352, "step": 10255 }, { "epoch": 0.11, "learning_rate": 4.8106563911772526e-05, "loss": 0.911, "step": 10260 }, { "epoch": 0.11, "learning_rate": 4.810564118463401e-05, "loss": 0.8608, "step": 10265 }, { "epoch": 0.11, "learning_rate": 4.81047184574955e-05, "loss": 0.8108, "step": 10270 }, { "epoch": 0.11, "learning_rate": 4.810379573035699e-05, "loss": 0.82, "step": 10275 }, { "epoch": 0.11, "learning_rate": 4.810287300321848e-05, "loss": 0.8243, "step": 10280 }, { "epoch": 0.11, "learning_rate": 4.810195027607996e-05, "loss": 0.8218, "step": 10285 }, { "epoch": 0.11, "learning_rate": 4.810102754894145e-05, "loss": 0.8811, "step": 10290 }, { "epoch": 0.11, "learning_rate": 4.810010482180294e-05, "loss": 0.8588, "step": 10295 }, { "epoch": 0.11, "learning_rate": 4.809918209466442e-05, "loss": 0.8484, "step": 10300 }, { "epoch": 0.11, "learning_rate": 4.809825936752591e-05, "loss": 0.8618, "step": 10305 }, { "epoch": 0.11, "learning_rate": 4.8097336640387404e-05, "loss": 0.8464, "step": 10310 }, { "epoch": 0.11, "learning_rate": 4.809641391324889e-05, "loss": 0.8251, "step": 10315 }, { "epoch": 0.11, "learning_rate": 4.809549118611037e-05, "loss": 0.8689, "step": 10320 }, { "epoch": 0.11, "learning_rate": 4.809456845897186e-05, "loss": 0.8846, "step": 10325 }, { "epoch": 0.11, "learning_rate": 4.809364573183335e-05, "loss": 0.836, "step": 10330 }, { "epoch": 0.11, "learning_rate": 4.809272300469484e-05, "loss": 0.9074, "step": 10335 }, { "epoch": 0.11, "learning_rate": 4.8091800277556325e-05, "loss": 0.8714, "step": 10340 }, { "epoch": 0.11, "learning_rate": 4.809087755041781e-05, "loss": 0.7996, "step": 10345 }, { "epoch": 0.11, "learning_rate": 4.80899548232793e-05, "loss": 0.8027, "step": 10350 }, { "epoch": 0.11, "learning_rate": 4.808903209614079e-05, "loss": 0.8369, "step": 10355 }, { "epoch": 0.11, "learning_rate": 4.8088109369002276e-05, "loss": 0.9185, "step": 10360 }, { "epoch": 0.11, "learning_rate": 4.8087186641863764e-05, "loss": 0.8136, "step": 10365 }, { "epoch": 0.11, "learning_rate": 4.808626391472525e-05, "loss": 0.8402, "step": 10370 }, { "epoch": 0.11, "learning_rate": 4.808534118758674e-05, "loss": 0.7762, "step": 10375 }, { "epoch": 0.11, "learning_rate": 4.808441846044823e-05, "loss": 0.8823, "step": 10380 }, { "epoch": 0.11, "learning_rate": 4.8083495733309715e-05, "loss": 0.8157, "step": 10385 }, { "epoch": 0.12, "learning_rate": 4.80825730061712e-05, "loss": 0.8708, "step": 10390 }, { "epoch": 0.12, "learning_rate": 4.808165027903269e-05, "loss": 0.8151, "step": 10395 }, { "epoch": 0.12, "learning_rate": 4.808072755189418e-05, "loss": 0.8036, "step": 10400 }, { "epoch": 0.12, "learning_rate": 4.807980482475567e-05, "loss": 0.8247, "step": 10405 }, { "epoch": 0.12, "learning_rate": 4.807888209761715e-05, "loss": 0.8502, "step": 10410 }, { "epoch": 0.12, "learning_rate": 4.8077959370478636e-05, "loss": 0.8131, "step": 10415 }, { "epoch": 0.12, "learning_rate": 4.807703664334013e-05, "loss": 0.8737, "step": 10420 }, { "epoch": 0.12, "learning_rate": 4.807611391620162e-05, "loss": 0.8361, "step": 10425 }, { "epoch": 0.12, "learning_rate": 4.80751911890631e-05, "loss": 0.8989, "step": 10430 }, { "epoch": 0.12, "learning_rate": 4.807426846192459e-05, "loss": 0.8399, "step": 10435 }, { "epoch": 0.12, "learning_rate": 4.807334573478608e-05, "loss": 0.853, "step": 10440 }, { "epoch": 0.12, "learning_rate": 4.807242300764756e-05, "loss": 0.782, "step": 10445 }, { "epoch": 0.12, "learning_rate": 4.807150028050905e-05, "loss": 0.7988, "step": 10450 }, { "epoch": 0.12, "learning_rate": 4.807057755337054e-05, "loss": 0.852, "step": 10455 }, { "epoch": 0.12, "learning_rate": 4.806965482623203e-05, "loss": 0.7745, "step": 10460 }, { "epoch": 0.12, "learning_rate": 4.8068732099093514e-05, "loss": 0.8495, "step": 10465 }, { "epoch": 0.12, "learning_rate": 4.8067809371955e-05, "loss": 0.7965, "step": 10470 }, { "epoch": 0.12, "learning_rate": 4.806688664481649e-05, "loss": 0.8967, "step": 10475 }, { "epoch": 0.12, "learning_rate": 4.806596391767798e-05, "loss": 0.8663, "step": 10480 }, { "epoch": 0.12, "learning_rate": 4.8065041190539465e-05, "loss": 0.8195, "step": 10485 }, { "epoch": 0.12, "learning_rate": 4.806411846340095e-05, "loss": 0.8464, "step": 10490 }, { "epoch": 0.12, "learning_rate": 4.806319573626244e-05, "loss": 0.8876, "step": 10495 }, { "epoch": 0.12, "learning_rate": 4.806227300912393e-05, "loss": 0.7895, "step": 10500 }, { "epoch": 0.12, "learning_rate": 4.806135028198542e-05, "loss": 0.9155, "step": 10505 }, { "epoch": 0.12, "learning_rate": 4.8060427554846905e-05, "loss": 0.8195, "step": 10510 }, { "epoch": 0.12, "learning_rate": 4.805950482770839e-05, "loss": 0.8369, "step": 10515 }, { "epoch": 0.12, "learning_rate": 4.8058582100569874e-05, "loss": 0.8696, "step": 10520 }, { "epoch": 0.12, "learning_rate": 4.805765937343137e-05, "loss": 0.8322, "step": 10525 }, { "epoch": 0.12, "learning_rate": 4.8056736646292856e-05, "loss": 0.8072, "step": 10530 }, { "epoch": 0.12, "learning_rate": 4.8055813919154344e-05, "loss": 0.8531, "step": 10535 }, { "epoch": 0.12, "learning_rate": 4.8054891192015825e-05, "loss": 0.7694, "step": 10540 }, { "epoch": 0.12, "learning_rate": 4.805396846487732e-05, "loss": 0.8592, "step": 10545 }, { "epoch": 0.12, "learning_rate": 4.805304573773881e-05, "loss": 0.8582, "step": 10550 }, { "epoch": 0.12, "learning_rate": 4.805212301060029e-05, "loss": 0.8388, "step": 10555 }, { "epoch": 0.12, "learning_rate": 4.8051200283461776e-05, "loss": 0.7609, "step": 10560 }, { "epoch": 0.12, "learning_rate": 4.8050277556323264e-05, "loss": 0.8504, "step": 10565 }, { "epoch": 0.12, "learning_rate": 4.804935482918476e-05, "loss": 0.8345, "step": 10570 }, { "epoch": 0.12, "learning_rate": 4.804843210204624e-05, "loss": 0.7936, "step": 10575 }, { "epoch": 0.12, "learning_rate": 4.804750937490773e-05, "loss": 0.8953, "step": 10580 }, { "epoch": 0.12, "learning_rate": 4.8046586647769216e-05, "loss": 0.8585, "step": 10585 }, { "epoch": 0.12, "learning_rate": 4.804566392063071e-05, "loss": 0.8525, "step": 10590 }, { "epoch": 0.12, "learning_rate": 4.804474119349219e-05, "loss": 0.8596, "step": 10595 }, { "epoch": 0.12, "learning_rate": 4.804381846635368e-05, "loss": 0.8449, "step": 10600 }, { "epoch": 0.12, "learning_rate": 4.804289573921517e-05, "loss": 0.8747, "step": 10605 }, { "epoch": 0.12, "learning_rate": 4.8041973012076655e-05, "loss": 0.8874, "step": 10610 }, { "epoch": 0.12, "learning_rate": 4.804105028493814e-05, "loss": 0.8741, "step": 10615 }, { "epoch": 0.12, "learning_rate": 4.804012755779963e-05, "loss": 0.7858, "step": 10620 }, { "epoch": 0.12, "learning_rate": 4.803920483066112e-05, "loss": 0.8386, "step": 10625 }, { "epoch": 0.12, "learning_rate": 4.80382821035226e-05, "loss": 0.8337, "step": 10630 }, { "epoch": 0.12, "learning_rate": 4.8037359376384094e-05, "loss": 0.8751, "step": 10635 }, { "epoch": 0.12, "learning_rate": 4.803643664924558e-05, "loss": 0.873, "step": 10640 }, { "epoch": 0.12, "learning_rate": 4.803551392210707e-05, "loss": 0.9193, "step": 10645 }, { "epoch": 0.12, "learning_rate": 4.803459119496855e-05, "loss": 0.8085, "step": 10650 }, { "epoch": 0.12, "learning_rate": 4.8033668467830045e-05, "loss": 0.8045, "step": 10655 }, { "epoch": 0.12, "learning_rate": 4.803274574069153e-05, "loss": 0.854, "step": 10660 }, { "epoch": 0.12, "learning_rate": 4.803182301355302e-05, "loss": 0.8227, "step": 10665 }, { "epoch": 0.12, "learning_rate": 4.80309002864145e-05, "loss": 0.9032, "step": 10670 }, { "epoch": 0.12, "learning_rate": 4.8029977559276e-05, "loss": 0.8195, "step": 10675 }, { "epoch": 0.12, "learning_rate": 4.8029054832137485e-05, "loss": 0.8762, "step": 10680 }, { "epoch": 0.12, "learning_rate": 4.8028132104998966e-05, "loss": 0.8765, "step": 10685 }, { "epoch": 0.12, "learning_rate": 4.8027209377860454e-05, "loss": 0.7921, "step": 10690 }, { "epoch": 0.12, "learning_rate": 4.802628665072195e-05, "loss": 0.8683, "step": 10695 }, { "epoch": 0.12, "learning_rate": 4.8025363923583436e-05, "loss": 0.829, "step": 10700 }, { "epoch": 0.12, "learning_rate": 4.802444119644492e-05, "loss": 0.7981, "step": 10705 }, { "epoch": 0.12, "learning_rate": 4.8023518469306405e-05, "loss": 0.8532, "step": 10710 }, { "epoch": 0.12, "learning_rate": 4.802259574216789e-05, "loss": 0.7889, "step": 10715 }, { "epoch": 0.12, "learning_rate": 4.802167301502938e-05, "loss": 0.766, "step": 10720 }, { "epoch": 0.12, "learning_rate": 4.802075028789087e-05, "loss": 0.916, "step": 10725 }, { "epoch": 0.12, "learning_rate": 4.8019827560752356e-05, "loss": 0.9188, "step": 10730 }, { "epoch": 0.12, "learning_rate": 4.8018904833613844e-05, "loss": 0.814, "step": 10735 }, { "epoch": 0.12, "learning_rate": 4.801798210647533e-05, "loss": 0.8439, "step": 10740 }, { "epoch": 0.12, "learning_rate": 4.801705937933682e-05, "loss": 0.8234, "step": 10745 }, { "epoch": 0.12, "learning_rate": 4.801613665219831e-05, "loss": 0.8191, "step": 10750 }, { "epoch": 0.12, "learning_rate": 4.8015213925059796e-05, "loss": 0.8476, "step": 10755 }, { "epoch": 0.12, "learning_rate": 4.8014291197921284e-05, "loss": 0.8341, "step": 10760 }, { "epoch": 0.12, "learning_rate": 4.801336847078277e-05, "loss": 0.8841, "step": 10765 }, { "epoch": 0.12, "learning_rate": 4.801244574364426e-05, "loss": 0.853, "step": 10770 }, { "epoch": 0.12, "learning_rate": 4.801152301650575e-05, "loss": 0.8378, "step": 10775 }, { "epoch": 0.12, "learning_rate": 4.801060028936723e-05, "loss": 0.8604, "step": 10780 }, { "epoch": 0.12, "learning_rate": 4.800967756222872e-05, "loss": 0.8056, "step": 10785 }, { "epoch": 0.12, "learning_rate": 4.800875483509021e-05, "loss": 0.8971, "step": 10790 }, { "epoch": 0.12, "learning_rate": 4.800783210795169e-05, "loss": 0.8352, "step": 10795 }, { "epoch": 0.12, "learning_rate": 4.800690938081318e-05, "loss": 0.8173, "step": 10800 }, { "epoch": 0.12, "learning_rate": 4.8005986653674674e-05, "loss": 0.8542, "step": 10805 }, { "epoch": 0.12, "learning_rate": 4.800506392653616e-05, "loss": 0.8077, "step": 10810 }, { "epoch": 0.12, "learning_rate": 4.800414119939764e-05, "loss": 0.7989, "step": 10815 }, { "epoch": 0.12, "learning_rate": 4.800321847225913e-05, "loss": 0.8578, "step": 10820 }, { "epoch": 0.12, "learning_rate": 4.8002295745120626e-05, "loss": 0.8558, "step": 10825 }, { "epoch": 0.12, "learning_rate": 4.800137301798211e-05, "loss": 0.9071, "step": 10830 }, { "epoch": 0.12, "learning_rate": 4.8000450290843594e-05, "loss": 0.8803, "step": 10835 }, { "epoch": 0.12, "learning_rate": 4.799952756370508e-05, "loss": 0.7778, "step": 10840 }, { "epoch": 0.12, "learning_rate": 4.799860483656658e-05, "loss": 0.8246, "step": 10845 }, { "epoch": 0.12, "learning_rate": 4.799768210942806e-05, "loss": 0.8182, "step": 10850 }, { "epoch": 0.12, "learning_rate": 4.7996759382289546e-05, "loss": 0.8762, "step": 10855 }, { "epoch": 0.12, "learning_rate": 4.7995836655151034e-05, "loss": 0.8405, "step": 10860 }, { "epoch": 0.12, "learning_rate": 4.799491392801252e-05, "loss": 0.9059, "step": 10865 }, { "epoch": 0.12, "learning_rate": 4.799399120087401e-05, "loss": 0.8804, "step": 10870 }, { "epoch": 0.12, "learning_rate": 4.79930684737355e-05, "loss": 0.7984, "step": 10875 }, { "epoch": 0.12, "learning_rate": 4.7992145746596985e-05, "loss": 0.8441, "step": 10880 }, { "epoch": 0.12, "learning_rate": 4.799122301945847e-05, "loss": 0.7787, "step": 10885 }, { "epoch": 0.12, "learning_rate": 4.799030029231996e-05, "loss": 0.8204, "step": 10890 }, { "epoch": 0.12, "learning_rate": 4.798937756518145e-05, "loss": 0.8813, "step": 10895 }, { "epoch": 0.12, "learning_rate": 4.7988454838042937e-05, "loss": 0.8191, "step": 10900 }, { "epoch": 0.12, "learning_rate": 4.798753211090442e-05, "loss": 0.8097, "step": 10905 }, { "epoch": 0.12, "learning_rate": 4.798660938376591e-05, "loss": 0.8276, "step": 10910 }, { "epoch": 0.12, "learning_rate": 4.79856866566274e-05, "loss": 0.8393, "step": 10915 }, { "epoch": 0.12, "learning_rate": 4.798476392948889e-05, "loss": 0.8133, "step": 10920 }, { "epoch": 0.12, "learning_rate": 4.798384120235037e-05, "loss": 0.8495, "step": 10925 }, { "epoch": 0.12, "learning_rate": 4.798291847521186e-05, "loss": 0.9031, "step": 10930 }, { "epoch": 0.12, "learning_rate": 4.798199574807335e-05, "loss": 0.8501, "step": 10935 }, { "epoch": 0.12, "learning_rate": 4.798107302093483e-05, "loss": 0.7913, "step": 10940 }, { "epoch": 0.12, "learning_rate": 4.798015029379632e-05, "loss": 0.8445, "step": 10945 }, { "epoch": 0.12, "learning_rate": 4.797922756665781e-05, "loss": 0.8419, "step": 10950 }, { "epoch": 0.12, "learning_rate": 4.79783048395193e-05, "loss": 0.8467, "step": 10955 }, { "epoch": 0.12, "learning_rate": 4.7977382112380784e-05, "loss": 0.8674, "step": 10960 }, { "epoch": 0.12, "learning_rate": 4.797645938524227e-05, "loss": 0.8676, "step": 10965 }, { "epoch": 0.12, "learning_rate": 4.797553665810376e-05, "loss": 0.8052, "step": 10970 }, { "epoch": 0.12, "learning_rate": 4.7974613930965254e-05, "loss": 0.8173, "step": 10975 }, { "epoch": 0.12, "learning_rate": 4.7973691203826735e-05, "loss": 0.8634, "step": 10980 }, { "epoch": 0.12, "learning_rate": 4.797276847668822e-05, "loss": 0.8061, "step": 10985 }, { "epoch": 0.12, "learning_rate": 4.797184574954971e-05, "loss": 0.8725, "step": 10990 }, { "epoch": 0.12, "learning_rate": 4.79709230224112e-05, "loss": 0.8542, "step": 10995 }, { "epoch": 0.12, "learning_rate": 4.797000029527269e-05, "loss": 0.8686, "step": 11000 }, { "epoch": 0.12, "eval_loss": 0.8092126250267029, "eval_runtime": 70.3927, "eval_samples_per_second": 28.412, "eval_steps_per_second": 14.206, "step": 11000 }, { "epoch": 0.12, "learning_rate": 4.7969077568134175e-05, "loss": 0.8058, "step": 11005 }, { "epoch": 0.12, "learning_rate": 4.796815484099566e-05, "loss": 0.905, "step": 11010 }, { "epoch": 0.12, "learning_rate": 4.7967232113857143e-05, "loss": 0.7967, "step": 11015 }, { "epoch": 0.12, "learning_rate": 4.796630938671864e-05, "loss": 0.8232, "step": 11020 }, { "epoch": 0.12, "learning_rate": 4.7965386659580126e-05, "loss": 0.8904, "step": 11025 }, { "epoch": 0.12, "learning_rate": 4.7964463932441614e-05, "loss": 0.7807, "step": 11030 }, { "epoch": 0.12, "learning_rate": 4.7963541205303095e-05, "loss": 0.8991, "step": 11035 }, { "epoch": 0.12, "learning_rate": 4.796261847816459e-05, "loss": 0.8524, "step": 11040 }, { "epoch": 0.12, "learning_rate": 4.796169575102608e-05, "loss": 0.8768, "step": 11045 }, { "epoch": 0.12, "learning_rate": 4.7960773023887565e-05, "loss": 0.8282, "step": 11050 }, { "epoch": 0.12, "learning_rate": 4.7959850296749046e-05, "loss": 0.8686, "step": 11055 }, { "epoch": 0.12, "learning_rate": 4.795892756961054e-05, "loss": 0.864, "step": 11060 }, { "epoch": 0.12, "learning_rate": 4.795800484247203e-05, "loss": 0.8369, "step": 11065 }, { "epoch": 0.12, "learning_rate": 4.795708211533351e-05, "loss": 0.8706, "step": 11070 }, { "epoch": 0.12, "learning_rate": 4.7956159388195e-05, "loss": 0.7983, "step": 11075 }, { "epoch": 0.12, "learning_rate": 4.795523666105649e-05, "loss": 0.7711, "step": 11080 }, { "epoch": 0.12, "learning_rate": 4.795431393391798e-05, "loss": 0.8163, "step": 11085 }, { "epoch": 0.12, "learning_rate": 4.795339120677946e-05, "loss": 0.8476, "step": 11090 }, { "epoch": 0.12, "learning_rate": 4.795246847964095e-05, "loss": 0.8196, "step": 11095 }, { "epoch": 0.12, "learning_rate": 4.795154575250244e-05, "loss": 0.9136, "step": 11100 }, { "epoch": 0.12, "learning_rate": 4.7950623025363925e-05, "loss": 0.8701, "step": 11105 }, { "epoch": 0.12, "learning_rate": 4.794970029822541e-05, "loss": 0.8525, "step": 11110 }, { "epoch": 0.12, "learning_rate": 4.79487775710869e-05, "loss": 0.8257, "step": 11115 }, { "epoch": 0.12, "learning_rate": 4.794785484394839e-05, "loss": 0.9076, "step": 11120 }, { "epoch": 0.12, "learning_rate": 4.7946932116809876e-05, "loss": 0.8743, "step": 11125 }, { "epoch": 0.12, "learning_rate": 4.7946009389671364e-05, "loss": 0.8168, "step": 11130 }, { "epoch": 0.12, "learning_rate": 4.794508666253285e-05, "loss": 0.9269, "step": 11135 }, { "epoch": 0.12, "learning_rate": 4.794416393539434e-05, "loss": 0.8867, "step": 11140 }, { "epoch": 0.12, "learning_rate": 4.794324120825583e-05, "loss": 0.8411, "step": 11145 }, { "epoch": 0.12, "learning_rate": 4.7942318481117315e-05, "loss": 0.8567, "step": 11150 }, { "epoch": 0.12, "learning_rate": 4.79413957539788e-05, "loss": 0.8465, "step": 11155 }, { "epoch": 0.12, "learning_rate": 4.794047302684029e-05, "loss": 0.833, "step": 11160 }, { "epoch": 0.12, "learning_rate": 4.793955029970177e-05, "loss": 0.7726, "step": 11165 }, { "epoch": 0.12, "learning_rate": 4.793862757256327e-05, "loss": 0.8806, "step": 11170 }, { "epoch": 0.12, "learning_rate": 4.7937704845424755e-05, "loss": 0.8177, "step": 11175 }, { "epoch": 0.12, "learning_rate": 4.7936782118286236e-05, "loss": 0.8527, "step": 11180 }, { "epoch": 0.12, "learning_rate": 4.7935859391147724e-05, "loss": 0.8506, "step": 11185 }, { "epoch": 0.12, "learning_rate": 4.793493666400922e-05, "loss": 0.9145, "step": 11190 }, { "epoch": 0.12, "learning_rate": 4.7934013936870706e-05, "loss": 0.8444, "step": 11195 }, { "epoch": 0.12, "learning_rate": 4.793309120973219e-05, "loss": 0.8273, "step": 11200 }, { "epoch": 0.12, "learning_rate": 4.7932168482593675e-05, "loss": 0.8509, "step": 11205 }, { "epoch": 0.12, "learning_rate": 4.793124575545517e-05, "loss": 0.8573, "step": 11210 }, { "epoch": 0.12, "learning_rate": 4.793032302831665e-05, "loss": 0.822, "step": 11215 }, { "epoch": 0.12, "learning_rate": 4.792940030117814e-05, "loss": 0.8347, "step": 11220 }, { "epoch": 0.12, "learning_rate": 4.7928477574039626e-05, "loss": 0.8178, "step": 11225 }, { "epoch": 0.12, "learning_rate": 4.792755484690112e-05, "loss": 0.9087, "step": 11230 }, { "epoch": 0.12, "learning_rate": 4.79266321197626e-05, "loss": 0.8504, "step": 11235 }, { "epoch": 0.12, "learning_rate": 4.792570939262409e-05, "loss": 0.8335, "step": 11240 }, { "epoch": 0.12, "learning_rate": 4.792478666548558e-05, "loss": 0.9262, "step": 11245 }, { "epoch": 0.12, "learning_rate": 4.7923863938347066e-05, "loss": 0.8194, "step": 11250 }, { "epoch": 0.12, "learning_rate": 4.7922941211208553e-05, "loss": 0.8392, "step": 11255 }, { "epoch": 0.12, "learning_rate": 4.792201848407004e-05, "loss": 0.8482, "step": 11260 }, { "epoch": 0.12, "learning_rate": 4.792109575693153e-05, "loss": 0.8872, "step": 11265 }, { "epoch": 0.12, "learning_rate": 4.792017302979302e-05, "loss": 0.862, "step": 11270 }, { "epoch": 0.12, "learning_rate": 4.7919250302654505e-05, "loss": 0.8585, "step": 11275 }, { "epoch": 0.12, "learning_rate": 4.791832757551599e-05, "loss": 0.8004, "step": 11280 }, { "epoch": 0.12, "learning_rate": 4.791740484837748e-05, "loss": 0.8668, "step": 11285 }, { "epoch": 0.13, "learning_rate": 4.791648212123896e-05, "loss": 0.805, "step": 11290 }, { "epoch": 0.13, "learning_rate": 4.7915559394100456e-05, "loss": 0.8629, "step": 11295 }, { "epoch": 0.13, "learning_rate": 4.7914636666961944e-05, "loss": 0.9083, "step": 11300 }, { "epoch": 0.13, "learning_rate": 4.791371393982343e-05, "loss": 0.822, "step": 11305 }, { "epoch": 0.13, "learning_rate": 4.791279121268491e-05, "loss": 0.9302, "step": 11310 }, { "epoch": 0.13, "learning_rate": 4.79118684855464e-05, "loss": 0.7913, "step": 11315 }, { "epoch": 0.13, "learning_rate": 4.7910945758407895e-05, "loss": 0.8315, "step": 11320 }, { "epoch": 0.13, "learning_rate": 4.7910023031269377e-05, "loss": 0.9956, "step": 11325 }, { "epoch": 0.13, "learning_rate": 4.7909100304130864e-05, "loss": 0.9021, "step": 11330 }, { "epoch": 0.13, "learning_rate": 4.790817757699235e-05, "loss": 0.8518, "step": 11335 }, { "epoch": 0.13, "learning_rate": 4.790725484985385e-05, "loss": 0.8396, "step": 11340 }, { "epoch": 0.13, "learning_rate": 4.790633212271533e-05, "loss": 0.8006, "step": 11345 }, { "epoch": 0.13, "learning_rate": 4.7905409395576816e-05, "loss": 0.7713, "step": 11350 }, { "epoch": 0.13, "learning_rate": 4.7904486668438304e-05, "loss": 0.8567, "step": 11355 }, { "epoch": 0.13, "learning_rate": 4.79035639412998e-05, "loss": 0.8626, "step": 11360 }, { "epoch": 0.13, "learning_rate": 4.790264121416128e-05, "loss": 0.829, "step": 11365 }, { "epoch": 0.13, "learning_rate": 4.790171848702277e-05, "loss": 0.8262, "step": 11370 }, { "epoch": 0.13, "learning_rate": 4.7900795759884255e-05, "loss": 0.858, "step": 11375 }, { "epoch": 0.13, "learning_rate": 4.789987303274574e-05, "loss": 0.8397, "step": 11380 }, { "epoch": 0.13, "learning_rate": 4.789895030560723e-05, "loss": 0.7798, "step": 11385 }, { "epoch": 0.13, "learning_rate": 4.789802757846872e-05, "loss": 0.7812, "step": 11390 }, { "epoch": 0.13, "learning_rate": 4.7897104851330206e-05, "loss": 0.8634, "step": 11395 }, { "epoch": 0.13, "learning_rate": 4.789618212419169e-05, "loss": 0.8793, "step": 11400 }, { "epoch": 0.13, "learning_rate": 4.789525939705318e-05, "loss": 0.7945, "step": 11405 }, { "epoch": 0.13, "learning_rate": 4.789433666991467e-05, "loss": 0.8652, "step": 11410 }, { "epoch": 0.13, "learning_rate": 4.789341394277616e-05, "loss": 0.8372, "step": 11415 }, { "epoch": 0.13, "learning_rate": 4.789249121563764e-05, "loss": 0.8354, "step": 11420 }, { "epoch": 0.13, "learning_rate": 4.7891568488499134e-05, "loss": 0.8056, "step": 11425 }, { "epoch": 0.13, "learning_rate": 4.789064576136062e-05, "loss": 0.8028, "step": 11430 }, { "epoch": 0.13, "learning_rate": 4.788972303422211e-05, "loss": 0.8253, "step": 11435 }, { "epoch": 0.13, "learning_rate": 4.788880030708359e-05, "loss": 0.8552, "step": 11440 }, { "epoch": 0.13, "learning_rate": 4.7887877579945085e-05, "loss": 0.922, "step": 11445 }, { "epoch": 0.13, "learning_rate": 4.788695485280657e-05, "loss": 0.8705, "step": 11450 }, { "epoch": 0.13, "learning_rate": 4.7886032125668054e-05, "loss": 0.8545, "step": 11455 }, { "epoch": 0.13, "learning_rate": 4.788510939852954e-05, "loss": 0.8436, "step": 11460 }, { "epoch": 0.13, "learning_rate": 4.788418667139103e-05, "loss": 0.8397, "step": 11465 }, { "epoch": 0.13, "learning_rate": 4.7883263944252524e-05, "loss": 0.8214, "step": 11470 }, { "epoch": 0.13, "learning_rate": 4.7882341217114005e-05, "loss": 0.8127, "step": 11475 }, { "epoch": 0.13, "learning_rate": 4.788141848997549e-05, "loss": 0.855, "step": 11480 }, { "epoch": 0.13, "learning_rate": 4.788049576283698e-05, "loss": 0.8, "step": 11485 }, { "epoch": 0.13, "learning_rate": 4.787957303569847e-05, "loss": 0.8246, "step": 11490 }, { "epoch": 0.13, "learning_rate": 4.787865030855996e-05, "loss": 0.842, "step": 11495 }, { "epoch": 0.13, "learning_rate": 4.7877727581421444e-05, "loss": 0.8576, "step": 11500 }, { "epoch": 0.13, "learning_rate": 4.787680485428293e-05, "loss": 0.8646, "step": 11505 }, { "epoch": 0.13, "learning_rate": 4.787588212714442e-05, "loss": 0.7575, "step": 11510 }, { "epoch": 0.13, "learning_rate": 4.787495940000591e-05, "loss": 0.863, "step": 11515 }, { "epoch": 0.13, "learning_rate": 4.7874036672867396e-05, "loss": 0.8129, "step": 11520 }, { "epoch": 0.13, "learning_rate": 4.7873113945728884e-05, "loss": 0.8159, "step": 11525 }, { "epoch": 0.13, "learning_rate": 4.787219121859037e-05, "loss": 0.8473, "step": 11530 }, { "epoch": 0.13, "learning_rate": 4.787126849145186e-05, "loss": 0.831, "step": 11535 }, { "epoch": 0.13, "learning_rate": 4.787034576431335e-05, "loss": 0.8563, "step": 11540 }, { "epoch": 0.13, "learning_rate": 4.7869423037174835e-05, "loss": 0.9441, "step": 11545 }, { "epoch": 0.13, "learning_rate": 4.7868500310036316e-05, "loss": 0.8737, "step": 11550 }, { "epoch": 0.13, "learning_rate": 4.786757758289781e-05, "loss": 0.8536, "step": 11555 }, { "epoch": 0.13, "learning_rate": 4.78666548557593e-05, "loss": 0.7871, "step": 11560 }, { "epoch": 0.13, "learning_rate": 4.786573212862078e-05, "loss": 0.8152, "step": 11565 }, { "epoch": 0.13, "learning_rate": 4.786480940148227e-05, "loss": 0.8441, "step": 11570 }, { "epoch": 0.13, "learning_rate": 4.786388667434376e-05, "loss": 0.7722, "step": 11575 }, { "epoch": 0.13, "learning_rate": 4.786296394720525e-05, "loss": 0.8484, "step": 11580 }, { "epoch": 0.13, "learning_rate": 4.786204122006673e-05, "loss": 0.8982, "step": 11585 }, { "epoch": 0.13, "learning_rate": 4.786111849292822e-05, "loss": 0.8407, "step": 11590 }, { "epoch": 0.13, "learning_rate": 4.7860195765789714e-05, "loss": 0.793, "step": 11595 }, { "epoch": 0.13, "learning_rate": 4.7859273038651195e-05, "loss": 0.7881, "step": 11600 }, { "epoch": 0.13, "learning_rate": 4.785835031151268e-05, "loss": 0.8939, "step": 11605 }, { "epoch": 0.13, "learning_rate": 4.785742758437417e-05, "loss": 0.7994, "step": 11610 }, { "epoch": 0.13, "learning_rate": 4.785650485723566e-05, "loss": 0.8463, "step": 11615 }, { "epoch": 0.13, "learning_rate": 4.7855582130097146e-05, "loss": 0.8787, "step": 11620 }, { "epoch": 0.13, "learning_rate": 4.7854659402958634e-05, "loss": 0.8886, "step": 11625 }, { "epoch": 0.13, "learning_rate": 4.785373667582012e-05, "loss": 0.818, "step": 11630 }, { "epoch": 0.13, "learning_rate": 4.785281394868161e-05, "loss": 0.8487, "step": 11635 }, { "epoch": 0.13, "learning_rate": 4.78518912215431e-05, "loss": 0.8413, "step": 11640 }, { "epoch": 0.13, "learning_rate": 4.7850968494404585e-05, "loss": 0.7743, "step": 11645 }, { "epoch": 0.13, "learning_rate": 4.785004576726607e-05, "loss": 0.8528, "step": 11650 }, { "epoch": 0.13, "learning_rate": 4.784912304012756e-05, "loss": 0.8018, "step": 11655 }, { "epoch": 0.13, "learning_rate": 4.784820031298905e-05, "loss": 0.8347, "step": 11660 }, { "epoch": 0.13, "learning_rate": 4.784727758585054e-05, "loss": 0.8387, "step": 11665 }, { "epoch": 0.13, "learning_rate": 4.7846354858712025e-05, "loss": 0.8129, "step": 11670 }, { "epoch": 0.13, "learning_rate": 4.7845432131573506e-05, "loss": 0.8993, "step": 11675 }, { "epoch": 0.13, "learning_rate": 4.7844509404435e-05, "loss": 0.876, "step": 11680 }, { "epoch": 0.13, "learning_rate": 4.784358667729649e-05, "loss": 0.8215, "step": 11685 }, { "epoch": 0.13, "learning_rate": 4.7842663950157976e-05, "loss": 0.8188, "step": 11690 }, { "epoch": 0.13, "learning_rate": 4.784174122301946e-05, "loss": 0.7896, "step": 11695 }, { "epoch": 0.13, "learning_rate": 4.7840818495880945e-05, "loss": 0.8788, "step": 11700 }, { "epoch": 0.13, "learning_rate": 4.783989576874244e-05, "loss": 0.8251, "step": 11705 }, { "epoch": 0.13, "learning_rate": 4.783897304160392e-05, "loss": 0.7838, "step": 11710 }, { "epoch": 0.13, "learning_rate": 4.783805031446541e-05, "loss": 0.8381, "step": 11715 }, { "epoch": 0.13, "learning_rate": 4.7837127587326896e-05, "loss": 0.889, "step": 11720 }, { "epoch": 0.13, "learning_rate": 4.783620486018839e-05, "loss": 0.8319, "step": 11725 }, { "epoch": 0.13, "learning_rate": 4.783528213304987e-05, "loss": 0.8266, "step": 11730 }, { "epoch": 0.13, "learning_rate": 4.783435940591136e-05, "loss": 0.8389, "step": 11735 }, { "epoch": 0.13, "learning_rate": 4.783343667877285e-05, "loss": 0.8262, "step": 11740 }, { "epoch": 0.13, "learning_rate": 4.783251395163434e-05, "loss": 0.8486, "step": 11745 }, { "epoch": 0.13, "learning_rate": 4.783159122449582e-05, "loss": 0.9014, "step": 11750 }, { "epoch": 0.13, "learning_rate": 4.783066849735731e-05, "loss": 0.9266, "step": 11755 }, { "epoch": 0.13, "learning_rate": 4.78297457702188e-05, "loss": 0.8211, "step": 11760 }, { "epoch": 0.13, "learning_rate": 4.782882304308029e-05, "loss": 0.8413, "step": 11765 }, { "epoch": 0.13, "learning_rate": 4.7827900315941775e-05, "loss": 0.8538, "step": 11770 }, { "epoch": 0.13, "learning_rate": 4.782697758880326e-05, "loss": 0.7881, "step": 11775 }, { "epoch": 0.13, "learning_rate": 4.782605486166475e-05, "loss": 0.8377, "step": 11780 }, { "epoch": 0.13, "learning_rate": 4.782513213452623e-05, "loss": 0.8274, "step": 11785 }, { "epoch": 0.13, "learning_rate": 4.7824209407387726e-05, "loss": 0.8785, "step": 11790 }, { "epoch": 0.13, "learning_rate": 4.7823286680249214e-05, "loss": 0.8302, "step": 11795 }, { "epoch": 0.13, "learning_rate": 4.78223639531107e-05, "loss": 0.8149, "step": 11800 }, { "epoch": 0.13, "learning_rate": 4.782144122597218e-05, "loss": 0.8799, "step": 11805 }, { "epoch": 0.13, "learning_rate": 4.782051849883368e-05, "loss": 0.854, "step": 11810 }, { "epoch": 0.13, "learning_rate": 4.7819595771695165e-05, "loss": 0.8559, "step": 11815 }, { "epoch": 0.13, "learning_rate": 4.781867304455665e-05, "loss": 0.8934, "step": 11820 }, { "epoch": 0.13, "learning_rate": 4.7817750317418134e-05, "loss": 0.8683, "step": 11825 }, { "epoch": 0.13, "learning_rate": 4.781682759027963e-05, "loss": 0.7973, "step": 11830 }, { "epoch": 0.13, "learning_rate": 4.781590486314112e-05, "loss": 0.8964, "step": 11835 }, { "epoch": 0.13, "learning_rate": 4.78149821360026e-05, "loss": 0.8317, "step": 11840 }, { "epoch": 0.13, "learning_rate": 4.7814059408864086e-05, "loss": 0.8133, "step": 11845 }, { "epoch": 0.13, "learning_rate": 4.7813136681725574e-05, "loss": 0.9027, "step": 11850 }, { "epoch": 0.13, "learning_rate": 4.781221395458707e-05, "loss": 0.8381, "step": 11855 }, { "epoch": 0.13, "learning_rate": 4.781129122744855e-05, "loss": 0.7977, "step": 11860 }, { "epoch": 0.13, "learning_rate": 4.781036850031004e-05, "loss": 0.8167, "step": 11865 }, { "epoch": 0.13, "learning_rate": 4.7809445773171525e-05, "loss": 0.8323, "step": 11870 }, { "epoch": 0.13, "learning_rate": 4.780852304603301e-05, "loss": 0.9033, "step": 11875 }, { "epoch": 0.13, "learning_rate": 4.78076003188945e-05, "loss": 0.8599, "step": 11880 }, { "epoch": 0.13, "learning_rate": 4.780667759175599e-05, "loss": 0.9227, "step": 11885 }, { "epoch": 0.13, "learning_rate": 4.7805754864617476e-05, "loss": 0.8429, "step": 11890 }, { "epoch": 0.13, "learning_rate": 4.7804832137478964e-05, "loss": 0.783, "step": 11895 }, { "epoch": 0.13, "learning_rate": 4.780390941034045e-05, "loss": 0.85, "step": 11900 }, { "epoch": 0.13, "learning_rate": 4.780298668320194e-05, "loss": 0.8289, "step": 11905 }, { "epoch": 0.13, "learning_rate": 4.780206395606343e-05, "loss": 0.8388, "step": 11910 }, { "epoch": 0.13, "learning_rate": 4.7801141228924916e-05, "loss": 0.8172, "step": 11915 }, { "epoch": 0.13, "learning_rate": 4.7800218501786403e-05, "loss": 0.806, "step": 11920 }, { "epoch": 0.13, "learning_rate": 4.779929577464789e-05, "loss": 0.8039, "step": 11925 }, { "epoch": 0.13, "learning_rate": 4.779837304750938e-05, "loss": 0.8381, "step": 11930 }, { "epoch": 0.13, "learning_rate": 4.779745032037086e-05, "loss": 0.9451, "step": 11935 }, { "epoch": 0.13, "learning_rate": 4.7796527593232355e-05, "loss": 0.8505, "step": 11940 }, { "epoch": 0.13, "learning_rate": 4.779560486609384e-05, "loss": 0.8401, "step": 11945 }, { "epoch": 0.13, "learning_rate": 4.7794682138955324e-05, "loss": 0.833, "step": 11950 }, { "epoch": 0.13, "learning_rate": 4.779375941181681e-05, "loss": 0.8375, "step": 11955 }, { "epoch": 0.13, "learning_rate": 4.7792836684678306e-05, "loss": 0.7904, "step": 11960 }, { "epoch": 0.13, "learning_rate": 4.7791913957539794e-05, "loss": 0.8014, "step": 11965 }, { "epoch": 0.13, "learning_rate": 4.7790991230401275e-05, "loss": 0.8419, "step": 11970 }, { "epoch": 0.13, "learning_rate": 4.779006850326276e-05, "loss": 0.8148, "step": 11975 }, { "epoch": 0.13, "learning_rate": 4.778914577612426e-05, "loss": 0.8648, "step": 11980 }, { "epoch": 0.13, "learning_rate": 4.778822304898574e-05, "loss": 0.9261, "step": 11985 }, { "epoch": 0.13, "learning_rate": 4.7787300321847227e-05, "loss": 0.8816, "step": 11990 }, { "epoch": 0.13, "learning_rate": 4.7786377594708714e-05, "loss": 0.8757, "step": 11995 }, { "epoch": 0.13, "learning_rate": 4.77854548675702e-05, "loss": 0.7736, "step": 12000 }, { "epoch": 0.13, "eval_loss": 0.8198770880699158, "eval_runtime": 70.1593, "eval_samples_per_second": 28.507, "eval_steps_per_second": 14.253, "step": 12000 }, { "epoch": 0.13, "learning_rate": 4.778453214043169e-05, "loss": 0.8529, "step": 12005 }, { "epoch": 0.13, "learning_rate": 4.778360941329318e-05, "loss": 0.771, "step": 12010 }, { "epoch": 0.13, "learning_rate": 4.7782686686154666e-05, "loss": 0.8251, "step": 12015 }, { "epoch": 0.13, "learning_rate": 4.7781763959016154e-05, "loss": 0.825, "step": 12020 }, { "epoch": 0.13, "learning_rate": 4.778084123187764e-05, "loss": 0.8856, "step": 12025 }, { "epoch": 0.13, "learning_rate": 4.777991850473913e-05, "loss": 0.8324, "step": 12030 }, { "epoch": 0.13, "learning_rate": 4.777899577760062e-05, "loss": 0.8752, "step": 12035 }, { "epoch": 0.13, "learning_rate": 4.7778073050462105e-05, "loss": 0.8982, "step": 12040 }, { "epoch": 0.13, "learning_rate": 4.777715032332359e-05, "loss": 0.8887, "step": 12045 }, { "epoch": 0.13, "learning_rate": 4.777622759618508e-05, "loss": 0.8929, "step": 12050 }, { "epoch": 0.13, "learning_rate": 4.777530486904657e-05, "loss": 0.8544, "step": 12055 }, { "epoch": 0.13, "learning_rate": 4.777438214190805e-05, "loss": 0.8819, "step": 12060 }, { "epoch": 0.13, "learning_rate": 4.7773459414769544e-05, "loss": 0.8568, "step": 12065 }, { "epoch": 0.13, "learning_rate": 4.777253668763103e-05, "loss": 0.8405, "step": 12070 }, { "epoch": 0.13, "learning_rate": 4.777161396049252e-05, "loss": 0.8145, "step": 12075 }, { "epoch": 0.13, "learning_rate": 4.7770691233354e-05, "loss": 0.8467, "step": 12080 }, { "epoch": 0.13, "learning_rate": 4.776976850621549e-05, "loss": 0.8646, "step": 12085 }, { "epoch": 0.13, "learning_rate": 4.7768845779076984e-05, "loss": 0.8605, "step": 12090 }, { "epoch": 0.13, "learning_rate": 4.7767923051938465e-05, "loss": 0.7983, "step": 12095 }, { "epoch": 0.13, "learning_rate": 4.776700032479995e-05, "loss": 0.8143, "step": 12100 }, { "epoch": 0.13, "learning_rate": 4.776607759766144e-05, "loss": 0.8715, "step": 12105 }, { "epoch": 0.13, "learning_rate": 4.7765154870522935e-05, "loss": 0.8919, "step": 12110 }, { "epoch": 0.13, "learning_rate": 4.7764232143384416e-05, "loss": 0.8299, "step": 12115 }, { "epoch": 0.13, "learning_rate": 4.7763309416245904e-05, "loss": 0.8271, "step": 12120 }, { "epoch": 0.13, "learning_rate": 4.776238668910739e-05, "loss": 0.8423, "step": 12125 }, { "epoch": 0.13, "learning_rate": 4.7761463961968886e-05, "loss": 0.8144, "step": 12130 }, { "epoch": 0.13, "learning_rate": 4.776054123483037e-05, "loss": 0.806, "step": 12135 }, { "epoch": 0.13, "learning_rate": 4.7759618507691855e-05, "loss": 0.8002, "step": 12140 }, { "epoch": 0.13, "learning_rate": 4.775869578055334e-05, "loss": 0.8365, "step": 12145 }, { "epoch": 0.13, "learning_rate": 4.775777305341483e-05, "loss": 0.7968, "step": 12150 }, { "epoch": 0.13, "learning_rate": 4.775685032627632e-05, "loss": 0.8472, "step": 12155 }, { "epoch": 0.13, "learning_rate": 4.775592759913781e-05, "loss": 0.82, "step": 12160 }, { "epoch": 0.13, "learning_rate": 4.7755004871999294e-05, "loss": 0.8507, "step": 12165 }, { "epoch": 0.13, "learning_rate": 4.7754082144860776e-05, "loss": 0.8047, "step": 12170 }, { "epoch": 0.13, "learning_rate": 4.775315941772227e-05, "loss": 0.7816, "step": 12175 }, { "epoch": 0.13, "learning_rate": 4.775223669058376e-05, "loss": 0.8342, "step": 12180 }, { "epoch": 0.13, "learning_rate": 4.7751313963445246e-05, "loss": 0.8781, "step": 12185 }, { "epoch": 0.13, "learning_rate": 4.775039123630673e-05, "loss": 0.845, "step": 12190 }, { "epoch": 0.14, "learning_rate": 4.774946850916822e-05, "loss": 0.8913, "step": 12195 }, { "epoch": 0.14, "learning_rate": 4.774854578202971e-05, "loss": 0.8118, "step": 12200 }, { "epoch": 0.14, "learning_rate": 4.77476230548912e-05, "loss": 0.8639, "step": 12205 }, { "epoch": 0.14, "learning_rate": 4.774670032775268e-05, "loss": 0.8722, "step": 12210 }, { "epoch": 0.14, "learning_rate": 4.774577760061417e-05, "loss": 0.8953, "step": 12215 }, { "epoch": 0.14, "learning_rate": 4.774485487347566e-05, "loss": 0.9268, "step": 12220 }, { "epoch": 0.14, "learning_rate": 4.774393214633714e-05, "loss": 0.9469, "step": 12225 }, { "epoch": 0.14, "learning_rate": 4.774300941919863e-05, "loss": 0.8951, "step": 12230 }, { "epoch": 0.14, "learning_rate": 4.774208669206012e-05, "loss": 0.8367, "step": 12235 }, { "epoch": 0.14, "learning_rate": 4.774116396492161e-05, "loss": 0.8456, "step": 12240 }, { "epoch": 0.14, "learning_rate": 4.774024123778309e-05, "loss": 0.8608, "step": 12245 }, { "epoch": 0.14, "learning_rate": 4.773931851064458e-05, "loss": 0.9189, "step": 12250 }, { "epoch": 0.14, "learning_rate": 4.773839578350607e-05, "loss": 0.873, "step": 12255 }, { "epoch": 0.14, "learning_rate": 4.773747305636756e-05, "loss": 0.797, "step": 12260 }, { "epoch": 0.14, "learning_rate": 4.7736550329229045e-05, "loss": 0.7977, "step": 12265 }, { "epoch": 0.14, "learning_rate": 4.773562760209053e-05, "loss": 0.8529, "step": 12270 }, { "epoch": 0.14, "learning_rate": 4.773470487495202e-05, "loss": 0.8456, "step": 12275 }, { "epoch": 0.14, "learning_rate": 4.773378214781351e-05, "loss": 0.8913, "step": 12280 }, { "epoch": 0.14, "learning_rate": 4.7732859420674996e-05, "loss": 0.89, "step": 12285 }, { "epoch": 0.14, "learning_rate": 4.7731936693536484e-05, "loss": 0.7735, "step": 12290 }, { "epoch": 0.14, "learning_rate": 4.773101396639797e-05, "loss": 0.8617, "step": 12295 }, { "epoch": 0.14, "learning_rate": 4.773009123925945e-05, "loss": 0.804, "step": 12300 }, { "epoch": 0.14, "learning_rate": 4.772916851212095e-05, "loss": 0.8423, "step": 12305 }, { "epoch": 0.14, "learning_rate": 4.7728245784982435e-05, "loss": 0.8826, "step": 12310 }, { "epoch": 0.14, "learning_rate": 4.772732305784392e-05, "loss": 0.8908, "step": 12315 }, { "epoch": 0.14, "learning_rate": 4.7726400330705404e-05, "loss": 0.8127, "step": 12320 }, { "epoch": 0.14, "learning_rate": 4.77254776035669e-05, "loss": 0.8431, "step": 12325 }, { "epoch": 0.14, "learning_rate": 4.772455487642839e-05, "loss": 0.839, "step": 12330 }, { "epoch": 0.14, "learning_rate": 4.772363214928987e-05, "loss": 0.8572, "step": 12335 }, { "epoch": 0.14, "learning_rate": 4.7722709422151356e-05, "loss": 0.834, "step": 12340 }, { "epoch": 0.14, "learning_rate": 4.772178669501285e-05, "loss": 0.8775, "step": 12345 }, { "epoch": 0.14, "learning_rate": 4.772086396787434e-05, "loss": 0.814, "step": 12350 }, { "epoch": 0.14, "learning_rate": 4.771994124073582e-05, "loss": 0.8437, "step": 12355 }, { "epoch": 0.14, "learning_rate": 4.771901851359731e-05, "loss": 0.8759, "step": 12360 }, { "epoch": 0.14, "learning_rate": 4.77180957864588e-05, "loss": 0.8523, "step": 12365 }, { "epoch": 0.14, "learning_rate": 4.771717305932028e-05, "loss": 0.8624, "step": 12370 }, { "epoch": 0.14, "learning_rate": 4.771625033218177e-05, "loss": 0.9025, "step": 12375 }, { "epoch": 0.14, "learning_rate": 4.771532760504326e-05, "loss": 0.8825, "step": 12380 }, { "epoch": 0.14, "learning_rate": 4.7714404877904746e-05, "loss": 0.7432, "step": 12385 }, { "epoch": 0.14, "learning_rate": 4.7713482150766234e-05, "loss": 0.849, "step": 12390 }, { "epoch": 0.14, "learning_rate": 4.771255942362772e-05, "loss": 0.805, "step": 12395 }, { "epoch": 0.14, "learning_rate": 4.771163669648921e-05, "loss": 0.9521, "step": 12400 }, { "epoch": 0.14, "learning_rate": 4.77107139693507e-05, "loss": 0.9521, "step": 12405 }, { "epoch": 0.14, "learning_rate": 4.7709791242212186e-05, "loss": 0.9883, "step": 12410 }, { "epoch": 0.14, "learning_rate": 4.770886851507367e-05, "loss": 1.1382, "step": 12415 }, { "epoch": 0.14, "learning_rate": 4.770794578793516e-05, "loss": 1.3258, "step": 12420 }, { "epoch": 0.14, "learning_rate": 4.770702306079665e-05, "loss": 1.2937, "step": 12425 }, { "epoch": 0.14, "learning_rate": 4.770610033365814e-05, "loss": 1.211, "step": 12430 }, { "epoch": 0.14, "learning_rate": 4.7705177606519625e-05, "loss": 0.9394, "step": 12435 }, { "epoch": 0.14, "learning_rate": 4.770425487938111e-05, "loss": 0.9401, "step": 12440 }, { "epoch": 0.14, "learning_rate": 4.7703332152242594e-05, "loss": 0.8694, "step": 12445 }, { "epoch": 0.14, "learning_rate": 4.770240942510408e-05, "loss": 0.9025, "step": 12450 }, { "epoch": 0.14, "learning_rate": 4.7701486697965576e-05, "loss": 0.9656, "step": 12455 }, { "epoch": 0.14, "learning_rate": 4.7700563970827064e-05, "loss": 0.9298, "step": 12460 }, { "epoch": 0.14, "learning_rate": 4.7699641243688545e-05, "loss": 0.8985, "step": 12465 }, { "epoch": 0.14, "learning_rate": 4.769871851655003e-05, "loss": 0.9132, "step": 12470 }, { "epoch": 0.14, "learning_rate": 4.769779578941153e-05, "loss": 0.9211, "step": 12475 }, { "epoch": 0.14, "learning_rate": 4.769687306227301e-05, "loss": 0.8675, "step": 12480 }, { "epoch": 0.14, "learning_rate": 4.7695950335134496e-05, "loss": 0.867, "step": 12485 }, { "epoch": 0.14, "learning_rate": 4.7695027607995984e-05, "loss": 0.8428, "step": 12490 }, { "epoch": 0.14, "learning_rate": 4.769410488085748e-05, "loss": 0.9903, "step": 12495 }, { "epoch": 0.14, "learning_rate": 4.769318215371896e-05, "loss": 0.9507, "step": 12500 }, { "epoch": 0.14, "learning_rate": 4.769225942658045e-05, "loss": 0.9467, "step": 12505 }, { "epoch": 0.14, "learning_rate": 4.7691336699441936e-05, "loss": 0.888, "step": 12510 }, { "epoch": 0.14, "learning_rate": 4.769041397230343e-05, "loss": 0.9319, "step": 12515 }, { "epoch": 0.14, "learning_rate": 4.768949124516491e-05, "loss": 0.959, "step": 12520 }, { "epoch": 0.14, "learning_rate": 4.76885685180264e-05, "loss": 0.9735, "step": 12525 }, { "epoch": 0.14, "learning_rate": 4.768764579088789e-05, "loss": 0.9704, "step": 12530 }, { "epoch": 0.14, "learning_rate": 4.7686723063749375e-05, "loss": 0.9191, "step": 12535 }, { "epoch": 0.14, "learning_rate": 4.768580033661086e-05, "loss": 0.8508, "step": 12540 }, { "epoch": 0.14, "learning_rate": 4.768487760947235e-05, "loss": 1.0028, "step": 12545 }, { "epoch": 0.14, "learning_rate": 4.768395488233384e-05, "loss": 0.9859, "step": 12550 }, { "epoch": 0.14, "learning_rate": 4.7683032155195326e-05, "loss": 0.8923, "step": 12555 }, { "epoch": 0.14, "learning_rate": 4.7682109428056814e-05, "loss": 0.9361, "step": 12560 }, { "epoch": 0.14, "learning_rate": 4.76811867009183e-05, "loss": 0.8647, "step": 12565 }, { "epoch": 0.14, "learning_rate": 4.768026397377979e-05, "loss": 0.8842, "step": 12570 }, { "epoch": 0.14, "learning_rate": 4.767934124664127e-05, "loss": 0.9184, "step": 12575 }, { "epoch": 0.14, "learning_rate": 4.7678418519502766e-05, "loss": 0.9293, "step": 12580 }, { "epoch": 0.14, "learning_rate": 4.7677495792364253e-05, "loss": 0.8945, "step": 12585 }, { "epoch": 0.14, "learning_rate": 4.767657306522574e-05, "loss": 0.9442, "step": 12590 }, { "epoch": 0.14, "learning_rate": 4.767565033808722e-05, "loss": 0.928, "step": 12595 }, { "epoch": 0.14, "learning_rate": 4.767472761094871e-05, "loss": 0.9067, "step": 12600 }, { "epoch": 0.14, "learning_rate": 4.7673804883810205e-05, "loss": 0.9241, "step": 12605 }, { "epoch": 0.14, "learning_rate": 4.7672882156671686e-05, "loss": 0.8527, "step": 12610 }, { "epoch": 0.14, "learning_rate": 4.7671959429533174e-05, "loss": 0.8786, "step": 12615 }, { "epoch": 0.14, "learning_rate": 4.767103670239466e-05, "loss": 0.8802, "step": 12620 }, { "epoch": 0.14, "learning_rate": 4.7670113975256156e-05, "loss": 0.8316, "step": 12625 }, { "epoch": 0.14, "learning_rate": 4.766919124811764e-05, "loss": 0.8029, "step": 12630 }, { "epoch": 0.14, "learning_rate": 4.7668268520979125e-05, "loss": 0.8613, "step": 12635 }, { "epoch": 0.14, "learning_rate": 4.766734579384061e-05, "loss": 0.8402, "step": 12640 }, { "epoch": 0.14, "learning_rate": 4.76664230667021e-05, "loss": 0.8565, "step": 12645 }, { "epoch": 0.14, "learning_rate": 4.766550033956359e-05, "loss": 0.8097, "step": 12650 }, { "epoch": 0.14, "learning_rate": 4.7664577612425077e-05, "loss": 0.8341, "step": 12655 }, { "epoch": 0.14, "learning_rate": 4.7663654885286564e-05, "loss": 0.7959, "step": 12660 }, { "epoch": 0.14, "learning_rate": 4.766273215814805e-05, "loss": 0.9004, "step": 12665 }, { "epoch": 0.14, "learning_rate": 4.766180943100954e-05, "loss": 0.7649, "step": 12670 }, { "epoch": 0.14, "learning_rate": 4.766088670387103e-05, "loss": 0.8536, "step": 12675 }, { "epoch": 0.14, "learning_rate": 4.7659963976732516e-05, "loss": 0.8234, "step": 12680 }, { "epoch": 0.14, "learning_rate": 4.7659041249594e-05, "loss": 0.8364, "step": 12685 }, { "epoch": 0.14, "learning_rate": 4.765811852245549e-05, "loss": 0.9852, "step": 12690 }, { "epoch": 0.14, "learning_rate": 4.765719579531698e-05, "loss": 0.9006, "step": 12695 }, { "epoch": 0.14, "learning_rate": 4.765627306817847e-05, "loss": 0.8474, "step": 12700 }, { "epoch": 0.14, "learning_rate": 4.765535034103995e-05, "loss": 0.8805, "step": 12705 }, { "epoch": 0.14, "learning_rate": 4.765442761390144e-05, "loss": 0.815, "step": 12710 }, { "epoch": 0.14, "learning_rate": 4.765350488676293e-05, "loss": 0.8409, "step": 12715 }, { "epoch": 0.14, "learning_rate": 4.765258215962441e-05, "loss": 0.8047, "step": 12720 }, { "epoch": 0.14, "learning_rate": 4.76516594324859e-05, "loss": 0.8163, "step": 12725 }, { "epoch": 0.14, "learning_rate": 4.7650736705347394e-05, "loss": 0.8461, "step": 12730 }, { "epoch": 0.14, "learning_rate": 4.764981397820888e-05, "loss": 0.8986, "step": 12735 }, { "epoch": 0.14, "learning_rate": 4.764889125107036e-05, "loss": 0.8341, "step": 12740 }, { "epoch": 0.14, "learning_rate": 4.764796852393185e-05, "loss": 0.8994, "step": 12745 }, { "epoch": 0.14, "learning_rate": 4.7647045796793346e-05, "loss": 0.91, "step": 12750 }, { "epoch": 0.14, "learning_rate": 4.764612306965483e-05, "loss": 0.8001, "step": 12755 }, { "epoch": 0.14, "learning_rate": 4.7645200342516315e-05, "loss": 0.8223, "step": 12760 }, { "epoch": 0.14, "learning_rate": 4.76442776153778e-05, "loss": 0.8054, "step": 12765 }, { "epoch": 0.14, "learning_rate": 4.764335488823929e-05, "loss": 0.858, "step": 12770 }, { "epoch": 0.14, "learning_rate": 4.764243216110078e-05, "loss": 0.8733, "step": 12775 }, { "epoch": 0.14, "learning_rate": 4.7641509433962266e-05, "loss": 0.8184, "step": 12780 }, { "epoch": 0.14, "learning_rate": 4.7640586706823754e-05, "loss": 0.9025, "step": 12785 }, { "epoch": 0.14, "learning_rate": 4.763966397968524e-05, "loss": 0.8058, "step": 12790 }, { "epoch": 0.14, "learning_rate": 4.763874125254673e-05, "loss": 0.8894, "step": 12795 }, { "epoch": 0.14, "learning_rate": 4.763781852540822e-05, "loss": 0.8013, "step": 12800 }, { "epoch": 0.14, "learning_rate": 4.7636895798269705e-05, "loss": 0.8008, "step": 12805 }, { "epoch": 0.14, "learning_rate": 4.763597307113119e-05, "loss": 0.8278, "step": 12810 }, { "epoch": 0.14, "learning_rate": 4.763505034399268e-05, "loss": 0.7965, "step": 12815 }, { "epoch": 0.14, "learning_rate": 4.763412761685417e-05, "loss": 0.8613, "step": 12820 }, { "epoch": 0.14, "learning_rate": 4.763320488971566e-05, "loss": 0.9069, "step": 12825 }, { "epoch": 0.14, "learning_rate": 4.763228216257714e-05, "loss": 0.8064, "step": 12830 }, { "epoch": 0.14, "learning_rate": 4.7631359435438626e-05, "loss": 0.8017, "step": 12835 }, { "epoch": 0.14, "learning_rate": 4.763043670830012e-05, "loss": 0.9088, "step": 12840 }, { "epoch": 0.14, "learning_rate": 4.762951398116161e-05, "loss": 0.9258, "step": 12845 }, { "epoch": 0.14, "learning_rate": 4.762859125402309e-05, "loss": 0.887, "step": 12850 }, { "epoch": 0.14, "learning_rate": 4.762766852688458e-05, "loss": 0.7996, "step": 12855 }, { "epoch": 0.14, "learning_rate": 4.762674579974607e-05, "loss": 0.8308, "step": 12860 }, { "epoch": 0.14, "learning_rate": 4.762582307260756e-05, "loss": 0.925, "step": 12865 }, { "epoch": 0.14, "learning_rate": 4.762490034546904e-05, "loss": 0.8532, "step": 12870 }, { "epoch": 0.14, "learning_rate": 4.762397761833053e-05, "loss": 0.8068, "step": 12875 }, { "epoch": 0.14, "learning_rate": 4.762305489119202e-05, "loss": 0.9327, "step": 12880 }, { "epoch": 0.14, "learning_rate": 4.7622132164053504e-05, "loss": 0.8576, "step": 12885 }, { "epoch": 0.14, "learning_rate": 4.762120943691499e-05, "loss": 0.8151, "step": 12890 }, { "epoch": 0.14, "learning_rate": 4.762028670977648e-05, "loss": 0.8186, "step": 12895 }, { "epoch": 0.14, "learning_rate": 4.7619363982637974e-05, "loss": 0.8201, "step": 12900 }, { "epoch": 0.14, "learning_rate": 4.7618441255499455e-05, "loss": 0.8221, "step": 12905 }, { "epoch": 0.14, "learning_rate": 4.761751852836094e-05, "loss": 0.8594, "step": 12910 }, { "epoch": 0.14, "learning_rate": 4.761659580122243e-05, "loss": 0.8555, "step": 12915 }, { "epoch": 0.14, "learning_rate": 4.761567307408392e-05, "loss": 0.9101, "step": 12920 }, { "epoch": 0.14, "learning_rate": 4.761475034694541e-05, "loss": 0.8455, "step": 12925 }, { "epoch": 0.14, "learning_rate": 4.7613827619806895e-05, "loss": 0.8489, "step": 12930 }, { "epoch": 0.14, "learning_rate": 4.761290489266838e-05, "loss": 0.8975, "step": 12935 }, { "epoch": 0.14, "learning_rate": 4.761198216552987e-05, "loss": 0.835, "step": 12940 }, { "epoch": 0.14, "learning_rate": 4.761105943839136e-05, "loss": 0.7951, "step": 12945 }, { "epoch": 0.14, "learning_rate": 4.7610136711252846e-05, "loss": 0.8271, "step": 12950 }, { "epoch": 0.14, "learning_rate": 4.7609213984114334e-05, "loss": 0.7807, "step": 12955 }, { "epoch": 0.14, "learning_rate": 4.7608291256975815e-05, "loss": 0.8342, "step": 12960 }, { "epoch": 0.14, "learning_rate": 4.760736852983731e-05, "loss": 0.778, "step": 12965 }, { "epoch": 0.14, "learning_rate": 4.76064458026988e-05, "loss": 0.8435, "step": 12970 }, { "epoch": 0.14, "learning_rate": 4.7605523075560285e-05, "loss": 0.8727, "step": 12975 }, { "epoch": 0.14, "learning_rate": 4.7604600348421766e-05, "loss": 0.8428, "step": 12980 }, { "epoch": 0.14, "learning_rate": 4.7603677621283254e-05, "loss": 0.8767, "step": 12985 }, { "epoch": 0.14, "learning_rate": 4.760275489414475e-05, "loss": 0.8791, "step": 12990 }, { "epoch": 0.14, "learning_rate": 4.760183216700623e-05, "loss": 0.8077, "step": 12995 }, { "epoch": 0.14, "learning_rate": 4.760090943986772e-05, "loss": 0.8054, "step": 13000 }, { "epoch": 0.14, "eval_loss": 0.7972371578216553, "eval_runtime": 69.9372, "eval_samples_per_second": 28.597, "eval_steps_per_second": 14.299, "step": 13000 }, { "epoch": 0.14, "learning_rate": 4.7599986712729206e-05, "loss": 0.9109, "step": 13005 }, { "epoch": 0.14, "learning_rate": 4.75990639855907e-05, "loss": 0.8276, "step": 13010 }, { "epoch": 0.14, "learning_rate": 4.759814125845218e-05, "loss": 0.8812, "step": 13015 }, { "epoch": 0.14, "learning_rate": 4.759721853131367e-05, "loss": 0.8249, "step": 13020 }, { "epoch": 0.14, "learning_rate": 4.759629580417516e-05, "loss": 0.8656, "step": 13025 }, { "epoch": 0.14, "learning_rate": 4.7595373077036645e-05, "loss": 0.9001, "step": 13030 }, { "epoch": 0.14, "learning_rate": 4.759445034989813e-05, "loss": 0.8353, "step": 13035 }, { "epoch": 0.14, "learning_rate": 4.759352762275962e-05, "loss": 0.7802, "step": 13040 }, { "epoch": 0.14, "learning_rate": 4.759260489562111e-05, "loss": 0.8534, "step": 13045 }, { "epoch": 0.14, "learning_rate": 4.7591682168482596e-05, "loss": 0.8429, "step": 13050 }, { "epoch": 0.14, "learning_rate": 4.7590759441344084e-05, "loss": 0.8813, "step": 13055 }, { "epoch": 0.14, "learning_rate": 4.758983671420557e-05, "loss": 0.8017, "step": 13060 }, { "epoch": 0.14, "learning_rate": 4.758891398706706e-05, "loss": 0.8072, "step": 13065 }, { "epoch": 0.14, "learning_rate": 4.758799125992854e-05, "loss": 0.8092, "step": 13070 }, { "epoch": 0.14, "learning_rate": 4.7587068532790036e-05, "loss": 0.8603, "step": 13075 }, { "epoch": 0.14, "learning_rate": 4.758614580565152e-05, "loss": 0.9214, "step": 13080 }, { "epoch": 0.14, "learning_rate": 4.758522307851301e-05, "loss": 0.8168, "step": 13085 }, { "epoch": 0.14, "learning_rate": 4.758430035137449e-05, "loss": 0.8495, "step": 13090 }, { "epoch": 0.14, "learning_rate": 4.758337762423599e-05, "loss": 0.7977, "step": 13095 }, { "epoch": 0.15, "learning_rate": 4.7582454897097475e-05, "loss": 0.8542, "step": 13100 }, { "epoch": 0.15, "learning_rate": 4.7581532169958956e-05, "loss": 0.8883, "step": 13105 }, { "epoch": 0.15, "learning_rate": 4.7580609442820444e-05, "loss": 0.8514, "step": 13110 }, { "epoch": 0.15, "learning_rate": 4.757968671568194e-05, "loss": 0.8482, "step": 13115 }, { "epoch": 0.15, "learning_rate": 4.7578763988543426e-05, "loss": 0.8698, "step": 13120 }, { "epoch": 0.15, "learning_rate": 4.757784126140491e-05, "loss": 0.8486, "step": 13125 }, { "epoch": 0.15, "learning_rate": 4.7576918534266395e-05, "loss": 0.8335, "step": 13130 }, { "epoch": 0.15, "learning_rate": 4.757599580712788e-05, "loss": 0.7999, "step": 13135 }, { "epoch": 0.15, "learning_rate": 4.757507307998937e-05, "loss": 0.797, "step": 13140 }, { "epoch": 0.15, "learning_rate": 4.757415035285086e-05, "loss": 0.8263, "step": 13145 }, { "epoch": 0.15, "learning_rate": 4.7573227625712346e-05, "loss": 0.8115, "step": 13150 }, { "epoch": 0.15, "learning_rate": 4.7572304898573834e-05, "loss": 0.8295, "step": 13155 }, { "epoch": 0.15, "learning_rate": 4.757138217143532e-05, "loss": 0.864, "step": 13160 }, { "epoch": 0.15, "learning_rate": 4.757045944429681e-05, "loss": 0.8504, "step": 13165 }, { "epoch": 0.15, "learning_rate": 4.75695367171583e-05, "loss": 0.7849, "step": 13170 }, { "epoch": 0.15, "learning_rate": 4.7568613990019786e-05, "loss": 0.8325, "step": 13175 }, { "epoch": 0.15, "learning_rate": 4.7567691262881274e-05, "loss": 0.8339, "step": 13180 }, { "epoch": 0.15, "learning_rate": 4.756676853574276e-05, "loss": 0.8322, "step": 13185 }, { "epoch": 0.15, "learning_rate": 4.756584580860425e-05, "loss": 0.8054, "step": 13190 }, { "epoch": 0.15, "learning_rate": 4.756492308146574e-05, "loss": 0.8361, "step": 13195 }, { "epoch": 0.15, "learning_rate": 4.7564000354327225e-05, "loss": 0.8705, "step": 13200 }, { "epoch": 0.15, "learning_rate": 4.756307762718871e-05, "loss": 0.9418, "step": 13205 }, { "epoch": 0.15, "learning_rate": 4.75621549000502e-05, "loss": 0.8734, "step": 13210 }, { "epoch": 0.15, "learning_rate": 4.756123217291168e-05, "loss": 0.8543, "step": 13215 }, { "epoch": 0.15, "learning_rate": 4.756030944577317e-05, "loss": 0.7989, "step": 13220 }, { "epoch": 0.15, "learning_rate": 4.7559386718634664e-05, "loss": 0.8084, "step": 13225 }, { "epoch": 0.15, "learning_rate": 4.755846399149615e-05, "loss": 0.8271, "step": 13230 }, { "epoch": 0.15, "learning_rate": 4.755754126435763e-05, "loss": 0.8475, "step": 13235 }, { "epoch": 0.15, "learning_rate": 4.755661853721912e-05, "loss": 0.8445, "step": 13240 }, { "epoch": 0.15, "learning_rate": 4.7555695810080616e-05, "loss": 0.814, "step": 13245 }, { "epoch": 0.15, "learning_rate": 4.7554773082942103e-05, "loss": 0.7791, "step": 13250 }, { "epoch": 0.15, "learning_rate": 4.7553850355803585e-05, "loss": 0.8449, "step": 13255 }, { "epoch": 0.15, "learning_rate": 4.755292762866507e-05, "loss": 0.8281, "step": 13260 }, { "epoch": 0.15, "learning_rate": 4.755200490152657e-05, "loss": 0.874, "step": 13265 }, { "epoch": 0.15, "learning_rate": 4.755108217438805e-05, "loss": 0.7854, "step": 13270 }, { "epoch": 0.15, "learning_rate": 4.7550159447249536e-05, "loss": 0.8304, "step": 13275 }, { "epoch": 0.15, "learning_rate": 4.7549236720111024e-05, "loss": 0.8579, "step": 13280 }, { "epoch": 0.15, "learning_rate": 4.754831399297251e-05, "loss": 0.8263, "step": 13285 }, { "epoch": 0.15, "learning_rate": 4.7547391265834e-05, "loss": 0.8717, "step": 13290 }, { "epoch": 0.15, "learning_rate": 4.754646853869549e-05, "loss": 0.8224, "step": 13295 }, { "epoch": 0.15, "learning_rate": 4.7545545811556975e-05, "loss": 0.7969, "step": 13300 }, { "epoch": 0.15, "learning_rate": 4.754462308441846e-05, "loss": 0.8644, "step": 13305 }, { "epoch": 0.15, "learning_rate": 4.754370035727995e-05, "loss": 0.8404, "step": 13310 }, { "epoch": 0.15, "learning_rate": 4.754277763014144e-05, "loss": 0.8061, "step": 13315 }, { "epoch": 0.15, "learning_rate": 4.7541854903002927e-05, "loss": 0.8334, "step": 13320 }, { "epoch": 0.15, "learning_rate": 4.7540932175864414e-05, "loss": 0.8855, "step": 13325 }, { "epoch": 0.15, "learning_rate": 4.75400094487259e-05, "loss": 0.7323, "step": 13330 }, { "epoch": 0.15, "learning_rate": 4.753908672158739e-05, "loss": 0.8627, "step": 13335 }, { "epoch": 0.15, "learning_rate": 4.753816399444888e-05, "loss": 0.8431, "step": 13340 }, { "epoch": 0.15, "learning_rate": 4.753724126731036e-05, "loss": 0.8931, "step": 13345 }, { "epoch": 0.15, "learning_rate": 4.7536318540171854e-05, "loss": 0.9047, "step": 13350 }, { "epoch": 0.15, "learning_rate": 4.753539581303334e-05, "loss": 0.8285, "step": 13355 }, { "epoch": 0.15, "learning_rate": 4.753447308589483e-05, "loss": 0.838, "step": 13360 }, { "epoch": 0.15, "learning_rate": 4.753355035875631e-05, "loss": 0.841, "step": 13365 }, { "epoch": 0.15, "learning_rate": 4.75326276316178e-05, "loss": 0.8128, "step": 13370 }, { "epoch": 0.15, "learning_rate": 4.753170490447929e-05, "loss": 0.8307, "step": 13375 }, { "epoch": 0.15, "learning_rate": 4.7530782177340774e-05, "loss": 0.7972, "step": 13380 }, { "epoch": 0.15, "learning_rate": 4.752985945020226e-05, "loss": 0.9193, "step": 13385 }, { "epoch": 0.15, "learning_rate": 4.752893672306375e-05, "loss": 0.8021, "step": 13390 }, { "epoch": 0.15, "learning_rate": 4.7528013995925244e-05, "loss": 0.8194, "step": 13395 }, { "epoch": 0.15, "learning_rate": 4.7527091268786725e-05, "loss": 0.8131, "step": 13400 }, { "epoch": 0.15, "learning_rate": 4.752616854164821e-05, "loss": 0.8703, "step": 13405 }, { "epoch": 0.15, "learning_rate": 4.75252458145097e-05, "loss": 0.7853, "step": 13410 }, { "epoch": 0.15, "learning_rate": 4.752432308737119e-05, "loss": 0.8873, "step": 13415 }, { "epoch": 0.15, "learning_rate": 4.752340036023268e-05, "loss": 0.8037, "step": 13420 }, { "epoch": 0.15, "learning_rate": 4.7522477633094165e-05, "loss": 0.8548, "step": 13425 }, { "epoch": 0.15, "learning_rate": 4.752155490595565e-05, "loss": 0.7686, "step": 13430 }, { "epoch": 0.15, "learning_rate": 4.752063217881714e-05, "loss": 0.7872, "step": 13435 }, { "epoch": 0.15, "learning_rate": 4.751970945167863e-05, "loss": 0.8368, "step": 13440 }, { "epoch": 0.15, "learning_rate": 4.7518786724540116e-05, "loss": 0.8446, "step": 13445 }, { "epoch": 0.15, "learning_rate": 4.7517863997401604e-05, "loss": 0.8222, "step": 13450 }, { "epoch": 0.15, "learning_rate": 4.7516941270263085e-05, "loss": 0.7993, "step": 13455 }, { "epoch": 0.15, "learning_rate": 4.751601854312458e-05, "loss": 0.8619, "step": 13460 }, { "epoch": 0.15, "learning_rate": 4.751509581598607e-05, "loss": 0.904, "step": 13465 }, { "epoch": 0.15, "learning_rate": 4.7514173088847555e-05, "loss": 0.8198, "step": 13470 }, { "epoch": 0.15, "learning_rate": 4.7513250361709036e-05, "loss": 0.7504, "step": 13475 }, { "epoch": 0.15, "learning_rate": 4.751232763457053e-05, "loss": 0.8904, "step": 13480 }, { "epoch": 0.15, "learning_rate": 4.751140490743202e-05, "loss": 0.8324, "step": 13485 }, { "epoch": 0.15, "learning_rate": 4.75104821802935e-05, "loss": 0.8457, "step": 13490 }, { "epoch": 0.15, "learning_rate": 4.750955945315499e-05, "loss": 0.8312, "step": 13495 }, { "epoch": 0.15, "learning_rate": 4.750863672601648e-05, "loss": 0.8066, "step": 13500 }, { "epoch": 0.15, "learning_rate": 4.750771399887797e-05, "loss": 0.8857, "step": 13505 }, { "epoch": 0.15, "learning_rate": 4.750679127173945e-05, "loss": 0.8553, "step": 13510 }, { "epoch": 0.15, "learning_rate": 4.750586854460094e-05, "loss": 0.7777, "step": 13515 }, { "epoch": 0.15, "learning_rate": 4.750494581746243e-05, "loss": 0.8806, "step": 13520 }, { "epoch": 0.15, "learning_rate": 4.7504023090323915e-05, "loss": 0.7598, "step": 13525 }, { "epoch": 0.15, "learning_rate": 4.75031003631854e-05, "loss": 0.8374, "step": 13530 }, { "epoch": 0.15, "learning_rate": 4.750217763604689e-05, "loss": 0.9035, "step": 13535 }, { "epoch": 0.15, "learning_rate": 4.750125490890838e-05, "loss": 0.8719, "step": 13540 }, { "epoch": 0.15, "learning_rate": 4.7500332181769866e-05, "loss": 0.8614, "step": 13545 }, { "epoch": 0.15, "learning_rate": 4.7499409454631354e-05, "loss": 0.8373, "step": 13550 }, { "epoch": 0.15, "learning_rate": 4.749848672749284e-05, "loss": 0.7916, "step": 13555 }, { "epoch": 0.15, "learning_rate": 4.749756400035433e-05, "loss": 0.8109, "step": 13560 }, { "epoch": 0.15, "learning_rate": 4.749664127321582e-05, "loss": 0.7989, "step": 13565 }, { "epoch": 0.15, "learning_rate": 4.7495718546077305e-05, "loss": 0.7805, "step": 13570 }, { "epoch": 0.15, "learning_rate": 4.749479581893879e-05, "loss": 0.827, "step": 13575 }, { "epoch": 0.15, "learning_rate": 4.749387309180028e-05, "loss": 0.8649, "step": 13580 }, { "epoch": 0.15, "learning_rate": 4.749295036466177e-05, "loss": 0.8185, "step": 13585 }, { "epoch": 0.15, "learning_rate": 4.749202763752326e-05, "loss": 0.8271, "step": 13590 }, { "epoch": 0.15, "learning_rate": 4.7491104910384745e-05, "loss": 0.7876, "step": 13595 }, { "epoch": 0.15, "learning_rate": 4.7490182183246226e-05, "loss": 0.8325, "step": 13600 }, { "epoch": 0.15, "learning_rate": 4.7489259456107714e-05, "loss": 0.8326, "step": 13605 }, { "epoch": 0.15, "learning_rate": 4.748833672896921e-05, "loss": 0.8525, "step": 13610 }, { "epoch": 0.15, "learning_rate": 4.7487414001830696e-05, "loss": 0.7772, "step": 13615 }, { "epoch": 0.15, "learning_rate": 4.748649127469218e-05, "loss": 0.8179, "step": 13620 }, { "epoch": 0.15, "learning_rate": 4.7485568547553665e-05, "loss": 0.8179, "step": 13625 }, { "epoch": 0.15, "learning_rate": 4.748464582041516e-05, "loss": 0.8504, "step": 13630 }, { "epoch": 0.15, "learning_rate": 4.748372309327665e-05, "loss": 0.8139, "step": 13635 }, { "epoch": 0.15, "learning_rate": 4.748280036613813e-05, "loss": 0.8688, "step": 13640 }, { "epoch": 0.15, "learning_rate": 4.7481877638999616e-05, "loss": 0.8459, "step": 13645 }, { "epoch": 0.15, "learning_rate": 4.748095491186111e-05, "loss": 0.8426, "step": 13650 }, { "epoch": 0.15, "learning_rate": 4.748003218472259e-05, "loss": 0.8379, "step": 13655 }, { "epoch": 0.15, "learning_rate": 4.747910945758408e-05, "loss": 0.8928, "step": 13660 }, { "epoch": 0.15, "learning_rate": 4.747818673044557e-05, "loss": 0.9038, "step": 13665 }, { "epoch": 0.15, "learning_rate": 4.7477264003307056e-05, "loss": 0.8941, "step": 13670 }, { "epoch": 0.15, "learning_rate": 4.7476341276168543e-05, "loss": 0.8353, "step": 13675 }, { "epoch": 0.15, "learning_rate": 4.747541854903003e-05, "loss": 0.8121, "step": 13680 }, { "epoch": 0.15, "learning_rate": 4.747449582189152e-05, "loss": 0.8159, "step": 13685 }, { "epoch": 0.15, "learning_rate": 4.747357309475301e-05, "loss": 0.8503, "step": 13690 }, { "epoch": 0.15, "learning_rate": 4.7472650367614495e-05, "loss": 0.788, "step": 13695 }, { "epoch": 0.15, "learning_rate": 4.747172764047598e-05, "loss": 0.8461, "step": 13700 }, { "epoch": 0.15, "learning_rate": 4.747080491333747e-05, "loss": 0.879, "step": 13705 }, { "epoch": 0.15, "learning_rate": 4.746988218619896e-05, "loss": 0.8609, "step": 13710 }, { "epoch": 0.15, "learning_rate": 4.7468959459060446e-05, "loss": 0.804, "step": 13715 }, { "epoch": 0.15, "learning_rate": 4.7468036731921934e-05, "loss": 0.7879, "step": 13720 }, { "epoch": 0.15, "learning_rate": 4.746711400478342e-05, "loss": 0.8537, "step": 13725 }, { "epoch": 0.15, "learning_rate": 4.74661912776449e-05, "loss": 0.806, "step": 13730 }, { "epoch": 0.15, "learning_rate": 4.74652685505064e-05, "loss": 0.8787, "step": 13735 }, { "epoch": 0.15, "learning_rate": 4.7464345823367885e-05, "loss": 0.8564, "step": 13740 }, { "epoch": 0.15, "learning_rate": 4.746342309622937e-05, "loss": 0.7761, "step": 13745 }, { "epoch": 0.15, "learning_rate": 4.7462500369090854e-05, "loss": 0.8745, "step": 13750 }, { "epoch": 0.15, "learning_rate": 4.746157764195234e-05, "loss": 0.8461, "step": 13755 }, { "epoch": 0.15, "learning_rate": 4.746065491481384e-05, "loss": 0.8327, "step": 13760 }, { "epoch": 0.15, "learning_rate": 4.745973218767532e-05, "loss": 0.7849, "step": 13765 }, { "epoch": 0.15, "learning_rate": 4.7458809460536806e-05, "loss": 0.835, "step": 13770 }, { "epoch": 0.15, "learning_rate": 4.7457886733398294e-05, "loss": 0.8404, "step": 13775 }, { "epoch": 0.15, "learning_rate": 4.745696400625979e-05, "loss": 0.7996, "step": 13780 }, { "epoch": 0.15, "learning_rate": 4.745604127912127e-05, "loss": 0.8754, "step": 13785 }, { "epoch": 0.15, "learning_rate": 4.745511855198276e-05, "loss": 0.8119, "step": 13790 }, { "epoch": 0.15, "learning_rate": 4.7454195824844245e-05, "loss": 0.8209, "step": 13795 }, { "epoch": 0.15, "learning_rate": 4.745327309770573e-05, "loss": 0.8705, "step": 13800 }, { "epoch": 0.15, "learning_rate": 4.745235037056722e-05, "loss": 0.87, "step": 13805 }, { "epoch": 0.15, "learning_rate": 4.745142764342871e-05, "loss": 0.8292, "step": 13810 }, { "epoch": 0.15, "learning_rate": 4.7450504916290196e-05, "loss": 0.8465, "step": 13815 }, { "epoch": 0.15, "learning_rate": 4.7449582189151684e-05, "loss": 0.8422, "step": 13820 }, { "epoch": 0.15, "learning_rate": 4.744865946201317e-05, "loss": 0.8033, "step": 13825 }, { "epoch": 0.15, "learning_rate": 4.744773673487466e-05, "loss": 0.8631, "step": 13830 }, { "epoch": 0.15, "learning_rate": 4.744681400773615e-05, "loss": 0.8476, "step": 13835 }, { "epoch": 0.15, "learning_rate": 4.744589128059763e-05, "loss": 0.8284, "step": 13840 }, { "epoch": 0.15, "learning_rate": 4.7444968553459124e-05, "loss": 0.778, "step": 13845 }, { "epoch": 0.15, "learning_rate": 4.744404582632061e-05, "loss": 0.8569, "step": 13850 }, { "epoch": 0.15, "learning_rate": 4.74431230991821e-05, "loss": 0.8405, "step": 13855 }, { "epoch": 0.15, "learning_rate": 4.744220037204358e-05, "loss": 0.8766, "step": 13860 }, { "epoch": 0.15, "learning_rate": 4.7441277644905075e-05, "loss": 0.8086, "step": 13865 }, { "epoch": 0.15, "learning_rate": 4.744035491776656e-05, "loss": 0.9039, "step": 13870 }, { "epoch": 0.15, "learning_rate": 4.7439432190628044e-05, "loss": 0.7771, "step": 13875 }, { "epoch": 0.15, "learning_rate": 4.743850946348953e-05, "loss": 0.8026, "step": 13880 }, { "epoch": 0.15, "learning_rate": 4.7437586736351026e-05, "loss": 0.842, "step": 13885 }, { "epoch": 0.15, "learning_rate": 4.7436664009212514e-05, "loss": 0.8351, "step": 13890 }, { "epoch": 0.15, "learning_rate": 4.7435741282073995e-05, "loss": 0.8212, "step": 13895 }, { "epoch": 0.15, "learning_rate": 4.743481855493548e-05, "loss": 0.8501, "step": 13900 }, { "epoch": 0.15, "learning_rate": 4.743389582779697e-05, "loss": 0.8269, "step": 13905 }, { "epoch": 0.15, "learning_rate": 4.743297310065846e-05, "loss": 0.8661, "step": 13910 }, { "epoch": 0.15, "learning_rate": 4.743205037351995e-05, "loss": 0.9271, "step": 13915 }, { "epoch": 0.15, "learning_rate": 4.7431127646381434e-05, "loss": 0.8397, "step": 13920 }, { "epoch": 0.15, "learning_rate": 4.743020491924292e-05, "loss": 0.7838, "step": 13925 }, { "epoch": 0.15, "learning_rate": 4.742928219210441e-05, "loss": 0.8451, "step": 13930 }, { "epoch": 0.15, "learning_rate": 4.74283594649659e-05, "loss": 0.7705, "step": 13935 }, { "epoch": 0.15, "learning_rate": 4.7427436737827386e-05, "loss": 0.8949, "step": 13940 }, { "epoch": 0.15, "learning_rate": 4.7426514010688874e-05, "loss": 0.8256, "step": 13945 }, { "epoch": 0.15, "learning_rate": 4.742559128355036e-05, "loss": 0.8379, "step": 13950 }, { "epoch": 0.15, "learning_rate": 4.742466855641185e-05, "loss": 0.8081, "step": 13955 }, { "epoch": 0.15, "learning_rate": 4.742374582927334e-05, "loss": 0.8, "step": 13960 }, { "epoch": 0.15, "learning_rate": 4.7422823102134825e-05, "loss": 0.8175, "step": 13965 }, { "epoch": 0.15, "learning_rate": 4.7421900374996306e-05, "loss": 0.8513, "step": 13970 }, { "epoch": 0.15, "learning_rate": 4.74209776478578e-05, "loss": 0.8226, "step": 13975 }, { "epoch": 0.15, "learning_rate": 4.742005492071929e-05, "loss": 0.8381, "step": 13980 }, { "epoch": 0.15, "learning_rate": 4.741913219358077e-05, "loss": 0.8389, "step": 13985 }, { "epoch": 0.15, "learning_rate": 4.741820946644226e-05, "loss": 0.9518, "step": 13990 }, { "epoch": 0.15, "learning_rate": 4.741728673930375e-05, "loss": 0.8526, "step": 13995 }, { "epoch": 0.16, "learning_rate": 4.741636401216524e-05, "loss": 0.8934, "step": 14000 }, { "epoch": 0.16, "eval_loss": 0.7998090982437134, "eval_runtime": 69.9007, "eval_samples_per_second": 28.612, "eval_steps_per_second": 14.306, "step": 14000 }, { "epoch": 0.16, "learning_rate": 4.741544128502672e-05, "loss": 0.8435, "step": 14005 }, { "epoch": 0.16, "learning_rate": 4.741451855788821e-05, "loss": 0.8792, "step": 14010 }, { "epoch": 0.16, "learning_rate": 4.7413595830749704e-05, "loss": 0.8946, "step": 14015 }, { "epoch": 0.16, "learning_rate": 4.741267310361119e-05, "loss": 0.8346, "step": 14020 }, { "epoch": 0.16, "learning_rate": 4.741175037647267e-05, "loss": 0.7998, "step": 14025 }, { "epoch": 0.16, "learning_rate": 4.741082764933416e-05, "loss": 0.8089, "step": 14030 }, { "epoch": 0.16, "learning_rate": 4.7409904922195655e-05, "loss": 0.8097, "step": 14035 }, { "epoch": 0.16, "learning_rate": 4.7408982195057136e-05, "loss": 0.7933, "step": 14040 }, { "epoch": 0.16, "learning_rate": 4.7408059467918624e-05, "loss": 0.8576, "step": 14045 }, { "epoch": 0.16, "learning_rate": 4.740713674078011e-05, "loss": 0.9217, "step": 14050 }, { "epoch": 0.16, "learning_rate": 4.74062140136416e-05, "loss": 0.8378, "step": 14055 }, { "epoch": 0.16, "learning_rate": 4.740529128650309e-05, "loss": 0.7513, "step": 14060 }, { "epoch": 0.16, "learning_rate": 4.7404368559364575e-05, "loss": 0.7837, "step": 14065 }, { "epoch": 0.16, "learning_rate": 4.740344583222606e-05, "loss": 0.8071, "step": 14070 }, { "epoch": 0.16, "learning_rate": 4.740252310508755e-05, "loss": 0.8239, "step": 14075 }, { "epoch": 0.16, "learning_rate": 4.740160037794904e-05, "loss": 0.8328, "step": 14080 }, { "epoch": 0.16, "learning_rate": 4.740067765081053e-05, "loss": 0.8829, "step": 14085 }, { "epoch": 0.16, "learning_rate": 4.7399754923672015e-05, "loss": 0.8229, "step": 14090 }, { "epoch": 0.16, "learning_rate": 4.73988321965335e-05, "loss": 0.8851, "step": 14095 }, { "epoch": 0.16, "learning_rate": 4.739790946939499e-05, "loss": 0.7843, "step": 14100 }, { "epoch": 0.16, "learning_rate": 4.739698674225648e-05, "loss": 0.7946, "step": 14105 }, { "epoch": 0.16, "learning_rate": 4.7396064015117966e-05, "loss": 0.8941, "step": 14110 }, { "epoch": 0.16, "learning_rate": 4.739514128797945e-05, "loss": 0.8463, "step": 14115 }, { "epoch": 0.16, "learning_rate": 4.7394218560840935e-05, "loss": 0.8859, "step": 14120 }, { "epoch": 0.16, "learning_rate": 4.739329583370243e-05, "loss": 0.8661, "step": 14125 }, { "epoch": 0.16, "learning_rate": 4.739237310656392e-05, "loss": 0.8438, "step": 14130 }, { "epoch": 0.16, "learning_rate": 4.73914503794254e-05, "loss": 0.8505, "step": 14135 }, { "epoch": 0.16, "learning_rate": 4.7390527652286886e-05, "loss": 0.8367, "step": 14140 }, { "epoch": 0.16, "learning_rate": 4.738960492514838e-05, "loss": 0.8512, "step": 14145 }, { "epoch": 0.16, "learning_rate": 4.738868219800986e-05, "loss": 0.8162, "step": 14150 }, { "epoch": 0.16, "learning_rate": 4.738775947087135e-05, "loss": 0.8712, "step": 14155 }, { "epoch": 0.16, "learning_rate": 4.738683674373284e-05, "loss": 0.8199, "step": 14160 }, { "epoch": 0.16, "learning_rate": 4.738591401659433e-05, "loss": 0.8169, "step": 14165 }, { "epoch": 0.16, "learning_rate": 4.738499128945581e-05, "loss": 0.8511, "step": 14170 }, { "epoch": 0.16, "learning_rate": 4.73840685623173e-05, "loss": 0.816, "step": 14175 }, { "epoch": 0.16, "learning_rate": 4.738314583517879e-05, "loss": 0.9109, "step": 14180 }, { "epoch": 0.16, "learning_rate": 4.738222310804028e-05, "loss": 0.7942, "step": 14185 }, { "epoch": 0.16, "learning_rate": 4.7381300380901765e-05, "loss": 0.7994, "step": 14190 }, { "epoch": 0.16, "learning_rate": 4.738037765376325e-05, "loss": 0.8377, "step": 14195 }, { "epoch": 0.16, "learning_rate": 4.737945492662474e-05, "loss": 0.8964, "step": 14200 }, { "epoch": 0.16, "learning_rate": 4.737853219948623e-05, "loss": 0.8299, "step": 14205 }, { "epoch": 0.16, "learning_rate": 4.7377609472347716e-05, "loss": 0.8361, "step": 14210 }, { "epoch": 0.16, "learning_rate": 4.7376686745209204e-05, "loss": 0.8004, "step": 14215 }, { "epoch": 0.16, "learning_rate": 4.737576401807069e-05, "loss": 0.8543, "step": 14220 }, { "epoch": 0.16, "learning_rate": 4.737484129093217e-05, "loss": 0.7158, "step": 14225 }, { "epoch": 0.16, "learning_rate": 4.737391856379367e-05, "loss": 0.7852, "step": 14230 }, { "epoch": 0.16, "learning_rate": 4.7372995836655155e-05, "loss": 0.7778, "step": 14235 }, { "epoch": 0.16, "learning_rate": 4.737207310951664e-05, "loss": 0.8741, "step": 14240 }, { "epoch": 0.16, "learning_rate": 4.7371150382378124e-05, "loss": 0.7868, "step": 14245 }, { "epoch": 0.16, "learning_rate": 4.737022765523962e-05, "loss": 0.8552, "step": 14250 }, { "epoch": 0.16, "learning_rate": 4.736930492810111e-05, "loss": 0.812, "step": 14255 }, { "epoch": 0.16, "learning_rate": 4.736838220096259e-05, "loss": 0.911, "step": 14260 }, { "epoch": 0.16, "learning_rate": 4.7367459473824076e-05, "loss": 0.8135, "step": 14265 }, { "epoch": 0.16, "learning_rate": 4.7366536746685564e-05, "loss": 0.7876, "step": 14270 }, { "epoch": 0.16, "learning_rate": 4.736561401954706e-05, "loss": 0.8525, "step": 14275 }, { "epoch": 0.16, "learning_rate": 4.736469129240854e-05, "loss": 0.863, "step": 14280 }, { "epoch": 0.16, "learning_rate": 4.736376856527003e-05, "loss": 0.8469, "step": 14285 }, { "epoch": 0.16, "learning_rate": 4.7362845838131515e-05, "loss": 0.8314, "step": 14290 }, { "epoch": 0.16, "learning_rate": 4.7361923110993e-05, "loss": 0.792, "step": 14295 }, { "epoch": 0.16, "learning_rate": 4.736100038385449e-05, "loss": 0.8508, "step": 14300 }, { "epoch": 0.16, "learning_rate": 4.736007765671598e-05, "loss": 0.8708, "step": 14305 }, { "epoch": 0.16, "learning_rate": 4.7359154929577466e-05, "loss": 0.8042, "step": 14310 }, { "epoch": 0.16, "learning_rate": 4.7358232202438954e-05, "loss": 0.826, "step": 14315 }, { "epoch": 0.16, "learning_rate": 4.735730947530044e-05, "loss": 0.7842, "step": 14320 }, { "epoch": 0.16, "learning_rate": 4.735638674816193e-05, "loss": 0.7743, "step": 14325 }, { "epoch": 0.16, "learning_rate": 4.735546402102342e-05, "loss": 0.8266, "step": 14330 }, { "epoch": 0.16, "learning_rate": 4.7354541293884906e-05, "loss": 0.8634, "step": 14335 }, { "epoch": 0.16, "learning_rate": 4.7353618566746393e-05, "loss": 0.819, "step": 14340 }, { "epoch": 0.16, "learning_rate": 4.735269583960788e-05, "loss": 0.8305, "step": 14345 }, { "epoch": 0.16, "learning_rate": 4.735177311246937e-05, "loss": 0.8445, "step": 14350 }, { "epoch": 0.16, "learning_rate": 4.735085038533085e-05, "loss": 0.7814, "step": 14355 }, { "epoch": 0.16, "learning_rate": 4.7349927658192345e-05, "loss": 0.7532, "step": 14360 }, { "epoch": 0.16, "learning_rate": 4.734900493105383e-05, "loss": 0.8671, "step": 14365 }, { "epoch": 0.16, "learning_rate": 4.7348082203915314e-05, "loss": 0.7768, "step": 14370 }, { "epoch": 0.16, "learning_rate": 4.73471594767768e-05, "loss": 0.807, "step": 14375 }, { "epoch": 0.16, "learning_rate": 4.7346236749638296e-05, "loss": 0.8074, "step": 14380 }, { "epoch": 0.16, "learning_rate": 4.7345314022499784e-05, "loss": 0.8171, "step": 14385 }, { "epoch": 0.16, "learning_rate": 4.7344391295361265e-05, "loss": 0.8173, "step": 14390 }, { "epoch": 0.16, "learning_rate": 4.734346856822275e-05, "loss": 0.773, "step": 14395 }, { "epoch": 0.16, "learning_rate": 4.734254584108425e-05, "loss": 0.7873, "step": 14400 }, { "epoch": 0.16, "learning_rate": 4.7341623113945735e-05, "loss": 0.8883, "step": 14405 }, { "epoch": 0.16, "learning_rate": 4.7340700386807217e-05, "loss": 0.8677, "step": 14410 }, { "epoch": 0.16, "learning_rate": 4.7339777659668704e-05, "loss": 0.8592, "step": 14415 }, { "epoch": 0.16, "learning_rate": 4.73388549325302e-05, "loss": 0.8137, "step": 14420 }, { "epoch": 0.16, "learning_rate": 4.733793220539168e-05, "loss": 0.8211, "step": 14425 }, { "epoch": 0.16, "learning_rate": 4.733700947825317e-05, "loss": 0.7943, "step": 14430 }, { "epoch": 0.16, "learning_rate": 4.7336086751114656e-05, "loss": 0.7926, "step": 14435 }, { "epoch": 0.16, "learning_rate": 4.7335164023976144e-05, "loss": 0.8368, "step": 14440 }, { "epoch": 0.16, "learning_rate": 4.733424129683763e-05, "loss": 0.8609, "step": 14445 }, { "epoch": 0.16, "learning_rate": 4.733331856969912e-05, "loss": 0.7992, "step": 14450 }, { "epoch": 0.16, "learning_rate": 4.733239584256061e-05, "loss": 0.838, "step": 14455 }, { "epoch": 0.16, "learning_rate": 4.7331473115422095e-05, "loss": 0.8789, "step": 14460 }, { "epoch": 0.16, "learning_rate": 4.733055038828358e-05, "loss": 0.8229, "step": 14465 }, { "epoch": 0.16, "learning_rate": 4.732962766114507e-05, "loss": 0.9016, "step": 14470 }, { "epoch": 0.16, "learning_rate": 4.732870493400656e-05, "loss": 0.883, "step": 14475 }, { "epoch": 0.16, "learning_rate": 4.7327782206868046e-05, "loss": 0.7941, "step": 14480 }, { "epoch": 0.16, "learning_rate": 4.7326859479729534e-05, "loss": 0.8666, "step": 14485 }, { "epoch": 0.16, "learning_rate": 4.732593675259102e-05, "loss": 0.8268, "step": 14490 }, { "epoch": 0.16, "learning_rate": 4.732501402545251e-05, "loss": 0.8765, "step": 14495 }, { "epoch": 0.16, "learning_rate": 4.732409129831399e-05, "loss": 0.7724, "step": 14500 }, { "epoch": 0.16, "learning_rate": 4.732316857117548e-05, "loss": 0.89, "step": 14505 }, { "epoch": 0.16, "learning_rate": 4.7322245844036974e-05, "loss": 0.8385, "step": 14510 }, { "epoch": 0.16, "learning_rate": 4.732132311689846e-05, "loss": 0.8624, "step": 14515 }, { "epoch": 0.16, "learning_rate": 4.732040038975994e-05, "loss": 0.8328, "step": 14520 }, { "epoch": 0.16, "learning_rate": 4.731947766262143e-05, "loss": 0.8373, "step": 14525 }, { "epoch": 0.16, "learning_rate": 4.7318554935482925e-05, "loss": 0.8495, "step": 14530 }, { "epoch": 0.16, "learning_rate": 4.7317632208344406e-05, "loss": 0.7939, "step": 14535 }, { "epoch": 0.16, "learning_rate": 4.7316709481205894e-05, "loss": 0.9252, "step": 14540 }, { "epoch": 0.16, "learning_rate": 4.731578675406738e-05, "loss": 0.8132, "step": 14545 }, { "epoch": 0.16, "learning_rate": 4.7314864026928876e-05, "loss": 0.8165, "step": 14550 }, { "epoch": 0.16, "learning_rate": 4.731394129979036e-05, "loss": 0.7857, "step": 14555 }, { "epoch": 0.16, "learning_rate": 4.7313018572651845e-05, "loss": 0.7796, "step": 14560 }, { "epoch": 0.16, "learning_rate": 4.731209584551333e-05, "loss": 0.8163, "step": 14565 }, { "epoch": 0.16, "learning_rate": 4.731117311837482e-05, "loss": 0.8602, "step": 14570 }, { "epoch": 0.16, "learning_rate": 4.731025039123631e-05, "loss": 0.8861, "step": 14575 }, { "epoch": 0.16, "learning_rate": 4.73093276640978e-05, "loss": 0.8225, "step": 14580 }, { "epoch": 0.16, "learning_rate": 4.7308404936959284e-05, "loss": 0.7767, "step": 14585 }, { "epoch": 0.16, "learning_rate": 4.730748220982077e-05, "loss": 0.8162, "step": 14590 }, { "epoch": 0.16, "learning_rate": 4.730655948268226e-05, "loss": 0.9053, "step": 14595 }, { "epoch": 0.16, "learning_rate": 4.730563675554375e-05, "loss": 0.8439, "step": 14600 }, { "epoch": 0.16, "learning_rate": 4.7304714028405236e-05, "loss": 0.7998, "step": 14605 }, { "epoch": 0.16, "learning_rate": 4.730379130126672e-05, "loss": 0.8197, "step": 14610 }, { "epoch": 0.16, "learning_rate": 4.730286857412821e-05, "loss": 0.8693, "step": 14615 }, { "epoch": 0.16, "learning_rate": 4.73019458469897e-05, "loss": 0.8434, "step": 14620 }, { "epoch": 0.16, "learning_rate": 4.730102311985119e-05, "loss": 0.8651, "step": 14625 }, { "epoch": 0.16, "learning_rate": 4.730010039271267e-05, "loss": 0.8786, "step": 14630 }, { "epoch": 0.16, "learning_rate": 4.729917766557416e-05, "loss": 0.8993, "step": 14635 }, { "epoch": 0.16, "learning_rate": 4.729825493843565e-05, "loss": 0.8338, "step": 14640 }, { "epoch": 0.16, "learning_rate": 4.729733221129713e-05, "loss": 0.8643, "step": 14645 }, { "epoch": 0.16, "learning_rate": 4.729640948415862e-05, "loss": 0.7321, "step": 14650 }, { "epoch": 0.16, "learning_rate": 4.729548675702011e-05, "loss": 0.7922, "step": 14655 }, { "epoch": 0.16, "learning_rate": 4.72945640298816e-05, "loss": 0.867, "step": 14660 }, { "epoch": 0.16, "learning_rate": 4.729364130274308e-05, "loss": 0.7746, "step": 14665 }, { "epoch": 0.16, "learning_rate": 4.729271857560457e-05, "loss": 0.7915, "step": 14670 }, { "epoch": 0.16, "learning_rate": 4.729179584846606e-05, "loss": 0.7913, "step": 14675 }, { "epoch": 0.16, "learning_rate": 4.729087312132755e-05, "loss": 0.822, "step": 14680 }, { "epoch": 0.16, "learning_rate": 4.7289950394189035e-05, "loss": 0.8522, "step": 14685 }, { "epoch": 0.16, "learning_rate": 4.728902766705052e-05, "loss": 0.8017, "step": 14690 }, { "epoch": 0.16, "learning_rate": 4.728810493991201e-05, "loss": 0.8267, "step": 14695 }, { "epoch": 0.16, "learning_rate": 4.72871822127735e-05, "loss": 0.8462, "step": 14700 }, { "epoch": 0.16, "learning_rate": 4.7286259485634986e-05, "loss": 0.7824, "step": 14705 }, { "epoch": 0.16, "learning_rate": 4.7285336758496474e-05, "loss": 0.8141, "step": 14710 }, { "epoch": 0.16, "learning_rate": 4.728441403135796e-05, "loss": 0.8842, "step": 14715 }, { "epoch": 0.16, "learning_rate": 4.728349130421945e-05, "loss": 0.8019, "step": 14720 }, { "epoch": 0.16, "learning_rate": 4.728256857708094e-05, "loss": 0.7585, "step": 14725 }, { "epoch": 0.16, "learning_rate": 4.7281645849942425e-05, "loss": 0.863, "step": 14730 }, { "epoch": 0.16, "learning_rate": 4.728072312280391e-05, "loss": 0.7858, "step": 14735 }, { "epoch": 0.16, "learning_rate": 4.7279800395665394e-05, "loss": 0.8981, "step": 14740 }, { "epoch": 0.16, "learning_rate": 4.727887766852689e-05, "loss": 0.7734, "step": 14745 }, { "epoch": 0.16, "learning_rate": 4.727795494138838e-05, "loss": 0.8699, "step": 14750 }, { "epoch": 0.16, "learning_rate": 4.727703221424986e-05, "loss": 0.8447, "step": 14755 }, { "epoch": 0.16, "learning_rate": 4.7276109487111346e-05, "loss": 0.8089, "step": 14760 }, { "epoch": 0.16, "learning_rate": 4.727518675997284e-05, "loss": 0.7877, "step": 14765 }, { "epoch": 0.16, "learning_rate": 4.727426403283433e-05, "loss": 0.8272, "step": 14770 }, { "epoch": 0.16, "learning_rate": 4.727334130569581e-05, "loss": 0.8229, "step": 14775 }, { "epoch": 0.16, "learning_rate": 4.72724185785573e-05, "loss": 0.8158, "step": 14780 }, { "epoch": 0.16, "learning_rate": 4.727149585141879e-05, "loss": 0.856, "step": 14785 }, { "epoch": 0.16, "learning_rate": 4.727057312428028e-05, "loss": 0.8045, "step": 14790 }, { "epoch": 0.16, "learning_rate": 4.726965039714176e-05, "loss": 0.7846, "step": 14795 }, { "epoch": 0.16, "learning_rate": 4.726872767000325e-05, "loss": 0.7787, "step": 14800 }, { "epoch": 0.16, "learning_rate": 4.7267804942864736e-05, "loss": 0.8607, "step": 14805 }, { "epoch": 0.16, "learning_rate": 4.7266882215726224e-05, "loss": 0.8703, "step": 14810 }, { "epoch": 0.16, "learning_rate": 4.726595948858771e-05, "loss": 0.8551, "step": 14815 }, { "epoch": 0.16, "learning_rate": 4.72650367614492e-05, "loss": 0.8325, "step": 14820 }, { "epoch": 0.16, "learning_rate": 4.726411403431069e-05, "loss": 0.8152, "step": 14825 }, { "epoch": 0.16, "learning_rate": 4.7263191307172176e-05, "loss": 0.8076, "step": 14830 }, { "epoch": 0.16, "learning_rate": 4.726226858003366e-05, "loss": 0.8388, "step": 14835 }, { "epoch": 0.16, "learning_rate": 4.726134585289515e-05, "loss": 0.8392, "step": 14840 }, { "epoch": 0.16, "learning_rate": 4.726042312575664e-05, "loss": 0.8521, "step": 14845 }, { "epoch": 0.16, "learning_rate": 4.725950039861813e-05, "loss": 0.8388, "step": 14850 }, { "epoch": 0.16, "learning_rate": 4.7258577671479615e-05, "loss": 0.8626, "step": 14855 }, { "epoch": 0.16, "learning_rate": 4.72576549443411e-05, "loss": 0.8429, "step": 14860 }, { "epoch": 0.16, "learning_rate": 4.725673221720259e-05, "loss": 0.8477, "step": 14865 }, { "epoch": 0.16, "learning_rate": 4.725580949006408e-05, "loss": 0.8194, "step": 14870 }, { "epoch": 0.16, "learning_rate": 4.7254886762925566e-05, "loss": 0.8412, "step": 14875 }, { "epoch": 0.16, "learning_rate": 4.7253964035787054e-05, "loss": 0.7728, "step": 14880 }, { "epoch": 0.16, "learning_rate": 4.7253041308648535e-05, "loss": 0.8163, "step": 14885 }, { "epoch": 0.16, "learning_rate": 4.725211858151002e-05, "loss": 0.8678, "step": 14890 }, { "epoch": 0.16, "learning_rate": 4.725119585437152e-05, "loss": 0.7976, "step": 14895 }, { "epoch": 0.16, "learning_rate": 4.7250273127233005e-05, "loss": 0.828, "step": 14900 }, { "epoch": 0.17, "learning_rate": 4.7249350400094486e-05, "loss": 0.8536, "step": 14905 }, { "epoch": 0.17, "learning_rate": 4.7248427672955974e-05, "loss": 0.7929, "step": 14910 }, { "epoch": 0.17, "learning_rate": 4.724750494581747e-05, "loss": 0.7463, "step": 14915 }, { "epoch": 0.17, "learning_rate": 4.724658221867895e-05, "loss": 0.8219, "step": 14920 }, { "epoch": 0.17, "learning_rate": 4.724565949154044e-05, "loss": 0.8047, "step": 14925 }, { "epoch": 0.17, "learning_rate": 4.7244736764401926e-05, "loss": 0.8895, "step": 14930 }, { "epoch": 0.17, "learning_rate": 4.724381403726342e-05, "loss": 0.8023, "step": 14935 }, { "epoch": 0.17, "learning_rate": 4.72428913101249e-05, "loss": 0.8036, "step": 14940 }, { "epoch": 0.17, "learning_rate": 4.724196858298639e-05, "loss": 0.8011, "step": 14945 }, { "epoch": 0.17, "learning_rate": 4.724104585584788e-05, "loss": 0.778, "step": 14950 }, { "epoch": 0.17, "learning_rate": 4.7240123128709365e-05, "loss": 0.8341, "step": 14955 }, { "epoch": 0.17, "learning_rate": 4.723920040157085e-05, "loss": 0.8514, "step": 14960 }, { "epoch": 0.17, "learning_rate": 4.723827767443234e-05, "loss": 0.8256, "step": 14965 }, { "epoch": 0.17, "learning_rate": 4.723735494729383e-05, "loss": 0.8135, "step": 14970 }, { "epoch": 0.17, "learning_rate": 4.7236432220155316e-05, "loss": 0.7621, "step": 14975 }, { "epoch": 0.17, "learning_rate": 4.7235509493016804e-05, "loss": 0.8142, "step": 14980 }, { "epoch": 0.17, "learning_rate": 4.723458676587829e-05, "loss": 0.7941, "step": 14985 }, { "epoch": 0.17, "learning_rate": 4.723366403873978e-05, "loss": 0.8094, "step": 14990 }, { "epoch": 0.17, "learning_rate": 4.723274131160126e-05, "loss": 0.768, "step": 14995 }, { "epoch": 0.17, "learning_rate": 4.7231818584462756e-05, "loss": 0.7884, "step": 15000 }, { "epoch": 0.17, "eval_loss": 0.7895081639289856, "eval_runtime": 70.7083, "eval_samples_per_second": 28.285, "eval_steps_per_second": 14.143, "step": 15000 }, { "epoch": 0.17, "learning_rate": 4.7230895857324243e-05, "loss": 0.833, "step": 15005 }, { "epoch": 0.17, "learning_rate": 4.722997313018573e-05, "loss": 0.807, "step": 15010 }, { "epoch": 0.17, "learning_rate": 4.722905040304721e-05, "loss": 0.8879, "step": 15015 }, { "epoch": 0.17, "learning_rate": 4.722812767590871e-05, "loss": 0.8154, "step": 15020 }, { "epoch": 0.17, "learning_rate": 4.7227204948770195e-05, "loss": 0.8689, "step": 15025 }, { "epoch": 0.17, "learning_rate": 4.7226282221631676e-05, "loss": 0.8008, "step": 15030 }, { "epoch": 0.17, "learning_rate": 4.7225359494493164e-05, "loss": 0.8388, "step": 15035 }, { "epoch": 0.17, "learning_rate": 4.722443676735465e-05, "loss": 0.7922, "step": 15040 }, { "epoch": 0.17, "learning_rate": 4.7223514040216146e-05, "loss": 0.7991, "step": 15045 }, { "epoch": 0.17, "learning_rate": 4.722259131307763e-05, "loss": 0.8319, "step": 15050 }, { "epoch": 0.17, "learning_rate": 4.7221668585939115e-05, "loss": 0.8118, "step": 15055 }, { "epoch": 0.17, "learning_rate": 4.72207458588006e-05, "loss": 0.8543, "step": 15060 }, { "epoch": 0.17, "learning_rate": 4.72198231316621e-05, "loss": 0.7852, "step": 15065 }, { "epoch": 0.17, "learning_rate": 4.721890040452358e-05, "loss": 0.8311, "step": 15070 }, { "epoch": 0.17, "learning_rate": 4.7217977677385067e-05, "loss": 0.9311, "step": 15075 }, { "epoch": 0.17, "learning_rate": 4.7217054950246554e-05, "loss": 0.834, "step": 15080 }, { "epoch": 0.17, "learning_rate": 4.721613222310804e-05, "loss": 0.8348, "step": 15085 }, { "epoch": 0.17, "learning_rate": 4.721520949596953e-05, "loss": 0.7989, "step": 15090 }, { "epoch": 0.17, "learning_rate": 4.721428676883102e-05, "loss": 0.7965, "step": 15095 }, { "epoch": 0.17, "learning_rate": 4.7213364041692506e-05, "loss": 0.77, "step": 15100 }, { "epoch": 0.17, "learning_rate": 4.721244131455399e-05, "loss": 0.8059, "step": 15105 }, { "epoch": 0.17, "learning_rate": 4.721151858741548e-05, "loss": 0.7913, "step": 15110 }, { "epoch": 0.17, "learning_rate": 4.721059586027697e-05, "loss": 0.7786, "step": 15115 }, { "epoch": 0.17, "learning_rate": 4.720967313313846e-05, "loss": 0.8798, "step": 15120 }, { "epoch": 0.17, "learning_rate": 4.720875040599994e-05, "loss": 0.8607, "step": 15125 }, { "epoch": 0.17, "learning_rate": 4.720782767886143e-05, "loss": 0.8992, "step": 15130 }, { "epoch": 0.17, "learning_rate": 4.720690495172292e-05, "loss": 0.8095, "step": 15135 }, { "epoch": 0.17, "learning_rate": 4.720598222458441e-05, "loss": 0.8404, "step": 15140 }, { "epoch": 0.17, "learning_rate": 4.720505949744589e-05, "loss": 0.8034, "step": 15145 }, { "epoch": 0.17, "learning_rate": 4.7204136770307384e-05, "loss": 0.8528, "step": 15150 }, { "epoch": 0.17, "learning_rate": 4.720321404316887e-05, "loss": 0.8604, "step": 15155 }, { "epoch": 0.17, "learning_rate": 4.720229131603035e-05, "loss": 0.8158, "step": 15160 }, { "epoch": 0.17, "learning_rate": 4.720136858889184e-05, "loss": 0.7521, "step": 15165 }, { "epoch": 0.17, "learning_rate": 4.7200445861753336e-05, "loss": 0.845, "step": 15170 }, { "epoch": 0.17, "learning_rate": 4.7199523134614824e-05, "loss": 0.8608, "step": 15175 }, { "epoch": 0.17, "learning_rate": 4.7198600407476305e-05, "loss": 0.7776, "step": 15180 }, { "epoch": 0.17, "learning_rate": 4.719767768033779e-05, "loss": 0.8037, "step": 15185 }, { "epoch": 0.17, "learning_rate": 4.719675495319928e-05, "loss": 0.8317, "step": 15190 }, { "epoch": 0.17, "learning_rate": 4.719583222606077e-05, "loss": 0.8098, "step": 15195 }, { "epoch": 0.17, "learning_rate": 4.7194909498922256e-05, "loss": 0.8846, "step": 15200 }, { "epoch": 0.17, "learning_rate": 4.7193986771783744e-05, "loss": 0.8078, "step": 15205 }, { "epoch": 0.17, "learning_rate": 4.719306404464523e-05, "loss": 0.7556, "step": 15210 }, { "epoch": 0.17, "learning_rate": 4.719214131750672e-05, "loss": 0.8063, "step": 15215 }, { "epoch": 0.17, "learning_rate": 4.719121859036821e-05, "loss": 0.8364, "step": 15220 }, { "epoch": 0.17, "learning_rate": 4.7190295863229695e-05, "loss": 0.8575, "step": 15225 }, { "epoch": 0.17, "learning_rate": 4.718937313609118e-05, "loss": 0.8669, "step": 15230 }, { "epoch": 0.17, "learning_rate": 4.718845040895267e-05, "loss": 0.8434, "step": 15235 }, { "epoch": 0.17, "learning_rate": 4.718752768181416e-05, "loss": 0.8003, "step": 15240 }, { "epoch": 0.17, "learning_rate": 4.718660495467565e-05, "loss": 0.7883, "step": 15245 }, { "epoch": 0.17, "learning_rate": 4.7185682227537134e-05, "loss": 0.7971, "step": 15250 }, { "epoch": 0.17, "learning_rate": 4.718475950039862e-05, "loss": 0.8422, "step": 15255 }, { "epoch": 0.17, "learning_rate": 4.718383677326011e-05, "loss": 0.8592, "step": 15260 }, { "epoch": 0.17, "learning_rate": 4.71829140461216e-05, "loss": 0.8047, "step": 15265 }, { "epoch": 0.17, "learning_rate": 4.718199131898308e-05, "loss": 0.8449, "step": 15270 }, { "epoch": 0.17, "learning_rate": 4.718106859184457e-05, "loss": 0.8046, "step": 15275 }, { "epoch": 0.17, "learning_rate": 4.718014586470606e-05, "loss": 0.8373, "step": 15280 }, { "epoch": 0.17, "learning_rate": 4.717922313756755e-05, "loss": 0.8003, "step": 15285 }, { "epoch": 0.17, "learning_rate": 4.717830041042903e-05, "loss": 0.8491, "step": 15290 }, { "epoch": 0.17, "learning_rate": 4.717737768329052e-05, "loss": 0.8542, "step": 15295 }, { "epoch": 0.17, "learning_rate": 4.717645495615201e-05, "loss": 0.8365, "step": 15300 }, { "epoch": 0.17, "learning_rate": 4.7175532229013494e-05, "loss": 0.7822, "step": 15305 }, { "epoch": 0.17, "learning_rate": 4.717460950187498e-05, "loss": 0.8715, "step": 15310 }, { "epoch": 0.17, "learning_rate": 4.717368677473647e-05, "loss": 0.8102, "step": 15315 }, { "epoch": 0.17, "learning_rate": 4.7172764047597964e-05, "loss": 0.8129, "step": 15320 }, { "epoch": 0.17, "learning_rate": 4.7171841320459445e-05, "loss": 0.9033, "step": 15325 }, { "epoch": 0.17, "learning_rate": 4.717091859332093e-05, "loss": 0.8277, "step": 15330 }, { "epoch": 0.17, "learning_rate": 4.716999586618242e-05, "loss": 0.8273, "step": 15335 }, { "epoch": 0.17, "learning_rate": 4.716907313904391e-05, "loss": 0.8084, "step": 15340 }, { "epoch": 0.17, "learning_rate": 4.71681504119054e-05, "loss": 0.8835, "step": 15345 }, { "epoch": 0.17, "learning_rate": 4.7167227684766885e-05, "loss": 0.8145, "step": 15350 }, { "epoch": 0.17, "learning_rate": 4.716630495762837e-05, "loss": 0.86, "step": 15355 }, { "epoch": 0.17, "learning_rate": 4.716538223048986e-05, "loss": 0.8095, "step": 15360 }, { "epoch": 0.17, "learning_rate": 4.716445950335135e-05, "loss": 0.7946, "step": 15365 }, { "epoch": 0.17, "learning_rate": 4.7163536776212836e-05, "loss": 0.841, "step": 15370 }, { "epoch": 0.17, "learning_rate": 4.7162614049074324e-05, "loss": 0.7823, "step": 15375 }, { "epoch": 0.17, "learning_rate": 4.7161691321935805e-05, "loss": 0.8784, "step": 15380 }, { "epoch": 0.17, "learning_rate": 4.71607685947973e-05, "loss": 0.8176, "step": 15385 }, { "epoch": 0.17, "learning_rate": 4.715984586765879e-05, "loss": 0.794, "step": 15390 }, { "epoch": 0.17, "learning_rate": 4.7158923140520275e-05, "loss": 0.8302, "step": 15395 }, { "epoch": 0.17, "learning_rate": 4.7158000413381756e-05, "loss": 0.7619, "step": 15400 }, { "epoch": 0.17, "learning_rate": 4.715707768624325e-05, "loss": 0.852, "step": 15405 }, { "epoch": 0.17, "learning_rate": 4.715615495910474e-05, "loss": 0.8132, "step": 15410 }, { "epoch": 0.17, "learning_rate": 4.715523223196622e-05, "loss": 0.8483, "step": 15415 }, { "epoch": 0.17, "learning_rate": 4.715430950482771e-05, "loss": 0.8119, "step": 15420 }, { "epoch": 0.17, "learning_rate": 4.7153386777689196e-05, "loss": 0.8068, "step": 15425 }, { "epoch": 0.17, "learning_rate": 4.715246405055069e-05, "loss": 0.8356, "step": 15430 }, { "epoch": 0.17, "learning_rate": 4.715154132341217e-05, "loss": 0.8549, "step": 15435 }, { "epoch": 0.17, "learning_rate": 4.715061859627366e-05, "loss": 0.8019, "step": 15440 }, { "epoch": 0.17, "learning_rate": 4.714969586913515e-05, "loss": 0.8182, "step": 15445 }, { "epoch": 0.17, "learning_rate": 4.714877314199664e-05, "loss": 0.8474, "step": 15450 }, { "epoch": 0.17, "learning_rate": 4.714785041485812e-05, "loss": 0.8571, "step": 15455 }, { "epoch": 0.17, "learning_rate": 4.714692768771961e-05, "loss": 0.8692, "step": 15460 }, { "epoch": 0.17, "learning_rate": 4.71460049605811e-05, "loss": 0.9102, "step": 15465 }, { "epoch": 0.17, "learning_rate": 4.7145082233442586e-05, "loss": 0.8142, "step": 15470 }, { "epoch": 0.17, "learning_rate": 4.7144159506304074e-05, "loss": 0.8673, "step": 15475 }, { "epoch": 0.17, "learning_rate": 4.714323677916556e-05, "loss": 0.8239, "step": 15480 }, { "epoch": 0.17, "learning_rate": 4.714231405202705e-05, "loss": 0.7953, "step": 15485 }, { "epoch": 0.17, "learning_rate": 4.714139132488853e-05, "loss": 0.8166, "step": 15490 }, { "epoch": 0.17, "learning_rate": 4.7140468597750026e-05, "loss": 0.8223, "step": 15495 }, { "epoch": 0.17, "learning_rate": 4.713954587061151e-05, "loss": 0.8761, "step": 15500 }, { "epoch": 0.17, "learning_rate": 4.7138623143473e-05, "loss": 0.8327, "step": 15505 }, { "epoch": 0.17, "learning_rate": 4.713770041633448e-05, "loss": 0.8862, "step": 15510 }, { "epoch": 0.17, "learning_rate": 4.713677768919598e-05, "loss": 0.8767, "step": 15515 }, { "epoch": 0.17, "learning_rate": 4.7135854962057465e-05, "loss": 0.8371, "step": 15520 }, { "epoch": 0.17, "learning_rate": 4.713493223491895e-05, "loss": 0.8284, "step": 15525 }, { "epoch": 0.17, "learning_rate": 4.7134009507780434e-05, "loss": 0.8221, "step": 15530 }, { "epoch": 0.17, "learning_rate": 4.713308678064193e-05, "loss": 0.7925, "step": 15535 }, { "epoch": 0.17, "learning_rate": 4.7132164053503416e-05, "loss": 0.9383, "step": 15540 }, { "epoch": 0.17, "learning_rate": 4.71312413263649e-05, "loss": 0.8051, "step": 15545 }, { "epoch": 0.17, "learning_rate": 4.7130318599226385e-05, "loss": 0.8235, "step": 15550 }, { "epoch": 0.17, "learning_rate": 4.712939587208788e-05, "loss": 0.7674, "step": 15555 }, { "epoch": 0.17, "learning_rate": 4.712847314494937e-05, "loss": 0.8221, "step": 15560 }, { "epoch": 0.17, "learning_rate": 4.712755041781085e-05, "loss": 0.8193, "step": 15565 }, { "epoch": 0.17, "learning_rate": 4.7126627690672336e-05, "loss": 0.7434, "step": 15570 }, { "epoch": 0.17, "learning_rate": 4.7125704963533824e-05, "loss": 0.8242, "step": 15575 }, { "epoch": 0.17, "learning_rate": 4.712478223639531e-05, "loss": 0.8193, "step": 15580 }, { "epoch": 0.17, "learning_rate": 4.71238595092568e-05, "loss": 0.8341, "step": 15585 }, { "epoch": 0.17, "learning_rate": 4.712293678211829e-05, "loss": 0.8064, "step": 15590 }, { "epoch": 0.17, "learning_rate": 4.7122014054979776e-05, "loss": 0.7821, "step": 15595 }, { "epoch": 0.17, "learning_rate": 4.7121091327841264e-05, "loss": 0.8577, "step": 15600 }, { "epoch": 0.17, "learning_rate": 4.712016860070275e-05, "loss": 0.8388, "step": 15605 }, { "epoch": 0.17, "learning_rate": 4.711924587356424e-05, "loss": 0.8406, "step": 15610 }, { "epoch": 0.17, "learning_rate": 4.711832314642573e-05, "loss": 0.8572, "step": 15615 }, { "epoch": 0.17, "learning_rate": 4.7117400419287215e-05, "loss": 0.841, "step": 15620 }, { "epoch": 0.17, "learning_rate": 4.71164776921487e-05, "loss": 0.8386, "step": 15625 }, { "epoch": 0.17, "learning_rate": 4.711555496501019e-05, "loss": 0.8444, "step": 15630 }, { "epoch": 0.17, "learning_rate": 4.711463223787168e-05, "loss": 0.8707, "step": 15635 }, { "epoch": 0.17, "learning_rate": 4.711370951073316e-05, "loss": 0.8645, "step": 15640 }, { "epoch": 0.17, "learning_rate": 4.7112786783594654e-05, "loss": 0.8284, "step": 15645 }, { "epoch": 0.17, "learning_rate": 4.711186405645614e-05, "loss": 0.857, "step": 15650 }, { "epoch": 0.17, "learning_rate": 4.711094132931762e-05, "loss": 0.8278, "step": 15655 }, { "epoch": 0.17, "learning_rate": 4.711001860217911e-05, "loss": 0.8311, "step": 15660 }, { "epoch": 0.17, "learning_rate": 4.7109095875040606e-05, "loss": 0.8881, "step": 15665 }, { "epoch": 0.17, "learning_rate": 4.7108173147902093e-05, "loss": 0.8281, "step": 15670 }, { "epoch": 0.17, "learning_rate": 4.7107250420763575e-05, "loss": 0.8868, "step": 15675 }, { "epoch": 0.17, "learning_rate": 4.710632769362506e-05, "loss": 0.8217, "step": 15680 }, { "epoch": 0.17, "learning_rate": 4.710540496648656e-05, "loss": 0.8019, "step": 15685 }, { "epoch": 0.17, "learning_rate": 4.710448223934804e-05, "loss": 0.8496, "step": 15690 }, { "epoch": 0.17, "learning_rate": 4.7103559512209526e-05, "loss": 0.8435, "step": 15695 }, { "epoch": 0.17, "learning_rate": 4.7102636785071014e-05, "loss": 0.9144, "step": 15700 }, { "epoch": 0.17, "learning_rate": 4.710171405793251e-05, "loss": 0.821, "step": 15705 }, { "epoch": 0.17, "learning_rate": 4.710079133079399e-05, "loss": 0.8584, "step": 15710 }, { "epoch": 0.17, "learning_rate": 4.709986860365548e-05, "loss": 0.8185, "step": 15715 }, { "epoch": 0.17, "learning_rate": 4.7098945876516965e-05, "loss": 0.7711, "step": 15720 }, { "epoch": 0.17, "learning_rate": 4.709802314937845e-05, "loss": 0.8245, "step": 15725 }, { "epoch": 0.17, "learning_rate": 4.709710042223994e-05, "loss": 0.7969, "step": 15730 }, { "epoch": 0.17, "learning_rate": 4.709617769510143e-05, "loss": 0.7605, "step": 15735 }, { "epoch": 0.17, "learning_rate": 4.7095254967962917e-05, "loss": 0.7575, "step": 15740 }, { "epoch": 0.17, "learning_rate": 4.7094332240824404e-05, "loss": 0.7769, "step": 15745 }, { "epoch": 0.17, "learning_rate": 4.709340951368589e-05, "loss": 0.814, "step": 15750 }, { "epoch": 0.17, "learning_rate": 4.709248678654738e-05, "loss": 0.8884, "step": 15755 }, { "epoch": 0.17, "learning_rate": 4.709156405940887e-05, "loss": 0.8232, "step": 15760 }, { "epoch": 0.17, "learning_rate": 4.709064133227035e-05, "loss": 0.8511, "step": 15765 }, { "epoch": 0.17, "learning_rate": 4.7089718605131844e-05, "loss": 0.8176, "step": 15770 }, { "epoch": 0.17, "learning_rate": 4.708879587799333e-05, "loss": 0.8826, "step": 15775 }, { "epoch": 0.17, "learning_rate": 4.708787315085482e-05, "loss": 0.8248, "step": 15780 }, { "epoch": 0.17, "learning_rate": 4.70869504237163e-05, "loss": 0.8635, "step": 15785 }, { "epoch": 0.17, "learning_rate": 4.708602769657779e-05, "loss": 0.813, "step": 15790 }, { "epoch": 0.17, "learning_rate": 4.708510496943928e-05, "loss": 0.7777, "step": 15795 }, { "epoch": 0.17, "learning_rate": 4.7084182242300764e-05, "loss": 0.818, "step": 15800 }, { "epoch": 0.18, "learning_rate": 4.708325951516225e-05, "loss": 0.7854, "step": 15805 }, { "epoch": 0.18, "learning_rate": 4.708233678802374e-05, "loss": 0.8859, "step": 15810 }, { "epoch": 0.18, "learning_rate": 4.7081414060885234e-05, "loss": 0.7831, "step": 15815 }, { "epoch": 0.18, "learning_rate": 4.7080491333746715e-05, "loss": 0.8618, "step": 15820 }, { "epoch": 0.18, "learning_rate": 4.70795686066082e-05, "loss": 0.8654, "step": 15825 }, { "epoch": 0.18, "learning_rate": 4.707864587946969e-05, "loss": 0.8337, "step": 15830 }, { "epoch": 0.18, "learning_rate": 4.7077723152331186e-05, "loss": 0.8293, "step": 15835 }, { "epoch": 0.18, "learning_rate": 4.707680042519267e-05, "loss": 0.8138, "step": 15840 }, { "epoch": 0.18, "learning_rate": 4.7075877698054155e-05, "loss": 0.8737, "step": 15845 }, { "epoch": 0.18, "learning_rate": 4.707495497091564e-05, "loss": 0.8354, "step": 15850 }, { "epoch": 0.18, "learning_rate": 4.707403224377713e-05, "loss": 0.845, "step": 15855 }, { "epoch": 0.18, "learning_rate": 4.707310951663862e-05, "loss": 0.7658, "step": 15860 }, { "epoch": 0.18, "learning_rate": 4.7072186789500106e-05, "loss": 0.8172, "step": 15865 }, { "epoch": 0.18, "learning_rate": 4.7071264062361594e-05, "loss": 0.7821, "step": 15870 }, { "epoch": 0.18, "learning_rate": 4.7070341335223075e-05, "loss": 0.897, "step": 15875 }, { "epoch": 0.18, "learning_rate": 4.706941860808457e-05, "loss": 0.8273, "step": 15880 }, { "epoch": 0.18, "learning_rate": 4.706849588094606e-05, "loss": 0.8465, "step": 15885 }, { "epoch": 0.18, "learning_rate": 4.7067573153807545e-05, "loss": 0.8227, "step": 15890 }, { "epoch": 0.18, "learning_rate": 4.7066650426669026e-05, "loss": 0.8125, "step": 15895 }, { "epoch": 0.18, "learning_rate": 4.706572769953052e-05, "loss": 0.8714, "step": 15900 }, { "epoch": 0.18, "learning_rate": 4.706480497239201e-05, "loss": 0.807, "step": 15905 }, { "epoch": 0.18, "learning_rate": 4.70638822452535e-05, "loss": 0.8936, "step": 15910 }, { "epoch": 0.18, "learning_rate": 4.706295951811498e-05, "loss": 0.8375, "step": 15915 }, { "epoch": 0.18, "learning_rate": 4.706203679097647e-05, "loss": 0.8602, "step": 15920 }, { "epoch": 0.18, "learning_rate": 4.706111406383796e-05, "loss": 0.8356, "step": 15925 }, { "epoch": 0.18, "learning_rate": 4.706019133669944e-05, "loss": 0.8204, "step": 15930 }, { "epoch": 0.18, "learning_rate": 4.705926860956093e-05, "loss": 0.8389, "step": 15935 }, { "epoch": 0.18, "learning_rate": 4.705834588242242e-05, "loss": 0.9226, "step": 15940 }, { "epoch": 0.18, "learning_rate": 4.705742315528391e-05, "loss": 0.8215, "step": 15945 }, { "epoch": 0.18, "learning_rate": 4.705650042814539e-05, "loss": 0.7955, "step": 15950 }, { "epoch": 0.18, "learning_rate": 4.705557770100688e-05, "loss": 0.7495, "step": 15955 }, { "epoch": 0.18, "learning_rate": 4.705465497386837e-05, "loss": 0.8321, "step": 15960 }, { "epoch": 0.18, "learning_rate": 4.7053732246729856e-05, "loss": 0.8234, "step": 15965 }, { "epoch": 0.18, "learning_rate": 4.7052809519591344e-05, "loss": 0.8301, "step": 15970 }, { "epoch": 0.18, "learning_rate": 4.705188679245283e-05, "loss": 0.8885, "step": 15975 }, { "epoch": 0.18, "learning_rate": 4.705096406531432e-05, "loss": 0.7896, "step": 15980 }, { "epoch": 0.18, "learning_rate": 4.705004133817581e-05, "loss": 0.8253, "step": 15985 }, { "epoch": 0.18, "learning_rate": 4.7049118611037295e-05, "loss": 0.812, "step": 15990 }, { "epoch": 0.18, "learning_rate": 4.704819588389878e-05, "loss": 0.832, "step": 15995 }, { "epoch": 0.18, "learning_rate": 4.704727315676027e-05, "loss": 0.8278, "step": 16000 }, { "epoch": 0.18, "eval_loss": 0.758574366569519, "eval_runtime": 70.3634, "eval_samples_per_second": 28.424, "eval_steps_per_second": 14.212, "step": 16000 }, { "epoch": 0.18, "learning_rate": 4.704635042962176e-05, "loss": 0.8436, "step": 16005 }, { "epoch": 0.18, "learning_rate": 4.704542770248325e-05, "loss": 0.8735, "step": 16010 }, { "epoch": 0.18, "learning_rate": 4.7044504975344735e-05, "loss": 0.8292, "step": 16015 }, { "epoch": 0.18, "learning_rate": 4.704358224820622e-05, "loss": 0.7685, "step": 16020 }, { "epoch": 0.18, "learning_rate": 4.7042659521067704e-05, "loss": 0.8583, "step": 16025 }, { "epoch": 0.18, "learning_rate": 4.70417367939292e-05, "loss": 0.7855, "step": 16030 }, { "epoch": 0.18, "learning_rate": 4.7040814066790686e-05, "loss": 0.7891, "step": 16035 }, { "epoch": 0.18, "learning_rate": 4.703989133965217e-05, "loss": 0.7979, "step": 16040 }, { "epoch": 0.18, "learning_rate": 4.7038968612513655e-05, "loss": 0.8171, "step": 16045 }, { "epoch": 0.18, "learning_rate": 4.703804588537515e-05, "loss": 0.8116, "step": 16050 }, { "epoch": 0.18, "learning_rate": 4.703712315823664e-05, "loss": 0.822, "step": 16055 }, { "epoch": 0.18, "learning_rate": 4.703620043109812e-05, "loss": 0.8155, "step": 16060 }, { "epoch": 0.18, "learning_rate": 4.7035277703959606e-05, "loss": 0.789, "step": 16065 }, { "epoch": 0.18, "learning_rate": 4.70343549768211e-05, "loss": 0.8341, "step": 16070 }, { "epoch": 0.18, "learning_rate": 4.703343224968258e-05, "loss": 0.8038, "step": 16075 }, { "epoch": 0.18, "learning_rate": 4.703250952254407e-05, "loss": 0.8139, "step": 16080 }, { "epoch": 0.18, "learning_rate": 4.703158679540556e-05, "loss": 0.8515, "step": 16085 }, { "epoch": 0.18, "learning_rate": 4.703066406826705e-05, "loss": 0.8035, "step": 16090 }, { "epoch": 0.18, "learning_rate": 4.7029741341128533e-05, "loss": 0.8161, "step": 16095 }, { "epoch": 0.18, "learning_rate": 4.702881861399002e-05, "loss": 0.8505, "step": 16100 }, { "epoch": 0.18, "learning_rate": 4.702789588685151e-05, "loss": 0.8142, "step": 16105 }, { "epoch": 0.18, "learning_rate": 4.7026973159713e-05, "loss": 0.8244, "step": 16110 }, { "epoch": 0.18, "learning_rate": 4.7026050432574485e-05, "loss": 0.8123, "step": 16115 }, { "epoch": 0.18, "learning_rate": 4.702512770543597e-05, "loss": 0.8115, "step": 16120 }, { "epoch": 0.18, "learning_rate": 4.702420497829746e-05, "loss": 0.7597, "step": 16125 }, { "epoch": 0.18, "learning_rate": 4.702328225115895e-05, "loss": 0.7713, "step": 16130 }, { "epoch": 0.18, "learning_rate": 4.7022359524020436e-05, "loss": 0.8207, "step": 16135 }, { "epoch": 0.18, "learning_rate": 4.7021436796881924e-05, "loss": 0.8631, "step": 16140 }, { "epoch": 0.18, "learning_rate": 4.702051406974341e-05, "loss": 0.8301, "step": 16145 }, { "epoch": 0.18, "learning_rate": 4.701959134260489e-05, "loss": 0.8102, "step": 16150 }, { "epoch": 0.18, "learning_rate": 4.701866861546639e-05, "loss": 0.8077, "step": 16155 }, { "epoch": 0.18, "learning_rate": 4.7017745888327876e-05, "loss": 0.8348, "step": 16160 }, { "epoch": 0.18, "learning_rate": 4.701682316118936e-05, "loss": 0.8555, "step": 16165 }, { "epoch": 0.18, "learning_rate": 4.7015900434050844e-05, "loss": 0.8308, "step": 16170 }, { "epoch": 0.18, "learning_rate": 4.701497770691233e-05, "loss": 0.8022, "step": 16175 }, { "epoch": 0.18, "learning_rate": 4.701405497977383e-05, "loss": 0.8242, "step": 16180 }, { "epoch": 0.18, "learning_rate": 4.701313225263531e-05, "loss": 0.7798, "step": 16185 }, { "epoch": 0.18, "learning_rate": 4.7012209525496796e-05, "loss": 0.8131, "step": 16190 }, { "epoch": 0.18, "learning_rate": 4.7011286798358284e-05, "loss": 0.9182, "step": 16195 }, { "epoch": 0.18, "learning_rate": 4.701036407121978e-05, "loss": 0.8685, "step": 16200 }, { "epoch": 0.18, "learning_rate": 4.700944134408126e-05, "loss": 0.7873, "step": 16205 }, { "epoch": 0.18, "learning_rate": 4.700851861694275e-05, "loss": 0.7959, "step": 16210 }, { "epoch": 0.18, "learning_rate": 4.7007595889804235e-05, "loss": 0.8234, "step": 16215 }, { "epoch": 0.18, "learning_rate": 4.700667316266573e-05, "loss": 0.7817, "step": 16220 }, { "epoch": 0.18, "learning_rate": 4.700575043552721e-05, "loss": 0.8032, "step": 16225 }, { "epoch": 0.18, "learning_rate": 4.70048277083887e-05, "loss": 0.8382, "step": 16230 }, { "epoch": 0.18, "learning_rate": 4.7003904981250186e-05, "loss": 0.8583, "step": 16235 }, { "epoch": 0.18, "learning_rate": 4.7002982254111674e-05, "loss": 0.8668, "step": 16240 }, { "epoch": 0.18, "learning_rate": 4.700205952697316e-05, "loss": 0.813, "step": 16245 }, { "epoch": 0.18, "learning_rate": 4.700113679983465e-05, "loss": 0.7845, "step": 16250 }, { "epoch": 0.18, "learning_rate": 4.700021407269614e-05, "loss": 0.7917, "step": 16255 }, { "epoch": 0.18, "learning_rate": 4.699929134555762e-05, "loss": 0.8913, "step": 16260 }, { "epoch": 0.18, "learning_rate": 4.6998368618419114e-05, "loss": 0.8138, "step": 16265 }, { "epoch": 0.18, "learning_rate": 4.69974458912806e-05, "loss": 0.8893, "step": 16270 }, { "epoch": 0.18, "learning_rate": 4.699652316414209e-05, "loss": 0.8146, "step": 16275 }, { "epoch": 0.18, "learning_rate": 4.699560043700357e-05, "loss": 0.8183, "step": 16280 }, { "epoch": 0.18, "learning_rate": 4.6994677709865065e-05, "loss": 0.7401, "step": 16285 }, { "epoch": 0.18, "learning_rate": 4.699375498272655e-05, "loss": 0.8886, "step": 16290 }, { "epoch": 0.18, "learning_rate": 4.699283225558804e-05, "loss": 0.9127, "step": 16295 }, { "epoch": 0.18, "learning_rate": 4.699190952844952e-05, "loss": 0.7778, "step": 16300 }, { "epoch": 0.18, "learning_rate": 4.6990986801311016e-05, "loss": 0.8321, "step": 16305 }, { "epoch": 0.18, "learning_rate": 4.6990064074172504e-05, "loss": 0.8066, "step": 16310 }, { "epoch": 0.18, "learning_rate": 4.6989141347033985e-05, "loss": 0.8221, "step": 16315 }, { "epoch": 0.18, "learning_rate": 4.698821861989547e-05, "loss": 0.83, "step": 16320 }, { "epoch": 0.18, "learning_rate": 4.698729589275696e-05, "loss": 0.7654, "step": 16325 }, { "epoch": 0.18, "learning_rate": 4.6986373165618456e-05, "loss": 0.8198, "step": 16330 }, { "epoch": 0.18, "learning_rate": 4.698545043847994e-05, "loss": 0.7939, "step": 16335 }, { "epoch": 0.18, "learning_rate": 4.6984527711341425e-05, "loss": 0.8247, "step": 16340 }, { "epoch": 0.18, "learning_rate": 4.698360498420291e-05, "loss": 0.817, "step": 16345 }, { "epoch": 0.18, "learning_rate": 4.69826822570644e-05, "loss": 0.8476, "step": 16350 }, { "epoch": 0.18, "learning_rate": 4.698175952992589e-05, "loss": 0.7708, "step": 16355 }, { "epoch": 0.18, "learning_rate": 4.6980836802787376e-05, "loss": 0.8255, "step": 16360 }, { "epoch": 0.18, "learning_rate": 4.6979914075648864e-05, "loss": 0.8199, "step": 16365 }, { "epoch": 0.18, "learning_rate": 4.697899134851035e-05, "loss": 0.8109, "step": 16370 }, { "epoch": 0.18, "learning_rate": 4.697806862137184e-05, "loss": 0.7512, "step": 16375 }, { "epoch": 0.18, "learning_rate": 4.697714589423333e-05, "loss": 0.7649, "step": 16380 }, { "epoch": 0.18, "learning_rate": 4.6976223167094815e-05, "loss": 0.7769, "step": 16385 }, { "epoch": 0.18, "learning_rate": 4.69753004399563e-05, "loss": 0.8215, "step": 16390 }, { "epoch": 0.18, "learning_rate": 4.697437771281779e-05, "loss": 0.8268, "step": 16395 }, { "epoch": 0.18, "learning_rate": 4.697345498567928e-05, "loss": 0.8771, "step": 16400 }, { "epoch": 0.18, "learning_rate": 4.6972532258540767e-05, "loss": 0.7571, "step": 16405 }, { "epoch": 0.18, "learning_rate": 4.697160953140225e-05, "loss": 0.9011, "step": 16410 }, { "epoch": 0.18, "learning_rate": 4.697068680426374e-05, "loss": 0.7703, "step": 16415 }, { "epoch": 0.18, "learning_rate": 4.696976407712523e-05, "loss": 0.8436, "step": 16420 }, { "epoch": 0.18, "learning_rate": 4.696884134998671e-05, "loss": 0.8151, "step": 16425 }, { "epoch": 0.18, "learning_rate": 4.69679186228482e-05, "loss": 0.8334, "step": 16430 }, { "epoch": 0.18, "learning_rate": 4.6966995895709694e-05, "loss": 0.8531, "step": 16435 }, { "epoch": 0.18, "learning_rate": 4.696607316857118e-05, "loss": 0.861, "step": 16440 }, { "epoch": 0.18, "learning_rate": 4.696515044143266e-05, "loss": 0.8315, "step": 16445 }, { "epoch": 0.18, "learning_rate": 4.696422771429415e-05, "loss": 0.8182, "step": 16450 }, { "epoch": 0.18, "learning_rate": 4.6963304987155645e-05, "loss": 0.8209, "step": 16455 }, { "epoch": 0.18, "learning_rate": 4.6962382260017126e-05, "loss": 0.767, "step": 16460 }, { "epoch": 0.18, "learning_rate": 4.6961459532878614e-05, "loss": 0.7964, "step": 16465 }, { "epoch": 0.18, "learning_rate": 4.69605368057401e-05, "loss": 0.8923, "step": 16470 }, { "epoch": 0.18, "learning_rate": 4.695961407860159e-05, "loss": 0.8764, "step": 16475 }, { "epoch": 0.18, "learning_rate": 4.695869135146308e-05, "loss": 0.8683, "step": 16480 }, { "epoch": 0.18, "learning_rate": 4.6957768624324565e-05, "loss": 0.8292, "step": 16485 }, { "epoch": 0.18, "learning_rate": 4.695684589718605e-05, "loss": 0.8463, "step": 16490 }, { "epoch": 0.18, "learning_rate": 4.695592317004754e-05, "loss": 0.8171, "step": 16495 }, { "epoch": 0.18, "learning_rate": 4.695500044290903e-05, "loss": 0.8319, "step": 16500 }, { "epoch": 0.18, "learning_rate": 4.695407771577052e-05, "loss": 0.8176, "step": 16505 }, { "epoch": 0.18, "learning_rate": 4.6953154988632005e-05, "loss": 0.8176, "step": 16510 }, { "epoch": 0.18, "learning_rate": 4.695223226149349e-05, "loss": 0.813, "step": 16515 }, { "epoch": 0.18, "learning_rate": 4.695130953435498e-05, "loss": 0.8137, "step": 16520 }, { "epoch": 0.18, "learning_rate": 4.695038680721647e-05, "loss": 0.8247, "step": 16525 }, { "epoch": 0.18, "learning_rate": 4.6949464080077956e-05, "loss": 0.8245, "step": 16530 }, { "epoch": 0.18, "learning_rate": 4.694854135293944e-05, "loss": 0.8806, "step": 16535 }, { "epoch": 0.18, "learning_rate": 4.694761862580093e-05, "loss": 0.8237, "step": 16540 }, { "epoch": 0.18, "learning_rate": 4.694669589866242e-05, "loss": 0.8515, "step": 16545 }, { "epoch": 0.18, "learning_rate": 4.694577317152391e-05, "loss": 0.8156, "step": 16550 }, { "epoch": 0.18, "learning_rate": 4.694485044438539e-05, "loss": 0.7851, "step": 16555 }, { "epoch": 0.18, "learning_rate": 4.6943927717246876e-05, "loss": 0.7548, "step": 16560 }, { "epoch": 0.18, "learning_rate": 4.694300499010837e-05, "loss": 0.7937, "step": 16565 }, { "epoch": 0.18, "learning_rate": 4.694208226296985e-05, "loss": 0.8191, "step": 16570 }, { "epoch": 0.18, "learning_rate": 4.694115953583134e-05, "loss": 0.8178, "step": 16575 }, { "epoch": 0.18, "learning_rate": 4.694023680869283e-05, "loss": 0.7547, "step": 16580 }, { "epoch": 0.18, "learning_rate": 4.693931408155432e-05, "loss": 0.731, "step": 16585 }, { "epoch": 0.18, "learning_rate": 4.6938391354415803e-05, "loss": 0.8188, "step": 16590 }, { "epoch": 0.18, "learning_rate": 4.693746862727729e-05, "loss": 0.8353, "step": 16595 }, { "epoch": 0.18, "learning_rate": 4.693654590013878e-05, "loss": 0.8173, "step": 16600 }, { "epoch": 0.18, "learning_rate": 4.6935623173000274e-05, "loss": 0.8352, "step": 16605 }, { "epoch": 0.18, "learning_rate": 4.6934700445861755e-05, "loss": 0.8189, "step": 16610 }, { "epoch": 0.18, "learning_rate": 4.693377771872324e-05, "loss": 0.8068, "step": 16615 }, { "epoch": 0.18, "learning_rate": 4.693285499158473e-05, "loss": 0.7829, "step": 16620 }, { "epoch": 0.18, "learning_rate": 4.693193226444622e-05, "loss": 0.7806, "step": 16625 }, { "epoch": 0.18, "learning_rate": 4.6931009537307706e-05, "loss": 0.8411, "step": 16630 }, { "epoch": 0.18, "learning_rate": 4.6930086810169194e-05, "loss": 0.7894, "step": 16635 }, { "epoch": 0.18, "learning_rate": 4.692916408303068e-05, "loss": 0.8596, "step": 16640 }, { "epoch": 0.18, "learning_rate": 4.692824135589216e-05, "loss": 0.8248, "step": 16645 }, { "epoch": 0.18, "learning_rate": 4.692731862875366e-05, "loss": 0.8, "step": 16650 }, { "epoch": 0.18, "learning_rate": 4.6926395901615145e-05, "loss": 0.8643, "step": 16655 }, { "epoch": 0.18, "learning_rate": 4.692547317447663e-05, "loss": 0.8347, "step": 16660 }, { "epoch": 0.18, "learning_rate": 4.6924550447338114e-05, "loss": 0.8422, "step": 16665 }, { "epoch": 0.18, "learning_rate": 4.692362772019961e-05, "loss": 0.8562, "step": 16670 }, { "epoch": 0.18, "learning_rate": 4.69227049930611e-05, "loss": 0.8391, "step": 16675 }, { "epoch": 0.18, "learning_rate": 4.6921782265922585e-05, "loss": 0.872, "step": 16680 }, { "epoch": 0.18, "learning_rate": 4.6920859538784066e-05, "loss": 0.8137, "step": 16685 }, { "epoch": 0.18, "learning_rate": 4.691993681164556e-05, "loss": 0.8668, "step": 16690 }, { "epoch": 0.18, "learning_rate": 4.691901408450705e-05, "loss": 0.8179, "step": 16695 }, { "epoch": 0.18, "learning_rate": 4.691809135736853e-05, "loss": 0.8131, "step": 16700 }, { "epoch": 0.18, "learning_rate": 4.691716863023002e-05, "loss": 0.8522, "step": 16705 }, { "epoch": 0.19, "learning_rate": 4.6916245903091505e-05, "loss": 0.8192, "step": 16710 }, { "epoch": 0.19, "learning_rate": 4.6915323175953e-05, "loss": 0.8142, "step": 16715 }, { "epoch": 0.19, "learning_rate": 4.691440044881448e-05, "loss": 0.8482, "step": 16720 }, { "epoch": 0.19, "learning_rate": 4.691347772167597e-05, "loss": 0.8019, "step": 16725 }, { "epoch": 0.19, "learning_rate": 4.6912554994537456e-05, "loss": 0.8547, "step": 16730 }, { "epoch": 0.19, "learning_rate": 4.6911632267398944e-05, "loss": 0.8228, "step": 16735 }, { "epoch": 0.19, "learning_rate": 4.691070954026043e-05, "loss": 0.8357, "step": 16740 }, { "epoch": 0.19, "learning_rate": 4.690978681312192e-05, "loss": 0.8346, "step": 16745 }, { "epoch": 0.19, "learning_rate": 4.690886408598341e-05, "loss": 0.8145, "step": 16750 }, { "epoch": 0.19, "learning_rate": 4.6907941358844896e-05, "loss": 0.8355, "step": 16755 }, { "epoch": 0.19, "learning_rate": 4.6907018631706383e-05, "loss": 0.8238, "step": 16760 }, { "epoch": 0.19, "learning_rate": 4.690609590456787e-05, "loss": 0.8516, "step": 16765 }, { "epoch": 0.19, "learning_rate": 4.690517317742936e-05, "loss": 0.8586, "step": 16770 }, { "epoch": 0.19, "learning_rate": 4.690425045029084e-05, "loss": 0.8246, "step": 16775 }, { "epoch": 0.19, "learning_rate": 4.6903327723152335e-05, "loss": 0.8195, "step": 16780 }, { "epoch": 0.19, "learning_rate": 4.690240499601382e-05, "loss": 0.8206, "step": 16785 }, { "epoch": 0.19, "learning_rate": 4.690148226887531e-05, "loss": 0.7826, "step": 16790 }, { "epoch": 0.19, "learning_rate": 4.690055954173679e-05, "loss": 0.8314, "step": 16795 }, { "epoch": 0.19, "learning_rate": 4.6899636814598286e-05, "loss": 0.7936, "step": 16800 }, { "epoch": 0.19, "learning_rate": 4.6898714087459774e-05, "loss": 0.8003, "step": 16805 }, { "epoch": 0.19, "learning_rate": 4.6897791360321255e-05, "loss": 0.8713, "step": 16810 }, { "epoch": 0.19, "learning_rate": 4.689686863318274e-05, "loss": 0.8272, "step": 16815 }, { "epoch": 0.19, "learning_rate": 4.689594590604424e-05, "loss": 0.8014, "step": 16820 }, { "epoch": 0.19, "learning_rate": 4.6895023178905726e-05, "loss": 0.8214, "step": 16825 }, { "epoch": 0.19, "learning_rate": 4.6894100451767207e-05, "loss": 0.7673, "step": 16830 }, { "epoch": 0.19, "learning_rate": 4.6893177724628694e-05, "loss": 0.8181, "step": 16835 }, { "epoch": 0.19, "learning_rate": 4.689225499749019e-05, "loss": 0.7989, "step": 16840 }, { "epoch": 0.19, "learning_rate": 4.689133227035167e-05, "loss": 0.8134, "step": 16845 }, { "epoch": 0.19, "learning_rate": 4.689040954321316e-05, "loss": 0.7523, "step": 16850 }, { "epoch": 0.19, "learning_rate": 4.6889486816074646e-05, "loss": 0.8512, "step": 16855 }, { "epoch": 0.19, "learning_rate": 4.6888564088936134e-05, "loss": 0.8016, "step": 16860 }, { "epoch": 0.19, "learning_rate": 4.688764136179762e-05, "loss": 0.8173, "step": 16865 }, { "epoch": 0.19, "learning_rate": 4.688671863465911e-05, "loss": 0.8467, "step": 16870 }, { "epoch": 0.19, "learning_rate": 4.68857959075206e-05, "loss": 0.7786, "step": 16875 }, { "epoch": 0.19, "learning_rate": 4.6884873180382085e-05, "loss": 0.7955, "step": 16880 }, { "epoch": 0.19, "learning_rate": 4.688395045324357e-05, "loss": 0.8114, "step": 16885 }, { "epoch": 0.19, "learning_rate": 4.688302772610506e-05, "loss": 0.8854, "step": 16890 }, { "epoch": 0.19, "learning_rate": 4.688210499896655e-05, "loss": 0.7888, "step": 16895 }, { "epoch": 0.19, "learning_rate": 4.6881182271828036e-05, "loss": 0.7698, "step": 16900 }, { "epoch": 0.19, "learning_rate": 4.6880259544689524e-05, "loss": 0.7898, "step": 16905 }, { "epoch": 0.19, "learning_rate": 4.687933681755101e-05, "loss": 0.7913, "step": 16910 }, { "epoch": 0.19, "learning_rate": 4.68784140904125e-05, "loss": 0.8446, "step": 16915 }, { "epoch": 0.19, "learning_rate": 4.687749136327398e-05, "loss": 0.762, "step": 16920 }, { "epoch": 0.19, "learning_rate": 4.6876568636135476e-05, "loss": 0.7622, "step": 16925 }, { "epoch": 0.19, "learning_rate": 4.6875645908996964e-05, "loss": 0.7636, "step": 16930 }, { "epoch": 0.19, "learning_rate": 4.687472318185845e-05, "loss": 0.8419, "step": 16935 }, { "epoch": 0.19, "learning_rate": 4.687380045471993e-05, "loss": 0.8693, "step": 16940 }, { "epoch": 0.19, "learning_rate": 4.687287772758142e-05, "loss": 0.8282, "step": 16945 }, { "epoch": 0.19, "learning_rate": 4.6871955000442915e-05, "loss": 0.8412, "step": 16950 }, { "epoch": 0.19, "learning_rate": 4.6871032273304396e-05, "loss": 0.8581, "step": 16955 }, { "epoch": 0.19, "learning_rate": 4.6870109546165884e-05, "loss": 0.8058, "step": 16960 }, { "epoch": 0.19, "learning_rate": 4.686918681902737e-05, "loss": 0.7907, "step": 16965 }, { "epoch": 0.19, "learning_rate": 4.6868264091888866e-05, "loss": 0.8293, "step": 16970 }, { "epoch": 0.19, "learning_rate": 4.686734136475035e-05, "loss": 0.8182, "step": 16975 }, { "epoch": 0.19, "learning_rate": 4.6866418637611835e-05, "loss": 0.7718, "step": 16980 }, { "epoch": 0.19, "learning_rate": 4.686549591047332e-05, "loss": 0.7853, "step": 16985 }, { "epoch": 0.19, "learning_rate": 4.686457318333482e-05, "loss": 0.8108, "step": 16990 }, { "epoch": 0.19, "learning_rate": 4.68636504561963e-05, "loss": 0.8201, "step": 16995 }, { "epoch": 0.19, "learning_rate": 4.686272772905779e-05, "loss": 0.8482, "step": 17000 }, { "epoch": 0.19, "eval_loss": 0.7562331557273865, "eval_runtime": 70.2087, "eval_samples_per_second": 28.487, "eval_steps_per_second": 14.243, "step": 17000 }, { "epoch": 0.19, "learning_rate": 4.6861805001919275e-05, "loss": 0.7919, "step": 17005 }, { "epoch": 0.19, "learning_rate": 4.686088227478076e-05, "loss": 0.8496, "step": 17010 }, { "epoch": 0.19, "learning_rate": 4.685995954764225e-05, "loss": 0.8031, "step": 17015 }, { "epoch": 0.19, "learning_rate": 4.685903682050374e-05, "loss": 0.818, "step": 17020 }, { "epoch": 0.19, "learning_rate": 4.6858114093365226e-05, "loss": 0.7955, "step": 17025 }, { "epoch": 0.19, "learning_rate": 4.685719136622671e-05, "loss": 0.8049, "step": 17030 }, { "epoch": 0.19, "learning_rate": 4.68562686390882e-05, "loss": 0.7997, "step": 17035 }, { "epoch": 0.19, "learning_rate": 4.685534591194969e-05, "loss": 0.7856, "step": 17040 }, { "epoch": 0.19, "learning_rate": 4.685442318481118e-05, "loss": 0.7865, "step": 17045 }, { "epoch": 0.19, "learning_rate": 4.685350045767266e-05, "loss": 0.848, "step": 17050 }, { "epoch": 0.19, "learning_rate": 4.685257773053415e-05, "loss": 0.8538, "step": 17055 }, { "epoch": 0.19, "learning_rate": 4.685165500339564e-05, "loss": 0.7804, "step": 17060 }, { "epoch": 0.19, "learning_rate": 4.685073227625713e-05, "loss": 0.8017, "step": 17065 }, { "epoch": 0.19, "learning_rate": 4.684980954911861e-05, "loss": 0.8998, "step": 17070 }, { "epoch": 0.19, "learning_rate": 4.6848886821980104e-05, "loss": 0.8303, "step": 17075 }, { "epoch": 0.19, "learning_rate": 4.684796409484159e-05, "loss": 0.8429, "step": 17080 }, { "epoch": 0.19, "learning_rate": 4.684704136770307e-05, "loss": 0.7917, "step": 17085 }, { "epoch": 0.19, "learning_rate": 4.684611864056456e-05, "loss": 0.7481, "step": 17090 }, { "epoch": 0.19, "learning_rate": 4.684519591342605e-05, "loss": 0.8531, "step": 17095 }, { "epoch": 0.19, "learning_rate": 4.6844273186287544e-05, "loss": 0.8008, "step": 17100 }, { "epoch": 0.19, "learning_rate": 4.6843350459149025e-05, "loss": 0.8647, "step": 17105 }, { "epoch": 0.19, "learning_rate": 4.684242773201051e-05, "loss": 0.8086, "step": 17110 }, { "epoch": 0.19, "learning_rate": 4.6841505004872e-05, "loss": 0.8057, "step": 17115 }, { "epoch": 0.19, "learning_rate": 4.684058227773349e-05, "loss": 0.8718, "step": 17120 }, { "epoch": 0.19, "learning_rate": 4.6839659550594976e-05, "loss": 0.8287, "step": 17125 }, { "epoch": 0.19, "learning_rate": 4.6838736823456464e-05, "loss": 0.8167, "step": 17130 }, { "epoch": 0.19, "learning_rate": 4.683781409631795e-05, "loss": 0.7728, "step": 17135 }, { "epoch": 0.19, "learning_rate": 4.683689136917944e-05, "loss": 0.878, "step": 17140 }, { "epoch": 0.19, "learning_rate": 4.683596864204093e-05, "loss": 0.8548, "step": 17145 }, { "epoch": 0.19, "learning_rate": 4.6835045914902415e-05, "loss": 0.8345, "step": 17150 }, { "epoch": 0.19, "learning_rate": 4.68341231877639e-05, "loss": 0.8155, "step": 17155 }, { "epoch": 0.19, "learning_rate": 4.6833200460625384e-05, "loss": 0.7774, "step": 17160 }, { "epoch": 0.19, "learning_rate": 4.683227773348688e-05, "loss": 0.839, "step": 17165 }, { "epoch": 0.19, "learning_rate": 4.683135500634837e-05, "loss": 0.8262, "step": 17170 }, { "epoch": 0.19, "learning_rate": 4.6830432279209855e-05, "loss": 0.7874, "step": 17175 }, { "epoch": 0.19, "learning_rate": 4.6829509552071336e-05, "loss": 0.8359, "step": 17180 }, { "epoch": 0.19, "learning_rate": 4.682858682493283e-05, "loss": 0.822, "step": 17185 }, { "epoch": 0.19, "learning_rate": 4.682766409779432e-05, "loss": 0.8033, "step": 17190 }, { "epoch": 0.19, "learning_rate": 4.68267413706558e-05, "loss": 0.7904, "step": 17195 }, { "epoch": 0.19, "learning_rate": 4.682581864351729e-05, "loss": 0.8626, "step": 17200 }, { "epoch": 0.19, "learning_rate": 4.682489591637878e-05, "loss": 0.8733, "step": 17205 }, { "epoch": 0.19, "learning_rate": 4.682397318924027e-05, "loss": 0.7869, "step": 17210 }, { "epoch": 0.19, "learning_rate": 4.682305046210175e-05, "loss": 0.7887, "step": 17215 }, { "epoch": 0.19, "learning_rate": 4.682212773496324e-05, "loss": 0.8509, "step": 17220 }, { "epoch": 0.19, "learning_rate": 4.682120500782473e-05, "loss": 0.7941, "step": 17225 }, { "epoch": 0.19, "learning_rate": 4.6820282280686214e-05, "loss": 0.7713, "step": 17230 }, { "epoch": 0.19, "learning_rate": 4.68193595535477e-05, "loss": 0.8354, "step": 17235 }, { "epoch": 0.19, "learning_rate": 4.681843682640919e-05, "loss": 0.8011, "step": 17240 }, { "epoch": 0.19, "learning_rate": 4.681751409927068e-05, "loss": 0.7855, "step": 17245 }, { "epoch": 0.19, "learning_rate": 4.6816591372132166e-05, "loss": 0.7237, "step": 17250 }, { "epoch": 0.19, "learning_rate": 4.681566864499365e-05, "loss": 0.8394, "step": 17255 }, { "epoch": 0.19, "learning_rate": 4.681474591785514e-05, "loss": 0.7745, "step": 17260 }, { "epoch": 0.19, "learning_rate": 4.681382319071663e-05, "loss": 0.8628, "step": 17265 }, { "epoch": 0.19, "learning_rate": 4.681290046357812e-05, "loss": 0.8331, "step": 17270 }, { "epoch": 0.19, "learning_rate": 4.6811977736439605e-05, "loss": 0.7659, "step": 17275 }, { "epoch": 0.19, "learning_rate": 4.681105500930109e-05, "loss": 0.8088, "step": 17280 }, { "epoch": 0.19, "learning_rate": 4.681013228216258e-05, "loss": 0.7978, "step": 17285 }, { "epoch": 0.19, "learning_rate": 4.680920955502407e-05, "loss": 0.8859, "step": 17290 }, { "epoch": 0.19, "learning_rate": 4.6808286827885556e-05, "loss": 0.8141, "step": 17295 }, { "epoch": 0.19, "learning_rate": 4.6807364100747044e-05, "loss": 0.8161, "step": 17300 }, { "epoch": 0.19, "learning_rate": 4.6806441373608525e-05, "loss": 0.8696, "step": 17305 }, { "epoch": 0.19, "learning_rate": 4.680551864647001e-05, "loss": 0.8114, "step": 17310 }, { "epoch": 0.19, "learning_rate": 4.680459591933151e-05, "loss": 0.7813, "step": 17315 }, { "epoch": 0.19, "learning_rate": 4.6803673192192995e-05, "loss": 0.7883, "step": 17320 }, { "epoch": 0.19, "learning_rate": 4.6802750465054476e-05, "loss": 0.8277, "step": 17325 }, { "epoch": 0.19, "learning_rate": 4.6801827737915964e-05, "loss": 0.814, "step": 17330 }, { "epoch": 0.19, "learning_rate": 4.680090501077746e-05, "loss": 0.7999, "step": 17335 }, { "epoch": 0.19, "learning_rate": 4.679998228363895e-05, "loss": 0.7912, "step": 17340 }, { "epoch": 0.19, "learning_rate": 4.679905955650043e-05, "loss": 0.8848, "step": 17345 }, { "epoch": 0.19, "learning_rate": 4.6798136829361916e-05, "loss": 0.771, "step": 17350 }, { "epoch": 0.19, "learning_rate": 4.679721410222341e-05, "loss": 0.8797, "step": 17355 }, { "epoch": 0.19, "learning_rate": 4.679629137508489e-05, "loss": 0.8294, "step": 17360 }, { "epoch": 0.19, "learning_rate": 4.679536864794638e-05, "loss": 0.7974, "step": 17365 }, { "epoch": 0.19, "learning_rate": 4.679444592080787e-05, "loss": 0.8416, "step": 17370 }, { "epoch": 0.19, "learning_rate": 4.679352319366936e-05, "loss": 0.812, "step": 17375 }, { "epoch": 0.19, "learning_rate": 4.679260046653084e-05, "loss": 0.7645, "step": 17380 }, { "epoch": 0.19, "learning_rate": 4.679167773939233e-05, "loss": 0.7851, "step": 17385 }, { "epoch": 0.19, "learning_rate": 4.679075501225382e-05, "loss": 0.858, "step": 17390 }, { "epoch": 0.19, "learning_rate": 4.6789832285115306e-05, "loss": 0.7894, "step": 17395 }, { "epoch": 0.19, "learning_rate": 4.6788909557976794e-05, "loss": 0.7788, "step": 17400 }, { "epoch": 0.19, "learning_rate": 4.678798683083828e-05, "loss": 0.8021, "step": 17405 }, { "epoch": 0.19, "learning_rate": 4.678706410369977e-05, "loss": 0.8383, "step": 17410 }, { "epoch": 0.19, "learning_rate": 4.678614137656126e-05, "loss": 0.817, "step": 17415 }, { "epoch": 0.19, "learning_rate": 4.6785218649422746e-05, "loss": 0.7992, "step": 17420 }, { "epoch": 0.19, "learning_rate": 4.6784295922284233e-05, "loss": 0.8486, "step": 17425 }, { "epoch": 0.19, "learning_rate": 4.678337319514572e-05, "loss": 0.8361, "step": 17430 }, { "epoch": 0.19, "learning_rate": 4.67824504680072e-05, "loss": 0.8182, "step": 17435 }, { "epoch": 0.19, "learning_rate": 4.67815277408687e-05, "loss": 0.7748, "step": 17440 }, { "epoch": 0.19, "learning_rate": 4.6780605013730185e-05, "loss": 0.8466, "step": 17445 }, { "epoch": 0.19, "learning_rate": 4.677968228659167e-05, "loss": 0.7339, "step": 17450 }, { "epoch": 0.19, "learning_rate": 4.6778759559453154e-05, "loss": 0.8093, "step": 17455 }, { "epoch": 0.19, "learning_rate": 4.677783683231464e-05, "loss": 0.8534, "step": 17460 }, { "epoch": 0.19, "learning_rate": 4.6776914105176136e-05, "loss": 0.8, "step": 17465 }, { "epoch": 0.19, "learning_rate": 4.677599137803762e-05, "loss": 0.8057, "step": 17470 }, { "epoch": 0.19, "learning_rate": 4.6775068650899105e-05, "loss": 0.8235, "step": 17475 }, { "epoch": 0.19, "learning_rate": 4.677414592376059e-05, "loss": 0.8227, "step": 17480 }, { "epoch": 0.19, "learning_rate": 4.677322319662209e-05, "loss": 0.784, "step": 17485 }, { "epoch": 0.19, "learning_rate": 4.677230046948357e-05, "loss": 0.8255, "step": 17490 }, { "epoch": 0.19, "learning_rate": 4.6771377742345057e-05, "loss": 0.8332, "step": 17495 }, { "epoch": 0.19, "learning_rate": 4.6770455015206544e-05, "loss": 0.8131, "step": 17500 }, { "epoch": 0.19, "learning_rate": 4.676953228806803e-05, "loss": 0.9121, "step": 17505 }, { "epoch": 0.19, "learning_rate": 4.676860956092952e-05, "loss": 0.8901, "step": 17510 }, { "epoch": 0.19, "learning_rate": 4.676768683379101e-05, "loss": 0.7978, "step": 17515 }, { "epoch": 0.19, "learning_rate": 4.6766764106652496e-05, "loss": 0.8189, "step": 17520 }, { "epoch": 0.19, "learning_rate": 4.6765841379513984e-05, "loss": 0.9114, "step": 17525 }, { "epoch": 0.19, "learning_rate": 4.676491865237547e-05, "loss": 0.891, "step": 17530 }, { "epoch": 0.19, "learning_rate": 4.676399592523696e-05, "loss": 0.8765, "step": 17535 }, { "epoch": 0.19, "learning_rate": 4.676307319809845e-05, "loss": 0.8828, "step": 17540 }, { "epoch": 0.19, "learning_rate": 4.676215047095993e-05, "loss": 0.914, "step": 17545 }, { "epoch": 0.19, "learning_rate": 4.676122774382142e-05, "loss": 0.8435, "step": 17550 }, { "epoch": 0.19, "learning_rate": 4.676030501668291e-05, "loss": 0.8253, "step": 17555 }, { "epoch": 0.19, "learning_rate": 4.67593822895444e-05, "loss": 0.9072, "step": 17560 }, { "epoch": 0.19, "learning_rate": 4.675845956240588e-05, "loss": 0.8431, "step": 17565 }, { "epoch": 0.19, "learning_rate": 4.6757536835267374e-05, "loss": 0.8287, "step": 17570 }, { "epoch": 0.19, "learning_rate": 4.675661410812886e-05, "loss": 0.9313, "step": 17575 }, { "epoch": 0.19, "learning_rate": 4.675569138099034e-05, "loss": 0.8867, "step": 17580 }, { "epoch": 0.19, "learning_rate": 4.675476865385183e-05, "loss": 0.9207, "step": 17585 }, { "epoch": 0.19, "learning_rate": 4.6753845926713326e-05, "loss": 0.8029, "step": 17590 }, { "epoch": 0.19, "learning_rate": 4.6752923199574814e-05, "loss": 0.9554, "step": 17595 }, { "epoch": 0.19, "learning_rate": 4.6752000472436295e-05, "loss": 0.8623, "step": 17600 }, { "epoch": 0.19, "learning_rate": 4.675107774529778e-05, "loss": 0.9008, "step": 17605 }, { "epoch": 0.19, "learning_rate": 4.675015501815927e-05, "loss": 0.9764, "step": 17610 }, { "epoch": 0.2, "learning_rate": 4.674923229102076e-05, "loss": 0.8728, "step": 17615 }, { "epoch": 0.2, "learning_rate": 4.6748309563882246e-05, "loss": 0.8843, "step": 17620 }, { "epoch": 0.2, "learning_rate": 4.6747386836743734e-05, "loss": 0.898, "step": 17625 }, { "epoch": 0.2, "learning_rate": 4.674646410960522e-05, "loss": 0.8959, "step": 17630 }, { "epoch": 0.2, "learning_rate": 4.674554138246671e-05, "loss": 0.891, "step": 17635 }, { "epoch": 0.2, "learning_rate": 4.67446186553282e-05, "loss": 0.8072, "step": 17640 }, { "epoch": 0.2, "learning_rate": 4.6743695928189685e-05, "loss": 0.9152, "step": 17645 }, { "epoch": 0.2, "learning_rate": 4.674277320105117e-05, "loss": 0.8284, "step": 17650 }, { "epoch": 0.2, "learning_rate": 4.674185047391266e-05, "loss": 0.9174, "step": 17655 }, { "epoch": 0.2, "learning_rate": 4.674092774677415e-05, "loss": 0.9224, "step": 17660 }, { "epoch": 0.2, "learning_rate": 4.674000501963564e-05, "loss": 0.9383, "step": 17665 }, { "epoch": 0.2, "learning_rate": 4.6739082292497124e-05, "loss": 0.9194, "step": 17670 }, { "epoch": 0.2, "learning_rate": 4.673815956535861e-05, "loss": 0.9105, "step": 17675 }, { "epoch": 0.2, "learning_rate": 4.67372368382201e-05, "loss": 0.8332, "step": 17680 }, { "epoch": 0.2, "learning_rate": 4.673631411108159e-05, "loss": 0.8354, "step": 17685 }, { "epoch": 0.2, "learning_rate": 4.673539138394307e-05, "loss": 0.8885, "step": 17690 }, { "epoch": 0.2, "learning_rate": 4.673446865680456e-05, "loss": 0.827, "step": 17695 }, { "epoch": 0.2, "learning_rate": 4.673354592966605e-05, "loss": 0.8958, "step": 17700 }, { "epoch": 0.2, "learning_rate": 4.673262320252754e-05, "loss": 0.8505, "step": 17705 }, { "epoch": 0.2, "learning_rate": 4.673170047538902e-05, "loss": 0.8524, "step": 17710 }, { "epoch": 0.2, "learning_rate": 4.673077774825051e-05, "loss": 0.7994, "step": 17715 }, { "epoch": 0.2, "learning_rate": 4.6729855021112e-05, "loss": 0.855, "step": 17720 }, { "epoch": 0.2, "learning_rate": 4.672893229397349e-05, "loss": 0.8762, "step": 17725 }, { "epoch": 0.2, "learning_rate": 4.672800956683497e-05, "loss": 0.8996, "step": 17730 }, { "epoch": 0.2, "learning_rate": 4.672708683969646e-05, "loss": 0.9077, "step": 17735 }, { "epoch": 0.2, "learning_rate": 4.6726164112557954e-05, "loss": 0.8611, "step": 17740 }, { "epoch": 0.2, "learning_rate": 4.6725241385419435e-05, "loss": 0.9225, "step": 17745 }, { "epoch": 0.2, "learning_rate": 4.672431865828092e-05, "loss": 0.941, "step": 17750 }, { "epoch": 0.2, "learning_rate": 4.672339593114241e-05, "loss": 0.9293, "step": 17755 }, { "epoch": 0.2, "learning_rate": 4.6722473204003906e-05, "loss": 0.8505, "step": 17760 }, { "epoch": 0.2, "learning_rate": 4.672155047686539e-05, "loss": 0.8809, "step": 17765 }, { "epoch": 0.2, "learning_rate": 4.6720627749726875e-05, "loss": 0.8528, "step": 17770 }, { "epoch": 0.2, "learning_rate": 4.671970502258836e-05, "loss": 0.8613, "step": 17775 }, { "epoch": 0.2, "learning_rate": 4.671878229544985e-05, "loss": 0.8889, "step": 17780 }, { "epoch": 0.2, "learning_rate": 4.671785956831134e-05, "loss": 0.8896, "step": 17785 }, { "epoch": 0.2, "learning_rate": 4.6716936841172826e-05, "loss": 0.9062, "step": 17790 }, { "epoch": 0.2, "learning_rate": 4.6716014114034314e-05, "loss": 0.8793, "step": 17795 }, { "epoch": 0.2, "learning_rate": 4.67150913868958e-05, "loss": 0.8963, "step": 17800 }, { "epoch": 0.2, "learning_rate": 4.671416865975729e-05, "loss": 0.8276, "step": 17805 }, { "epoch": 0.2, "learning_rate": 4.671324593261878e-05, "loss": 0.9384, "step": 17810 }, { "epoch": 0.2, "learning_rate": 4.6712323205480265e-05, "loss": 0.8855, "step": 17815 }, { "epoch": 0.2, "learning_rate": 4.6711400478341746e-05, "loss": 0.8551, "step": 17820 }, { "epoch": 0.2, "learning_rate": 4.671047775120324e-05, "loss": 0.9072, "step": 17825 }, { "epoch": 0.2, "learning_rate": 4.670955502406473e-05, "loss": 0.8719, "step": 17830 }, { "epoch": 0.2, "learning_rate": 4.670863229692622e-05, "loss": 0.8316, "step": 17835 }, { "epoch": 0.2, "learning_rate": 4.67077095697877e-05, "loss": 0.8083, "step": 17840 }, { "epoch": 0.2, "learning_rate": 4.6706786842649186e-05, "loss": 0.8707, "step": 17845 }, { "epoch": 0.2, "learning_rate": 4.670586411551068e-05, "loss": 0.8084, "step": 17850 }, { "epoch": 0.2, "learning_rate": 4.670494138837216e-05, "loss": 0.8175, "step": 17855 }, { "epoch": 0.2, "learning_rate": 4.670401866123365e-05, "loss": 0.8731, "step": 17860 }, { "epoch": 0.2, "learning_rate": 4.670309593409514e-05, "loss": 0.9141, "step": 17865 }, { "epoch": 0.2, "learning_rate": 4.670217320695663e-05, "loss": 0.9058, "step": 17870 }, { "epoch": 0.2, "learning_rate": 4.670125047981811e-05, "loss": 0.9388, "step": 17875 }, { "epoch": 0.2, "learning_rate": 4.67003277526796e-05, "loss": 0.8796, "step": 17880 }, { "epoch": 0.2, "learning_rate": 4.669940502554109e-05, "loss": 0.9071, "step": 17885 }, { "epoch": 0.2, "learning_rate": 4.6698482298402576e-05, "loss": 0.8703, "step": 17890 }, { "epoch": 0.2, "learning_rate": 4.6697559571264064e-05, "loss": 0.97, "step": 17895 }, { "epoch": 0.2, "learning_rate": 4.669663684412555e-05, "loss": 0.873, "step": 17900 }, { "epoch": 0.2, "learning_rate": 4.669571411698704e-05, "loss": 0.8115, "step": 17905 }, { "epoch": 0.2, "learning_rate": 4.669479138984853e-05, "loss": 0.8541, "step": 17910 }, { "epoch": 0.2, "learning_rate": 4.6693868662710016e-05, "loss": 0.9106, "step": 17915 }, { "epoch": 0.2, "learning_rate": 4.66929459355715e-05, "loss": 0.9269, "step": 17920 }, { "epoch": 0.2, "learning_rate": 4.669202320843299e-05, "loss": 0.8494, "step": 17925 }, { "epoch": 0.2, "learning_rate": 4.669110048129447e-05, "loss": 0.9023, "step": 17930 }, { "epoch": 0.2, "learning_rate": 4.669017775415597e-05, "loss": 0.9111, "step": 17935 }, { "epoch": 0.2, "learning_rate": 4.6689255027017455e-05, "loss": 0.9239, "step": 17940 }, { "epoch": 0.2, "learning_rate": 4.668833229987894e-05, "loss": 0.9301, "step": 17945 }, { "epoch": 0.2, "learning_rate": 4.6687409572740424e-05, "loss": 0.8391, "step": 17950 }, { "epoch": 0.2, "learning_rate": 4.668648684560192e-05, "loss": 0.9303, "step": 17955 }, { "epoch": 0.2, "learning_rate": 4.6685564118463406e-05, "loss": 0.9729, "step": 17960 }, { "epoch": 0.2, "learning_rate": 4.668464139132489e-05, "loss": 0.8665, "step": 17965 }, { "epoch": 0.2, "learning_rate": 4.6683718664186375e-05, "loss": 0.9606, "step": 17970 }, { "epoch": 0.2, "learning_rate": 4.668279593704787e-05, "loss": 0.7959, "step": 17975 }, { "epoch": 0.2, "learning_rate": 4.668187320990936e-05, "loss": 0.8447, "step": 17980 }, { "epoch": 0.2, "learning_rate": 4.668095048277084e-05, "loss": 0.9327, "step": 17985 }, { "epoch": 0.2, "learning_rate": 4.6680027755632326e-05, "loss": 0.9207, "step": 17990 }, { "epoch": 0.2, "learning_rate": 4.6679105028493814e-05, "loss": 0.8302, "step": 17995 }, { "epoch": 0.2, "learning_rate": 4.66781823013553e-05, "loss": 0.8716, "step": 18000 }, { "epoch": 0.2, "eval_loss": 0.7818576693534851, "eval_runtime": 70.0014, "eval_samples_per_second": 28.571, "eval_steps_per_second": 14.285, "step": 18000 }, { "epoch": 0.2, "learning_rate": 4.667725957421679e-05, "loss": 0.8053, "step": 18005 }, { "epoch": 0.2, "learning_rate": 4.667633684707828e-05, "loss": 0.9584, "step": 18010 }, { "epoch": 0.2, "learning_rate": 4.6675414119939766e-05, "loss": 0.8671, "step": 18015 }, { "epoch": 0.2, "learning_rate": 4.6674491392801254e-05, "loss": 0.8888, "step": 18020 }, { "epoch": 0.2, "learning_rate": 4.667356866566274e-05, "loss": 0.8422, "step": 18025 }, { "epoch": 0.2, "learning_rate": 4.667264593852423e-05, "loss": 0.8606, "step": 18030 }, { "epoch": 0.2, "learning_rate": 4.667172321138572e-05, "loss": 0.8882, "step": 18035 }, { "epoch": 0.2, "learning_rate": 4.6670800484247205e-05, "loss": 0.8457, "step": 18040 }, { "epoch": 0.2, "learning_rate": 4.666987775710869e-05, "loss": 0.8893, "step": 18045 }, { "epoch": 0.2, "learning_rate": 4.666895502997018e-05, "loss": 0.9129, "step": 18050 }, { "epoch": 0.2, "learning_rate": 4.666803230283167e-05, "loss": 0.8385, "step": 18055 }, { "epoch": 0.2, "learning_rate": 4.6667109575693156e-05, "loss": 0.8087, "step": 18060 }, { "epoch": 0.2, "learning_rate": 4.6666186848554644e-05, "loss": 0.8645, "step": 18065 }, { "epoch": 0.2, "learning_rate": 4.666526412141613e-05, "loss": 0.8816, "step": 18070 }, { "epoch": 0.2, "learning_rate": 4.666434139427761e-05, "loss": 0.842, "step": 18075 }, { "epoch": 0.2, "learning_rate": 4.66634186671391e-05, "loss": 0.8404, "step": 18080 }, { "epoch": 0.2, "learning_rate": 4.6662495940000596e-05, "loss": 0.7746, "step": 18085 }, { "epoch": 0.2, "learning_rate": 4.6661573212862083e-05, "loss": 0.8902, "step": 18090 }, { "epoch": 0.2, "learning_rate": 4.6660650485723565e-05, "loss": 0.8715, "step": 18095 }, { "epoch": 0.2, "learning_rate": 4.665972775858505e-05, "loss": 0.8587, "step": 18100 }, { "epoch": 0.2, "learning_rate": 4.665880503144655e-05, "loss": 0.9138, "step": 18105 }, { "epoch": 0.2, "learning_rate": 4.6657882304308035e-05, "loss": 0.8838, "step": 18110 }, { "epoch": 0.2, "learning_rate": 4.6656959577169516e-05, "loss": 0.8556, "step": 18115 }, { "epoch": 0.2, "learning_rate": 4.6656036850031004e-05, "loss": 0.881, "step": 18120 }, { "epoch": 0.2, "learning_rate": 4.66551141228925e-05, "loss": 0.9344, "step": 18125 }, { "epoch": 0.2, "learning_rate": 4.665419139575398e-05, "loss": 0.7763, "step": 18130 }, { "epoch": 0.2, "learning_rate": 4.665326866861547e-05, "loss": 0.9443, "step": 18135 }, { "epoch": 0.2, "learning_rate": 4.6652345941476955e-05, "loss": 0.9473, "step": 18140 }, { "epoch": 0.2, "learning_rate": 4.665142321433844e-05, "loss": 0.8149, "step": 18145 }, { "epoch": 0.2, "learning_rate": 4.665050048719993e-05, "loss": 0.8323, "step": 18150 }, { "epoch": 0.2, "learning_rate": 4.664957776006142e-05, "loss": 1.0098, "step": 18155 }, { "epoch": 0.2, "learning_rate": 4.6648655032922907e-05, "loss": 0.9488, "step": 18160 }, { "epoch": 0.2, "learning_rate": 4.6647732305784394e-05, "loss": 0.8997, "step": 18165 }, { "epoch": 0.2, "learning_rate": 4.664680957864588e-05, "loss": 0.8575, "step": 18170 }, { "epoch": 0.2, "learning_rate": 4.664588685150737e-05, "loss": 0.9082, "step": 18175 }, { "epoch": 0.2, "learning_rate": 4.664496412436886e-05, "loss": 0.911, "step": 18180 }, { "epoch": 0.2, "learning_rate": 4.6644041397230346e-05, "loss": 0.9284, "step": 18185 }, { "epoch": 0.2, "learning_rate": 4.6643118670091834e-05, "loss": 0.8316, "step": 18190 }, { "epoch": 0.2, "learning_rate": 4.664219594295332e-05, "loss": 0.9155, "step": 18195 }, { "epoch": 0.2, "learning_rate": 4.664127321581481e-05, "loss": 0.8705, "step": 18200 }, { "epoch": 0.2, "learning_rate": 4.664035048867629e-05, "loss": 0.899, "step": 18205 }, { "epoch": 0.2, "learning_rate": 4.6639427761537785e-05, "loss": 0.8475, "step": 18210 }, { "epoch": 0.2, "learning_rate": 4.663850503439927e-05, "loss": 0.8104, "step": 18215 }, { "epoch": 0.2, "learning_rate": 4.663758230726076e-05, "loss": 0.9002, "step": 18220 }, { "epoch": 0.2, "learning_rate": 4.663665958012224e-05, "loss": 0.8938, "step": 18225 }, { "epoch": 0.2, "learning_rate": 4.663573685298373e-05, "loss": 0.8759, "step": 18230 }, { "epoch": 0.2, "learning_rate": 4.6634814125845224e-05, "loss": 0.8738, "step": 18235 }, { "epoch": 0.2, "learning_rate": 4.6633891398706705e-05, "loss": 0.9331, "step": 18240 }, { "epoch": 0.2, "learning_rate": 4.663296867156819e-05, "loss": 0.892, "step": 18245 }, { "epoch": 0.2, "learning_rate": 4.663204594442968e-05, "loss": 0.9054, "step": 18250 }, { "epoch": 0.2, "learning_rate": 4.6631123217291176e-05, "loss": 0.8335, "step": 18255 }, { "epoch": 0.2, "learning_rate": 4.663020049015266e-05, "loss": 0.8642, "step": 18260 }, { "epoch": 0.2, "learning_rate": 4.6629277763014145e-05, "loss": 0.8207, "step": 18265 }, { "epoch": 0.2, "learning_rate": 4.662835503587563e-05, "loss": 0.9547, "step": 18270 }, { "epoch": 0.2, "learning_rate": 4.662743230873712e-05, "loss": 0.8714, "step": 18275 }, { "epoch": 0.2, "learning_rate": 4.662650958159861e-05, "loss": 0.8856, "step": 18280 }, { "epoch": 0.2, "learning_rate": 4.6625586854460096e-05, "loss": 0.8568, "step": 18285 }, { "epoch": 0.2, "learning_rate": 4.6624664127321584e-05, "loss": 0.8859, "step": 18290 }, { "epoch": 0.2, "learning_rate": 4.662374140018307e-05, "loss": 0.866, "step": 18295 }, { "epoch": 0.2, "learning_rate": 4.662281867304456e-05, "loss": 0.8709, "step": 18300 }, { "epoch": 0.2, "learning_rate": 4.662189594590605e-05, "loss": 0.9422, "step": 18305 }, { "epoch": 0.2, "learning_rate": 4.6620973218767535e-05, "loss": 0.8924, "step": 18310 }, { "epoch": 0.2, "learning_rate": 4.6620050491629016e-05, "loss": 0.864, "step": 18315 }, { "epoch": 0.2, "learning_rate": 4.661912776449051e-05, "loss": 0.9403, "step": 18320 }, { "epoch": 0.2, "learning_rate": 4.6618205037352e-05, "loss": 0.8862, "step": 18325 }, { "epoch": 0.2, "learning_rate": 4.661728231021349e-05, "loss": 0.8931, "step": 18330 }, { "epoch": 0.2, "learning_rate": 4.661635958307497e-05, "loss": 0.8826, "step": 18335 }, { "epoch": 0.2, "learning_rate": 4.661543685593646e-05, "loss": 0.9373, "step": 18340 }, { "epoch": 0.2, "learning_rate": 4.661451412879795e-05, "loss": 0.8369, "step": 18345 }, { "epoch": 0.2, "learning_rate": 4.661359140165943e-05, "loss": 0.9757, "step": 18350 }, { "epoch": 0.2, "learning_rate": 4.661266867452092e-05, "loss": 0.8455, "step": 18355 }, { "epoch": 0.2, "learning_rate": 4.6611745947382414e-05, "loss": 0.9295, "step": 18360 }, { "epoch": 0.2, "learning_rate": 4.66108232202439e-05, "loss": 0.9159, "step": 18365 }, { "epoch": 0.2, "learning_rate": 4.660990049310538e-05, "loss": 0.9268, "step": 18370 }, { "epoch": 0.2, "learning_rate": 4.660897776596687e-05, "loss": 0.9466, "step": 18375 }, { "epoch": 0.2, "learning_rate": 4.660805503882836e-05, "loss": 0.8617, "step": 18380 }, { "epoch": 0.2, "learning_rate": 4.6607132311689846e-05, "loss": 0.9187, "step": 18385 }, { "epoch": 0.2, "learning_rate": 4.6606209584551334e-05, "loss": 0.8742, "step": 18390 }, { "epoch": 0.2, "learning_rate": 4.660528685741282e-05, "loss": 0.8807, "step": 18395 }, { "epoch": 0.2, "learning_rate": 4.660436413027431e-05, "loss": 0.7838, "step": 18400 }, { "epoch": 0.2, "learning_rate": 4.66034414031358e-05, "loss": 0.936, "step": 18405 }, { "epoch": 0.2, "learning_rate": 4.6602518675997285e-05, "loss": 0.8339, "step": 18410 }, { "epoch": 0.2, "learning_rate": 4.660159594885877e-05, "loss": 0.7946, "step": 18415 }, { "epoch": 0.2, "learning_rate": 4.660067322172026e-05, "loss": 0.9102, "step": 18420 }, { "epoch": 0.2, "learning_rate": 4.659975049458175e-05, "loss": 0.8265, "step": 18425 }, { "epoch": 0.2, "learning_rate": 4.659882776744324e-05, "loss": 0.8748, "step": 18430 }, { "epoch": 0.2, "learning_rate": 4.6597905040304725e-05, "loss": 0.8633, "step": 18435 }, { "epoch": 0.2, "learning_rate": 4.659698231316621e-05, "loss": 0.8811, "step": 18440 }, { "epoch": 0.2, "learning_rate": 4.6596059586027694e-05, "loss": 0.9285, "step": 18445 }, { "epoch": 0.2, "learning_rate": 4.659513685888919e-05, "loss": 0.9028, "step": 18450 }, { "epoch": 0.2, "learning_rate": 4.6594214131750676e-05, "loss": 0.8635, "step": 18455 }, { "epoch": 0.2, "learning_rate": 4.659329140461216e-05, "loss": 0.8456, "step": 18460 }, { "epoch": 0.2, "learning_rate": 4.6592368677473645e-05, "loss": 0.8685, "step": 18465 }, { "epoch": 0.2, "learning_rate": 4.659144595033514e-05, "loss": 0.9304, "step": 18470 }, { "epoch": 0.2, "learning_rate": 4.659052322319663e-05, "loss": 0.8561, "step": 18475 }, { "epoch": 0.2, "learning_rate": 4.658960049605811e-05, "loss": 0.8919, "step": 18480 }, { "epoch": 0.2, "learning_rate": 4.6588677768919596e-05, "loss": 0.8704, "step": 18485 }, { "epoch": 0.2, "learning_rate": 4.658775504178109e-05, "loss": 0.848, "step": 18490 }, { "epoch": 0.2, "learning_rate": 4.658683231464258e-05, "loss": 0.9018, "step": 18495 }, { "epoch": 0.2, "learning_rate": 4.658590958750406e-05, "loss": 0.8157, "step": 18500 }, { "epoch": 0.2, "learning_rate": 4.658498686036555e-05, "loss": 0.8087, "step": 18505 }, { "epoch": 0.2, "learning_rate": 4.658406413322704e-05, "loss": 0.8882, "step": 18510 }, { "epoch": 0.21, "learning_rate": 4.6583141406088523e-05, "loss": 0.8908, "step": 18515 }, { "epoch": 0.21, "learning_rate": 4.658221867895001e-05, "loss": 0.8519, "step": 18520 }, { "epoch": 0.21, "learning_rate": 4.65812959518115e-05, "loss": 0.8253, "step": 18525 }, { "epoch": 0.21, "learning_rate": 4.658037322467299e-05, "loss": 0.9624, "step": 18530 }, { "epoch": 0.21, "learning_rate": 4.6579450497534475e-05, "loss": 0.8438, "step": 18535 }, { "epoch": 0.21, "learning_rate": 4.657852777039596e-05, "loss": 0.871, "step": 18540 }, { "epoch": 0.21, "learning_rate": 4.657760504325745e-05, "loss": 0.829, "step": 18545 }, { "epoch": 0.21, "learning_rate": 4.657668231611894e-05, "loss": 0.8759, "step": 18550 }, { "epoch": 0.21, "learning_rate": 4.6575759588980426e-05, "loss": 0.8853, "step": 18555 }, { "epoch": 0.21, "learning_rate": 4.6574836861841914e-05, "loss": 0.9058, "step": 18560 }, { "epoch": 0.21, "learning_rate": 4.65739141347034e-05, "loss": 0.8289, "step": 18565 }, { "epoch": 0.21, "learning_rate": 4.657299140756489e-05, "loss": 0.8928, "step": 18570 }, { "epoch": 0.21, "learning_rate": 4.657206868042638e-05, "loss": 0.8284, "step": 18575 }, { "epoch": 0.21, "learning_rate": 4.6571145953287866e-05, "loss": 0.9214, "step": 18580 }, { "epoch": 0.21, "learning_rate": 4.657022322614935e-05, "loss": 0.9197, "step": 18585 }, { "epoch": 0.21, "learning_rate": 4.6569300499010834e-05, "loss": 0.9328, "step": 18590 }, { "epoch": 0.21, "learning_rate": 4.656837777187233e-05, "loss": 0.8821, "step": 18595 }, { "epoch": 0.21, "learning_rate": 4.656745504473382e-05, "loss": 0.8607, "step": 18600 }, { "epoch": 0.21, "learning_rate": 4.6566532317595305e-05, "loss": 0.7959, "step": 18605 }, { "epoch": 0.21, "learning_rate": 4.6565609590456786e-05, "loss": 0.8517, "step": 18610 }, { "epoch": 0.21, "learning_rate": 4.6564686863318274e-05, "loss": 0.9517, "step": 18615 }, { "epoch": 0.21, "learning_rate": 4.656376413617977e-05, "loss": 0.8029, "step": 18620 }, { "epoch": 0.21, "learning_rate": 4.656284140904125e-05, "loss": 0.8964, "step": 18625 }, { "epoch": 0.21, "learning_rate": 4.656191868190274e-05, "loss": 0.9101, "step": 18630 }, { "epoch": 0.21, "learning_rate": 4.6560995954764225e-05, "loss": 0.856, "step": 18635 }, { "epoch": 0.21, "learning_rate": 4.656007322762572e-05, "loss": 0.9051, "step": 18640 }, { "epoch": 0.21, "learning_rate": 4.65591505004872e-05, "loss": 0.9554, "step": 18645 }, { "epoch": 0.21, "learning_rate": 4.655822777334869e-05, "loss": 0.8428, "step": 18650 }, { "epoch": 0.21, "learning_rate": 4.6557305046210176e-05, "loss": 0.9237, "step": 18655 }, { "epoch": 0.21, "learning_rate": 4.6556382319071664e-05, "loss": 0.8473, "step": 18660 }, { "epoch": 0.21, "learning_rate": 4.655545959193315e-05, "loss": 0.8402, "step": 18665 }, { "epoch": 0.21, "learning_rate": 4.655453686479464e-05, "loss": 0.9025, "step": 18670 }, { "epoch": 0.21, "learning_rate": 4.655361413765613e-05, "loss": 0.8828, "step": 18675 }, { "epoch": 0.21, "learning_rate": 4.6552691410517616e-05, "loss": 0.8588, "step": 18680 }, { "epoch": 0.21, "learning_rate": 4.6551768683379104e-05, "loss": 0.8767, "step": 18685 }, { "epoch": 0.21, "learning_rate": 4.655084595624059e-05, "loss": 0.9239, "step": 18690 }, { "epoch": 0.21, "learning_rate": 4.654992322910208e-05, "loss": 0.8707, "step": 18695 }, { "epoch": 0.21, "learning_rate": 4.654900050196356e-05, "loss": 0.9224, "step": 18700 }, { "epoch": 0.21, "learning_rate": 4.6548077774825055e-05, "loss": 0.9414, "step": 18705 }, { "epoch": 0.21, "learning_rate": 4.654715504768654e-05, "loss": 0.8218, "step": 18710 }, { "epoch": 0.21, "learning_rate": 4.654623232054803e-05, "loss": 0.9178, "step": 18715 }, { "epoch": 0.21, "learning_rate": 4.654530959340951e-05, "loss": 0.8535, "step": 18720 }, { "epoch": 0.21, "learning_rate": 4.6544386866271006e-05, "loss": 0.8013, "step": 18725 }, { "epoch": 0.21, "learning_rate": 4.6543464139132494e-05, "loss": 0.8759, "step": 18730 }, { "epoch": 0.21, "learning_rate": 4.6542541411993975e-05, "loss": 0.8311, "step": 18735 }, { "epoch": 0.21, "learning_rate": 4.654161868485546e-05, "loss": 0.8646, "step": 18740 }, { "epoch": 0.21, "learning_rate": 4.654069595771696e-05, "loss": 0.9312, "step": 18745 }, { "epoch": 0.21, "learning_rate": 4.6539773230578446e-05, "loss": 0.8557, "step": 18750 }, { "epoch": 0.21, "learning_rate": 4.653885050343993e-05, "loss": 0.8668, "step": 18755 }, { "epoch": 0.21, "learning_rate": 4.6537927776301415e-05, "loss": 0.8574, "step": 18760 }, { "epoch": 0.21, "learning_rate": 4.65370050491629e-05, "loss": 0.8479, "step": 18765 }, { "epoch": 0.21, "learning_rate": 4.653608232202439e-05, "loss": 0.8497, "step": 18770 }, { "epoch": 0.21, "learning_rate": 4.653515959488588e-05, "loss": 0.8398, "step": 18775 }, { "epoch": 0.21, "learning_rate": 4.6534236867747366e-05, "loss": 0.8511, "step": 18780 }, { "epoch": 0.21, "learning_rate": 4.6533314140608854e-05, "loss": 0.8612, "step": 18785 }, { "epoch": 0.21, "learning_rate": 4.653239141347034e-05, "loss": 0.8482, "step": 18790 }, { "epoch": 0.21, "learning_rate": 4.653146868633183e-05, "loss": 0.8583, "step": 18795 }, { "epoch": 0.21, "learning_rate": 4.653054595919332e-05, "loss": 0.8833, "step": 18800 }, { "epoch": 0.21, "learning_rate": 4.6529623232054805e-05, "loss": 0.8948, "step": 18805 }, { "epoch": 0.21, "learning_rate": 4.652870050491629e-05, "loss": 0.8901, "step": 18810 }, { "epoch": 0.21, "learning_rate": 4.652777777777778e-05, "loss": 0.841, "step": 18815 }, { "epoch": 0.21, "learning_rate": 4.652685505063927e-05, "loss": 0.9007, "step": 18820 }, { "epoch": 0.21, "learning_rate": 4.6525932323500757e-05, "loss": 0.8425, "step": 18825 }, { "epoch": 0.21, "learning_rate": 4.652500959636224e-05, "loss": 0.9393, "step": 18830 }, { "epoch": 0.21, "learning_rate": 4.652408686922373e-05, "loss": 0.7986, "step": 18835 }, { "epoch": 0.21, "learning_rate": 4.652316414208522e-05, "loss": 0.8678, "step": 18840 }, { "epoch": 0.21, "learning_rate": 4.65222414149467e-05, "loss": 0.8845, "step": 18845 }, { "epoch": 0.21, "learning_rate": 4.652131868780819e-05, "loss": 0.8576, "step": 18850 }, { "epoch": 0.21, "learning_rate": 4.6520395960669684e-05, "loss": 0.8607, "step": 18855 }, { "epoch": 0.21, "learning_rate": 4.651947323353117e-05, "loss": 0.8608, "step": 18860 }, { "epoch": 0.21, "learning_rate": 4.651855050639265e-05, "loss": 0.8794, "step": 18865 }, { "epoch": 0.21, "learning_rate": 4.651762777925414e-05, "loss": 0.9259, "step": 18870 }, { "epoch": 0.21, "learning_rate": 4.6516705052115635e-05, "loss": 0.8998, "step": 18875 }, { "epoch": 0.21, "learning_rate": 4.651578232497712e-05, "loss": 0.8161, "step": 18880 }, { "epoch": 0.21, "learning_rate": 4.6514859597838604e-05, "loss": 0.9679, "step": 18885 }, { "epoch": 0.21, "learning_rate": 4.651393687070009e-05, "loss": 0.8491, "step": 18890 }, { "epoch": 0.21, "learning_rate": 4.6513014143561586e-05, "loss": 0.8351, "step": 18895 }, { "epoch": 0.21, "learning_rate": 4.651209141642307e-05, "loss": 0.955, "step": 18900 }, { "epoch": 0.21, "learning_rate": 4.6511168689284555e-05, "loss": 0.8896, "step": 18905 }, { "epoch": 0.21, "learning_rate": 4.651024596214604e-05, "loss": 0.8139, "step": 18910 }, { "epoch": 0.21, "learning_rate": 4.650932323500753e-05, "loss": 0.9267, "step": 18915 }, { "epoch": 0.21, "learning_rate": 4.650840050786902e-05, "loss": 0.9091, "step": 18920 }, { "epoch": 0.21, "learning_rate": 4.650747778073051e-05, "loss": 0.8723, "step": 18925 }, { "epoch": 0.21, "learning_rate": 4.6506555053591995e-05, "loss": 0.8669, "step": 18930 }, { "epoch": 0.21, "learning_rate": 4.650563232645348e-05, "loss": 0.8617, "step": 18935 }, { "epoch": 0.21, "learning_rate": 4.650470959931497e-05, "loss": 0.9015, "step": 18940 }, { "epoch": 0.21, "learning_rate": 4.650378687217646e-05, "loss": 0.9409, "step": 18945 }, { "epoch": 0.21, "learning_rate": 4.6502864145037946e-05, "loss": 0.8745, "step": 18950 }, { "epoch": 0.21, "learning_rate": 4.6501941417899434e-05, "loss": 0.7855, "step": 18955 }, { "epoch": 0.21, "learning_rate": 4.650101869076092e-05, "loss": 0.8939, "step": 18960 }, { "epoch": 0.21, "learning_rate": 4.650009596362241e-05, "loss": 0.9004, "step": 18965 }, { "epoch": 0.21, "learning_rate": 4.64991732364839e-05, "loss": 0.8816, "step": 18970 }, { "epoch": 0.21, "learning_rate": 4.649825050934538e-05, "loss": 0.86, "step": 18975 }, { "epoch": 0.21, "learning_rate": 4.6497327782206866e-05, "loss": 0.9112, "step": 18980 }, { "epoch": 0.21, "learning_rate": 4.649640505506836e-05, "loss": 0.8546, "step": 18985 }, { "epoch": 0.21, "learning_rate": 4.649548232792985e-05, "loss": 0.8543, "step": 18990 }, { "epoch": 0.21, "learning_rate": 4.649455960079133e-05, "loss": 0.8914, "step": 18995 }, { "epoch": 0.21, "learning_rate": 4.649363687365282e-05, "loss": 0.8881, "step": 19000 }, { "epoch": 0.21, "eval_loss": 0.7878119945526123, "eval_runtime": 69.8623, "eval_samples_per_second": 28.628, "eval_steps_per_second": 14.314, "step": 19000 }, { "epoch": 0.21, "learning_rate": 4.649271414651431e-05, "loss": 0.8287, "step": 19005 }, { "epoch": 0.21, "learning_rate": 4.6491791419375793e-05, "loss": 0.8539, "step": 19010 }, { "epoch": 0.21, "learning_rate": 4.649086869223728e-05, "loss": 0.8583, "step": 19015 }, { "epoch": 0.21, "learning_rate": 4.648994596509877e-05, "loss": 0.84, "step": 19020 }, { "epoch": 0.21, "learning_rate": 4.6489023237960264e-05, "loss": 0.8693, "step": 19025 }, { "epoch": 0.21, "learning_rate": 4.6488100510821745e-05, "loss": 0.807, "step": 19030 }, { "epoch": 0.21, "learning_rate": 4.648717778368323e-05, "loss": 0.86, "step": 19035 }, { "epoch": 0.21, "learning_rate": 4.648625505654472e-05, "loss": 0.8629, "step": 19040 }, { "epoch": 0.21, "learning_rate": 4.648533232940621e-05, "loss": 0.9374, "step": 19045 }, { "epoch": 0.21, "learning_rate": 4.6484409602267696e-05, "loss": 0.8508, "step": 19050 }, { "epoch": 0.21, "learning_rate": 4.6483486875129184e-05, "loss": 0.8478, "step": 19055 }, { "epoch": 0.21, "learning_rate": 4.648256414799067e-05, "loss": 0.9065, "step": 19060 }, { "epoch": 0.21, "learning_rate": 4.648164142085216e-05, "loss": 0.8731, "step": 19065 }, { "epoch": 0.21, "learning_rate": 4.648071869371365e-05, "loss": 0.885, "step": 19070 }, { "epoch": 0.21, "learning_rate": 4.6479795966575135e-05, "loss": 0.8406, "step": 19075 }, { "epoch": 0.21, "learning_rate": 4.647887323943662e-05, "loss": 0.8914, "step": 19080 }, { "epoch": 0.21, "learning_rate": 4.6477950512298104e-05, "loss": 0.8066, "step": 19085 }, { "epoch": 0.21, "learning_rate": 4.64770277851596e-05, "loss": 0.8607, "step": 19090 }, { "epoch": 0.21, "learning_rate": 4.647610505802109e-05, "loss": 0.8569, "step": 19095 }, { "epoch": 0.21, "learning_rate": 4.6475182330882575e-05, "loss": 0.7603, "step": 19100 }, { "epoch": 0.21, "learning_rate": 4.6474259603744056e-05, "loss": 0.8835, "step": 19105 }, { "epoch": 0.21, "learning_rate": 4.647333687660555e-05, "loss": 0.8481, "step": 19110 }, { "epoch": 0.21, "learning_rate": 4.647241414946704e-05, "loss": 0.8442, "step": 19115 }, { "epoch": 0.21, "learning_rate": 4.647149142232852e-05, "loss": 0.8992, "step": 19120 }, { "epoch": 0.21, "learning_rate": 4.647056869519001e-05, "loss": 0.8605, "step": 19125 }, { "epoch": 0.21, "learning_rate": 4.6469645968051495e-05, "loss": 0.9781, "step": 19130 }, { "epoch": 0.21, "learning_rate": 4.646872324091299e-05, "loss": 0.8276, "step": 19135 }, { "epoch": 0.21, "learning_rate": 4.646780051377447e-05, "loss": 0.9165, "step": 19140 }, { "epoch": 0.21, "learning_rate": 4.646687778663596e-05, "loss": 0.9092, "step": 19145 }, { "epoch": 0.21, "learning_rate": 4.6465955059497446e-05, "loss": 0.9379, "step": 19150 }, { "epoch": 0.21, "learning_rate": 4.6465032332358934e-05, "loss": 0.8197, "step": 19155 }, { "epoch": 0.21, "learning_rate": 4.646410960522042e-05, "loss": 0.8492, "step": 19160 }, { "epoch": 0.21, "learning_rate": 4.646318687808191e-05, "loss": 0.8368, "step": 19165 }, { "epoch": 0.21, "learning_rate": 4.64622641509434e-05, "loss": 0.861, "step": 19170 }, { "epoch": 0.21, "learning_rate": 4.6461341423804886e-05, "loss": 0.8951, "step": 19175 }, { "epoch": 0.21, "learning_rate": 4.6460418696666373e-05, "loss": 0.8413, "step": 19180 }, { "epoch": 0.21, "learning_rate": 4.645949596952786e-05, "loss": 0.8576, "step": 19185 }, { "epoch": 0.21, "learning_rate": 4.645857324238935e-05, "loss": 0.857, "step": 19190 }, { "epoch": 0.21, "learning_rate": 4.645765051525084e-05, "loss": 0.861, "step": 19195 }, { "epoch": 0.21, "learning_rate": 4.6456727788112325e-05, "loss": 0.8379, "step": 19200 }, { "epoch": 0.21, "learning_rate": 4.645580506097381e-05, "loss": 0.8895, "step": 19205 }, { "epoch": 0.21, "learning_rate": 4.64548823338353e-05, "loss": 0.8977, "step": 19210 }, { "epoch": 0.21, "learning_rate": 4.645395960669678e-05, "loss": 0.8478, "step": 19215 }, { "epoch": 0.21, "learning_rate": 4.6453036879558276e-05, "loss": 0.8999, "step": 19220 }, { "epoch": 0.21, "learning_rate": 4.6452114152419764e-05, "loss": 0.9008, "step": 19225 }, { "epoch": 0.21, "learning_rate": 4.6451191425281245e-05, "loss": 0.8637, "step": 19230 }, { "epoch": 0.21, "learning_rate": 4.645026869814273e-05, "loss": 0.8619, "step": 19235 }, { "epoch": 0.21, "learning_rate": 4.644934597100423e-05, "loss": 0.9449, "step": 19240 }, { "epoch": 0.21, "learning_rate": 4.6448423243865716e-05, "loss": 0.907, "step": 19245 }, { "epoch": 0.21, "learning_rate": 4.6447500516727197e-05, "loss": 0.8698, "step": 19250 }, { "epoch": 0.21, "learning_rate": 4.6446577789588684e-05, "loss": 0.8543, "step": 19255 }, { "epoch": 0.21, "learning_rate": 4.644565506245018e-05, "loss": 0.8487, "step": 19260 }, { "epoch": 0.21, "learning_rate": 4.644473233531167e-05, "loss": 0.8065, "step": 19265 }, { "epoch": 0.21, "learning_rate": 4.644380960817315e-05, "loss": 0.8862, "step": 19270 }, { "epoch": 0.21, "learning_rate": 4.6442886881034636e-05, "loss": 0.836, "step": 19275 }, { "epoch": 0.21, "learning_rate": 4.6441964153896124e-05, "loss": 0.7997, "step": 19280 }, { "epoch": 0.21, "learning_rate": 4.644104142675761e-05, "loss": 0.9097, "step": 19285 }, { "epoch": 0.21, "learning_rate": 4.64401186996191e-05, "loss": 0.8695, "step": 19290 }, { "epoch": 0.21, "learning_rate": 4.643919597248059e-05, "loss": 0.8376, "step": 19295 }, { "epoch": 0.21, "learning_rate": 4.6438273245342075e-05, "loss": 0.7947, "step": 19300 }, { "epoch": 0.21, "learning_rate": 4.643735051820356e-05, "loss": 0.9058, "step": 19305 }, { "epoch": 0.21, "learning_rate": 4.643642779106505e-05, "loss": 0.7953, "step": 19310 }, { "epoch": 0.21, "learning_rate": 4.643550506392654e-05, "loss": 0.8959, "step": 19315 }, { "epoch": 0.21, "learning_rate": 4.6434582336788026e-05, "loss": 0.9252, "step": 19320 }, { "epoch": 0.21, "learning_rate": 4.6433659609649514e-05, "loss": 0.8724, "step": 19325 }, { "epoch": 0.21, "learning_rate": 4.6432736882511e-05, "loss": 0.863, "step": 19330 }, { "epoch": 0.21, "learning_rate": 4.643181415537249e-05, "loss": 0.9894, "step": 19335 }, { "epoch": 0.21, "learning_rate": 4.643089142823398e-05, "loss": 0.91, "step": 19340 }, { "epoch": 0.21, "learning_rate": 4.6429968701095466e-05, "loss": 0.949, "step": 19345 }, { "epoch": 0.21, "learning_rate": 4.6429045973956954e-05, "loss": 0.8769, "step": 19350 }, { "epoch": 0.21, "learning_rate": 4.642812324681844e-05, "loss": 0.8598, "step": 19355 }, { "epoch": 0.21, "learning_rate": 4.642720051967992e-05, "loss": 0.925, "step": 19360 }, { "epoch": 0.21, "learning_rate": 4.642627779254141e-05, "loss": 0.8408, "step": 19365 }, { "epoch": 0.21, "learning_rate": 4.6425355065402905e-05, "loss": 0.7905, "step": 19370 }, { "epoch": 0.21, "learning_rate": 4.642443233826439e-05, "loss": 0.8118, "step": 19375 }, { "epoch": 0.21, "learning_rate": 4.6423509611125874e-05, "loss": 0.8187, "step": 19380 }, { "epoch": 0.21, "learning_rate": 4.642258688398736e-05, "loss": 0.7629, "step": 19385 }, { "epoch": 0.21, "learning_rate": 4.6421664156848856e-05, "loss": 0.8529, "step": 19390 }, { "epoch": 0.21, "learning_rate": 4.642074142971034e-05, "loss": 0.8203, "step": 19395 }, { "epoch": 0.21, "learning_rate": 4.6419818702571825e-05, "loss": 0.8058, "step": 19400 }, { "epoch": 0.21, "learning_rate": 4.641889597543331e-05, "loss": 0.7508, "step": 19405 }, { "epoch": 0.21, "learning_rate": 4.641797324829481e-05, "loss": 0.7401, "step": 19410 }, { "epoch": 0.21, "learning_rate": 4.641705052115629e-05, "loss": 0.7977, "step": 19415 }, { "epoch": 0.22, "learning_rate": 4.641612779401778e-05, "loss": 0.7764, "step": 19420 }, { "epoch": 0.22, "learning_rate": 4.6415205066879265e-05, "loss": 0.8771, "step": 19425 }, { "epoch": 0.22, "learning_rate": 4.641428233974075e-05, "loss": 0.7636, "step": 19430 }, { "epoch": 0.22, "learning_rate": 4.641335961260224e-05, "loss": 0.8494, "step": 19435 }, { "epoch": 0.22, "learning_rate": 4.641243688546373e-05, "loss": 0.8321, "step": 19440 }, { "epoch": 0.22, "learning_rate": 4.6411514158325216e-05, "loss": 0.8234, "step": 19445 }, { "epoch": 0.22, "learning_rate": 4.6410591431186704e-05, "loss": 0.8213, "step": 19450 }, { "epoch": 0.22, "learning_rate": 4.640966870404819e-05, "loss": 0.7949, "step": 19455 }, { "epoch": 0.22, "learning_rate": 4.640874597690968e-05, "loss": 0.7936, "step": 19460 }, { "epoch": 0.22, "learning_rate": 4.640782324977117e-05, "loss": 0.8301, "step": 19465 }, { "epoch": 0.22, "learning_rate": 4.640690052263265e-05, "loss": 0.8062, "step": 19470 }, { "epoch": 0.22, "learning_rate": 4.640597779549414e-05, "loss": 0.8063, "step": 19475 }, { "epoch": 0.22, "learning_rate": 4.640505506835563e-05, "loss": 0.8351, "step": 19480 }, { "epoch": 0.22, "learning_rate": 4.640413234121712e-05, "loss": 0.7199, "step": 19485 }, { "epoch": 0.22, "learning_rate": 4.64032096140786e-05, "loss": 0.8107, "step": 19490 }, { "epoch": 0.22, "learning_rate": 4.6402286886940094e-05, "loss": 0.8166, "step": 19495 }, { "epoch": 0.22, "learning_rate": 4.640136415980158e-05, "loss": 0.7937, "step": 19500 }, { "epoch": 0.22, "learning_rate": 4.640044143266306e-05, "loss": 0.814, "step": 19505 }, { "epoch": 0.22, "learning_rate": 4.639951870552455e-05, "loss": 0.8309, "step": 19510 }, { "epoch": 0.22, "learning_rate": 4.639859597838604e-05, "loss": 0.8268, "step": 19515 }, { "epoch": 0.22, "learning_rate": 4.6397673251247534e-05, "loss": 0.7998, "step": 19520 }, { "epoch": 0.22, "learning_rate": 4.6396750524109015e-05, "loss": 0.7442, "step": 19525 }, { "epoch": 0.22, "learning_rate": 4.63958277969705e-05, "loss": 0.7556, "step": 19530 }, { "epoch": 0.22, "learning_rate": 4.639490506983199e-05, "loss": 0.8239, "step": 19535 }, { "epoch": 0.22, "learning_rate": 4.6393982342693485e-05, "loss": 0.8137, "step": 19540 }, { "epoch": 0.22, "learning_rate": 4.6393059615554966e-05, "loss": 0.7876, "step": 19545 }, { "epoch": 0.22, "learning_rate": 4.6392136888416454e-05, "loss": 0.8037, "step": 19550 }, { "epoch": 0.22, "learning_rate": 4.639121416127794e-05, "loss": 0.7808, "step": 19555 }, { "epoch": 0.22, "learning_rate": 4.639029143413943e-05, "loss": 0.8082, "step": 19560 }, { "epoch": 0.22, "learning_rate": 4.638936870700092e-05, "loss": 0.7693, "step": 19565 }, { "epoch": 0.22, "learning_rate": 4.6388445979862405e-05, "loss": 0.8464, "step": 19570 }, { "epoch": 0.22, "learning_rate": 4.638752325272389e-05, "loss": 0.7774, "step": 19575 }, { "epoch": 0.22, "learning_rate": 4.638660052558538e-05, "loss": 0.7264, "step": 19580 }, { "epoch": 0.22, "learning_rate": 4.638567779844687e-05, "loss": 0.7423, "step": 19585 }, { "epoch": 0.22, "learning_rate": 4.638475507130836e-05, "loss": 0.7777, "step": 19590 }, { "epoch": 0.22, "learning_rate": 4.6383832344169845e-05, "loss": 0.8353, "step": 19595 }, { "epoch": 0.22, "learning_rate": 4.6382909617031326e-05, "loss": 0.8821, "step": 19600 }, { "epoch": 0.22, "learning_rate": 4.638198688989282e-05, "loss": 0.8387, "step": 19605 }, { "epoch": 0.22, "learning_rate": 4.638106416275431e-05, "loss": 0.854, "step": 19610 }, { "epoch": 0.22, "learning_rate": 4.6380141435615796e-05, "loss": 0.8396, "step": 19615 }, { "epoch": 0.22, "learning_rate": 4.637921870847728e-05, "loss": 0.8202, "step": 19620 }, { "epoch": 0.22, "learning_rate": 4.637829598133877e-05, "loss": 0.9395, "step": 19625 }, { "epoch": 0.22, "learning_rate": 4.637737325420026e-05, "loss": 0.7648, "step": 19630 }, { "epoch": 0.22, "learning_rate": 4.637645052706174e-05, "loss": 0.8134, "step": 19635 }, { "epoch": 0.22, "learning_rate": 4.637552779992323e-05, "loss": 0.7498, "step": 19640 }, { "epoch": 0.22, "learning_rate": 4.637460507278472e-05, "loss": 0.755, "step": 19645 }, { "epoch": 0.22, "learning_rate": 4.637368234564621e-05, "loss": 0.778, "step": 19650 }, { "epoch": 0.22, "learning_rate": 4.637275961850769e-05, "loss": 0.8318, "step": 19655 }, { "epoch": 0.22, "learning_rate": 4.637183689136918e-05, "loss": 0.7864, "step": 19660 }, { "epoch": 0.22, "learning_rate": 4.637091416423067e-05, "loss": 0.8243, "step": 19665 }, { "epoch": 0.22, "learning_rate": 4.6369991437092156e-05, "loss": 0.8315, "step": 19670 }, { "epoch": 0.22, "learning_rate": 4.6369068709953643e-05, "loss": 0.8183, "step": 19675 }, { "epoch": 0.22, "learning_rate": 4.636814598281513e-05, "loss": 0.7936, "step": 19680 }, { "epoch": 0.22, "learning_rate": 4.636722325567662e-05, "loss": 0.7909, "step": 19685 }, { "epoch": 0.22, "learning_rate": 4.636630052853811e-05, "loss": 0.8095, "step": 19690 }, { "epoch": 0.22, "learning_rate": 4.6365377801399595e-05, "loss": 0.8764, "step": 19695 }, { "epoch": 0.22, "learning_rate": 4.636445507426108e-05, "loss": 0.7735, "step": 19700 }, { "epoch": 0.22, "learning_rate": 4.636353234712257e-05, "loss": 0.8504, "step": 19705 }, { "epoch": 0.22, "learning_rate": 4.636260961998406e-05, "loss": 0.8699, "step": 19710 }, { "epoch": 0.22, "learning_rate": 4.6361686892845546e-05, "loss": 0.7624, "step": 19715 }, { "epoch": 0.22, "learning_rate": 4.6360764165707034e-05, "loss": 0.8231, "step": 19720 }, { "epoch": 0.22, "learning_rate": 4.635984143856852e-05, "loss": 0.7944, "step": 19725 }, { "epoch": 0.22, "learning_rate": 4.635891871143001e-05, "loss": 0.8026, "step": 19730 }, { "epoch": 0.22, "learning_rate": 4.63579959842915e-05, "loss": 0.8197, "step": 19735 }, { "epoch": 0.22, "learning_rate": 4.6357073257152985e-05, "loss": 0.7346, "step": 19740 }, { "epoch": 0.22, "learning_rate": 4.6356150530014466e-05, "loss": 0.75, "step": 19745 }, { "epoch": 0.22, "learning_rate": 4.6355227802875954e-05, "loss": 0.8587, "step": 19750 }, { "epoch": 0.22, "learning_rate": 4.635430507573745e-05, "loss": 0.7468, "step": 19755 }, { "epoch": 0.22, "learning_rate": 4.635338234859894e-05, "loss": 0.7928, "step": 19760 }, { "epoch": 0.22, "learning_rate": 4.635245962146042e-05, "loss": 0.8127, "step": 19765 }, { "epoch": 0.22, "learning_rate": 4.6351536894321906e-05, "loss": 0.7697, "step": 19770 }, { "epoch": 0.22, "learning_rate": 4.63506141671834e-05, "loss": 0.8247, "step": 19775 }, { "epoch": 0.22, "learning_rate": 4.634969144004488e-05, "loss": 0.828, "step": 19780 }, { "epoch": 0.22, "learning_rate": 4.634876871290637e-05, "loss": 0.7728, "step": 19785 }, { "epoch": 0.22, "learning_rate": 4.634784598576786e-05, "loss": 0.8127, "step": 19790 }, { "epoch": 0.22, "learning_rate": 4.634692325862935e-05, "loss": 0.8185, "step": 19795 }, { "epoch": 0.22, "learning_rate": 4.634600053149083e-05, "loss": 0.8153, "step": 19800 }, { "epoch": 0.22, "learning_rate": 4.634507780435232e-05, "loss": 0.7764, "step": 19805 }, { "epoch": 0.22, "learning_rate": 4.634415507721381e-05, "loss": 0.8128, "step": 19810 }, { "epoch": 0.22, "learning_rate": 4.6343232350075296e-05, "loss": 0.8892, "step": 19815 }, { "epoch": 0.22, "learning_rate": 4.6342309622936784e-05, "loss": 0.8779, "step": 19820 }, { "epoch": 0.22, "learning_rate": 4.634138689579827e-05, "loss": 0.7793, "step": 19825 }, { "epoch": 0.22, "learning_rate": 4.634046416865976e-05, "loss": 0.8289, "step": 19830 }, { "epoch": 0.22, "learning_rate": 4.633954144152125e-05, "loss": 0.7625, "step": 19835 }, { "epoch": 0.22, "learning_rate": 4.6338618714382736e-05, "loss": 0.7842, "step": 19840 }, { "epoch": 0.22, "learning_rate": 4.6337695987244223e-05, "loss": 0.82, "step": 19845 }, { "epoch": 0.22, "learning_rate": 4.633677326010571e-05, "loss": 0.8207, "step": 19850 }, { "epoch": 0.22, "learning_rate": 4.633585053296719e-05, "loss": 0.8168, "step": 19855 }, { "epoch": 0.22, "learning_rate": 4.633492780582869e-05, "loss": 0.8113, "step": 19860 }, { "epoch": 0.22, "learning_rate": 4.6334005078690175e-05, "loss": 0.7587, "step": 19865 }, { "epoch": 0.22, "learning_rate": 4.633308235155166e-05, "loss": 0.8028, "step": 19870 }, { "epoch": 0.22, "learning_rate": 4.6332159624413144e-05, "loss": 0.7255, "step": 19875 }, { "epoch": 0.22, "learning_rate": 4.633123689727464e-05, "loss": 0.7562, "step": 19880 }, { "epoch": 0.22, "learning_rate": 4.6330314170136126e-05, "loss": 0.7903, "step": 19885 }, { "epoch": 0.22, "learning_rate": 4.632939144299761e-05, "loss": 0.7946, "step": 19890 }, { "epoch": 0.22, "learning_rate": 4.6328468715859095e-05, "loss": 0.7978, "step": 19895 }, { "epoch": 0.22, "learning_rate": 4.632754598872058e-05, "loss": 0.8312, "step": 19900 }, { "epoch": 0.22, "learning_rate": 4.632662326158208e-05, "loss": 0.8827, "step": 19905 }, { "epoch": 0.22, "learning_rate": 4.632570053444356e-05, "loss": 0.7858, "step": 19910 }, { "epoch": 0.22, "learning_rate": 4.6324777807305047e-05, "loss": 0.7988, "step": 19915 }, { "epoch": 0.22, "learning_rate": 4.6323855080166534e-05, "loss": 0.7691, "step": 19920 }, { "epoch": 0.22, "learning_rate": 4.632293235302803e-05, "loss": 0.7994, "step": 19925 }, { "epoch": 0.22, "learning_rate": 4.632200962588951e-05, "loss": 0.8192, "step": 19930 }, { "epoch": 0.22, "learning_rate": 4.6321086898751e-05, "loss": 0.7426, "step": 19935 }, { "epoch": 0.22, "learning_rate": 4.6320164171612486e-05, "loss": 0.7848, "step": 19940 }, { "epoch": 0.22, "learning_rate": 4.6319241444473974e-05, "loss": 0.8458, "step": 19945 }, { "epoch": 0.22, "learning_rate": 4.631831871733546e-05, "loss": 0.8318, "step": 19950 }, { "epoch": 0.22, "learning_rate": 4.631739599019695e-05, "loss": 0.7594, "step": 19955 }, { "epoch": 0.22, "learning_rate": 4.631647326305844e-05, "loss": 0.8076, "step": 19960 }, { "epoch": 0.22, "learning_rate": 4.631555053591992e-05, "loss": 0.7726, "step": 19965 }, { "epoch": 0.22, "learning_rate": 4.631462780878141e-05, "loss": 0.8101, "step": 19970 }, { "epoch": 0.22, "learning_rate": 4.63137050816429e-05, "loss": 0.7643, "step": 19975 }, { "epoch": 0.22, "learning_rate": 4.631278235450439e-05, "loss": 0.7986, "step": 19980 }, { "epoch": 0.22, "learning_rate": 4.631185962736587e-05, "loss": 0.8329, "step": 19985 }, { "epoch": 0.22, "learning_rate": 4.6310936900227364e-05, "loss": 0.7902, "step": 19990 }, { "epoch": 0.22, "learning_rate": 4.631001417308885e-05, "loss": 0.8113, "step": 19995 }, { "epoch": 0.22, "learning_rate": 4.630909144595034e-05, "loss": 0.8397, "step": 20000 }, { "epoch": 0.22, "eval_loss": 0.7989374399185181, "eval_runtime": 70.3068, "eval_samples_per_second": 28.447, "eval_steps_per_second": 14.223, "step": 20000 }, { "epoch": 0.22, "learning_rate": 4.630816871881182e-05, "loss": 0.8888, "step": 20005 }, { "epoch": 0.22, "learning_rate": 4.6307245991673316e-05, "loss": 0.7775, "step": 20010 }, { "epoch": 0.22, "learning_rate": 4.6306323264534804e-05, "loss": 0.8577, "step": 20015 }, { "epoch": 0.22, "learning_rate": 4.6305400537396285e-05, "loss": 0.7865, "step": 20020 }, { "epoch": 0.22, "learning_rate": 4.630447781025777e-05, "loss": 0.9131, "step": 20025 }, { "epoch": 0.22, "learning_rate": 4.630355508311927e-05, "loss": 0.8213, "step": 20030 }, { "epoch": 0.22, "learning_rate": 4.6302632355980755e-05, "loss": 0.7757, "step": 20035 }, { "epoch": 0.22, "learning_rate": 4.6301709628842236e-05, "loss": 0.8455, "step": 20040 }, { "epoch": 0.22, "learning_rate": 4.6300786901703724e-05, "loss": 0.8803, "step": 20045 }, { "epoch": 0.22, "learning_rate": 4.629986417456521e-05, "loss": 0.7969, "step": 20050 }, { "epoch": 0.22, "learning_rate": 4.62989414474267e-05, "loss": 0.8182, "step": 20055 }, { "epoch": 0.22, "learning_rate": 4.629801872028819e-05, "loss": 0.7775, "step": 20060 }, { "epoch": 0.22, "learning_rate": 4.6297095993149675e-05, "loss": 0.821, "step": 20065 }, { "epoch": 0.22, "learning_rate": 4.629617326601116e-05, "loss": 0.7776, "step": 20070 }, { "epoch": 0.22, "learning_rate": 4.629525053887265e-05, "loss": 0.8575, "step": 20075 }, { "epoch": 0.22, "learning_rate": 4.629432781173414e-05, "loss": 0.8069, "step": 20080 }, { "epoch": 0.22, "learning_rate": 4.629340508459563e-05, "loss": 0.8348, "step": 20085 }, { "epoch": 0.22, "learning_rate": 4.6292482357457115e-05, "loss": 0.7743, "step": 20090 }, { "epoch": 0.22, "learning_rate": 4.62915596303186e-05, "loss": 0.7727, "step": 20095 }, { "epoch": 0.22, "learning_rate": 4.629063690318009e-05, "loss": 0.7214, "step": 20100 }, { "epoch": 0.22, "learning_rate": 4.628971417604158e-05, "loss": 0.7848, "step": 20105 }, { "epoch": 0.22, "learning_rate": 4.6288791448903066e-05, "loss": 0.8289, "step": 20110 }, { "epoch": 0.22, "learning_rate": 4.628786872176455e-05, "loss": 0.8135, "step": 20115 }, { "epoch": 0.22, "learning_rate": 4.628694599462604e-05, "loss": 0.8717, "step": 20120 }, { "epoch": 0.22, "learning_rate": 4.628602326748753e-05, "loss": 0.8524, "step": 20125 }, { "epoch": 0.22, "learning_rate": 4.628510054034901e-05, "loss": 0.7876, "step": 20130 }, { "epoch": 0.22, "learning_rate": 4.62841778132105e-05, "loss": 0.7652, "step": 20135 }, { "epoch": 0.22, "learning_rate": 4.628325508607199e-05, "loss": 0.8496, "step": 20140 }, { "epoch": 0.22, "learning_rate": 4.628233235893348e-05, "loss": 0.7895, "step": 20145 }, { "epoch": 0.22, "learning_rate": 4.628140963179496e-05, "loss": 0.7647, "step": 20150 }, { "epoch": 0.22, "learning_rate": 4.628048690465645e-05, "loss": 0.8447, "step": 20155 }, { "epoch": 0.22, "learning_rate": 4.6279564177517944e-05, "loss": 0.8279, "step": 20160 }, { "epoch": 0.22, "learning_rate": 4.6278641450379425e-05, "loss": 0.7477, "step": 20165 }, { "epoch": 0.22, "learning_rate": 4.627771872324091e-05, "loss": 0.8115, "step": 20170 }, { "epoch": 0.22, "learning_rate": 4.62767959961024e-05, "loss": 0.8098, "step": 20175 }, { "epoch": 0.22, "learning_rate": 4.6275873268963896e-05, "loss": 0.8311, "step": 20180 }, { "epoch": 0.22, "learning_rate": 4.627495054182538e-05, "loss": 0.8366, "step": 20185 }, { "epoch": 0.22, "learning_rate": 4.6274027814686865e-05, "loss": 0.8563, "step": 20190 }, { "epoch": 0.22, "learning_rate": 4.627310508754835e-05, "loss": 0.7294, "step": 20195 }, { "epoch": 0.22, "learning_rate": 4.627218236040984e-05, "loss": 0.873, "step": 20200 }, { "epoch": 0.22, "learning_rate": 4.627125963327133e-05, "loss": 0.7988, "step": 20205 }, { "epoch": 0.22, "learning_rate": 4.6270336906132816e-05, "loss": 0.8022, "step": 20210 }, { "epoch": 0.22, "learning_rate": 4.6269414178994304e-05, "loss": 0.8105, "step": 20215 }, { "epoch": 0.22, "learning_rate": 4.626849145185579e-05, "loss": 0.7957, "step": 20220 }, { "epoch": 0.22, "learning_rate": 4.626756872471728e-05, "loss": 0.8214, "step": 20225 }, { "epoch": 0.22, "learning_rate": 4.626664599757877e-05, "loss": 0.7746, "step": 20230 }, { "epoch": 0.22, "learning_rate": 4.6265723270440255e-05, "loss": 0.8198, "step": 20235 }, { "epoch": 0.22, "learning_rate": 4.6264800543301736e-05, "loss": 0.8272, "step": 20240 }, { "epoch": 0.22, "learning_rate": 4.626387781616323e-05, "loss": 0.8174, "step": 20245 }, { "epoch": 0.22, "learning_rate": 4.626295508902472e-05, "loss": 0.7847, "step": 20250 }, { "epoch": 0.22, "learning_rate": 4.626203236188621e-05, "loss": 0.8518, "step": 20255 }, { "epoch": 0.22, "learning_rate": 4.626110963474769e-05, "loss": 0.86, "step": 20260 }, { "epoch": 0.22, "learning_rate": 4.626018690760918e-05, "loss": 0.7997, "step": 20265 }, { "epoch": 0.22, "learning_rate": 4.625926418047067e-05, "loss": 0.7805, "step": 20270 }, { "epoch": 0.22, "learning_rate": 4.625834145333215e-05, "loss": 0.7111, "step": 20275 }, { "epoch": 0.22, "learning_rate": 4.625741872619364e-05, "loss": 0.8025, "step": 20280 }, { "epoch": 0.22, "learning_rate": 4.625649599905513e-05, "loss": 0.7833, "step": 20285 }, { "epoch": 0.22, "learning_rate": 4.625557327191662e-05, "loss": 0.8004, "step": 20290 }, { "epoch": 0.22, "learning_rate": 4.62546505447781e-05, "loss": 0.7942, "step": 20295 }, { "epoch": 0.22, "learning_rate": 4.625372781763959e-05, "loss": 0.7775, "step": 20300 }, { "epoch": 0.22, "learning_rate": 4.625280509050108e-05, "loss": 0.8418, "step": 20305 }, { "epoch": 0.22, "learning_rate": 4.625188236336257e-05, "loss": 0.8691, "step": 20310 }, { "epoch": 0.22, "learning_rate": 4.6250959636224054e-05, "loss": 0.8695, "step": 20315 }, { "epoch": 0.22, "learning_rate": 4.625003690908554e-05, "loss": 0.8438, "step": 20320 }, { "epoch": 0.23, "learning_rate": 4.624911418194703e-05, "loss": 0.8593, "step": 20325 }, { "epoch": 0.23, "learning_rate": 4.624819145480852e-05, "loss": 0.8618, "step": 20330 }, { "epoch": 0.23, "learning_rate": 4.6247268727670006e-05, "loss": 0.8603, "step": 20335 }, { "epoch": 0.23, "learning_rate": 4.624634600053149e-05, "loss": 0.8269, "step": 20340 }, { "epoch": 0.23, "learning_rate": 4.624542327339298e-05, "loss": 0.8318, "step": 20345 }, { "epoch": 0.23, "learning_rate": 4.624450054625446e-05, "loss": 0.773, "step": 20350 }, { "epoch": 0.23, "learning_rate": 4.624357781911596e-05, "loss": 0.8267, "step": 20355 }, { "epoch": 0.23, "learning_rate": 4.6242655091977445e-05, "loss": 0.7905, "step": 20360 }, { "epoch": 0.23, "learning_rate": 4.624173236483893e-05, "loss": 0.7939, "step": 20365 }, { "epoch": 0.23, "learning_rate": 4.6240809637700414e-05, "loss": 0.8257, "step": 20370 }, { "epoch": 0.23, "learning_rate": 4.623988691056191e-05, "loss": 0.8838, "step": 20375 }, { "epoch": 0.23, "learning_rate": 4.6238964183423396e-05, "loss": 0.7715, "step": 20380 }, { "epoch": 0.23, "learning_rate": 4.6238041456284884e-05, "loss": 0.7983, "step": 20385 }, { "epoch": 0.23, "learning_rate": 4.6237118729146365e-05, "loss": 0.7725, "step": 20390 }, { "epoch": 0.23, "learning_rate": 4.623619600200786e-05, "loss": 0.8064, "step": 20395 }, { "epoch": 0.23, "learning_rate": 4.623527327486935e-05, "loss": 0.7623, "step": 20400 }, { "epoch": 0.23, "learning_rate": 4.623435054773083e-05, "loss": 0.8249, "step": 20405 }, { "epoch": 0.23, "learning_rate": 4.6233427820592316e-05, "loss": 0.8463, "step": 20410 }, { "epoch": 0.23, "learning_rate": 4.623250509345381e-05, "loss": 0.764, "step": 20415 }, { "epoch": 0.23, "learning_rate": 4.62315823663153e-05, "loss": 0.8053, "step": 20420 }, { "epoch": 0.23, "learning_rate": 4.623065963917678e-05, "loss": 0.9145, "step": 20425 }, { "epoch": 0.23, "learning_rate": 4.622973691203827e-05, "loss": 0.834, "step": 20430 }, { "epoch": 0.23, "learning_rate": 4.6228814184899756e-05, "loss": 0.7776, "step": 20435 }, { "epoch": 0.23, "learning_rate": 4.6227891457761244e-05, "loss": 0.8315, "step": 20440 }, { "epoch": 0.23, "learning_rate": 4.622696873062273e-05, "loss": 0.8271, "step": 20445 }, { "epoch": 0.23, "learning_rate": 4.622604600348422e-05, "loss": 0.7866, "step": 20450 }, { "epoch": 0.23, "learning_rate": 4.622512327634571e-05, "loss": 0.7678, "step": 20455 }, { "epoch": 0.23, "learning_rate": 4.6224200549207195e-05, "loss": 0.8113, "step": 20460 }, { "epoch": 0.23, "learning_rate": 4.622327782206868e-05, "loss": 0.7878, "step": 20465 }, { "epoch": 0.23, "learning_rate": 4.622235509493017e-05, "loss": 0.8043, "step": 20470 }, { "epoch": 0.23, "learning_rate": 4.622143236779166e-05, "loss": 0.7943, "step": 20475 }, { "epoch": 0.23, "learning_rate": 4.6220509640653146e-05, "loss": 0.8037, "step": 20480 }, { "epoch": 0.23, "learning_rate": 4.6219586913514634e-05, "loss": 0.8194, "step": 20485 }, { "epoch": 0.23, "learning_rate": 4.621866418637612e-05, "loss": 0.8019, "step": 20490 }, { "epoch": 0.23, "learning_rate": 4.621774145923761e-05, "loss": 0.8578, "step": 20495 }, { "epoch": 0.23, "learning_rate": 4.621681873209909e-05, "loss": 0.8518, "step": 20500 }, { "epoch": 0.23, "learning_rate": 4.6215896004960586e-05, "loss": 0.7913, "step": 20505 }, { "epoch": 0.23, "learning_rate": 4.6214973277822073e-05, "loss": 0.7584, "step": 20510 }, { "epoch": 0.23, "learning_rate": 4.6214050550683555e-05, "loss": 0.8032, "step": 20515 }, { "epoch": 0.23, "learning_rate": 4.621312782354504e-05, "loss": 0.8015, "step": 20520 }, { "epoch": 0.23, "learning_rate": 4.621220509640654e-05, "loss": 0.7594, "step": 20525 }, { "epoch": 0.23, "learning_rate": 4.6211282369268025e-05, "loss": 0.7469, "step": 20530 }, { "epoch": 0.23, "learning_rate": 4.6210359642129506e-05, "loss": 0.7708, "step": 20535 }, { "epoch": 0.23, "learning_rate": 4.6209436914990994e-05, "loss": 0.8438, "step": 20540 }, { "epoch": 0.23, "learning_rate": 4.620851418785249e-05, "loss": 0.8079, "step": 20545 }, { "epoch": 0.23, "learning_rate": 4.620759146071397e-05, "loss": 0.8434, "step": 20550 }, { "epoch": 0.23, "learning_rate": 4.620666873357546e-05, "loss": 0.8217, "step": 20555 }, { "epoch": 0.23, "learning_rate": 4.6205746006436945e-05, "loss": 0.7501, "step": 20560 }, { "epoch": 0.23, "learning_rate": 4.620482327929844e-05, "loss": 0.8179, "step": 20565 }, { "epoch": 0.23, "learning_rate": 4.620390055215992e-05, "loss": 0.8541, "step": 20570 }, { "epoch": 0.23, "learning_rate": 4.620297782502141e-05, "loss": 0.8085, "step": 20575 }, { "epoch": 0.23, "learning_rate": 4.6202055097882897e-05, "loss": 0.851, "step": 20580 }, { "epoch": 0.23, "learning_rate": 4.6201132370744384e-05, "loss": 0.8313, "step": 20585 }, { "epoch": 0.23, "learning_rate": 4.620020964360587e-05, "loss": 0.8121, "step": 20590 }, { "epoch": 0.23, "learning_rate": 4.619928691646736e-05, "loss": 0.84, "step": 20595 }, { "epoch": 0.23, "learning_rate": 4.619836418932885e-05, "loss": 0.8236, "step": 20600 }, { "epoch": 0.23, "learning_rate": 4.6197441462190336e-05, "loss": 0.8422, "step": 20605 }, { "epoch": 0.23, "learning_rate": 4.6196518735051824e-05, "loss": 0.7784, "step": 20610 }, { "epoch": 0.23, "learning_rate": 4.619559600791331e-05, "loss": 0.8075, "step": 20615 }, { "epoch": 0.23, "learning_rate": 4.61946732807748e-05, "loss": 0.7125, "step": 20620 }, { "epoch": 0.23, "learning_rate": 4.619375055363628e-05, "loss": 0.8539, "step": 20625 }, { "epoch": 0.23, "learning_rate": 4.6192827826497775e-05, "loss": 0.8834, "step": 20630 }, { "epoch": 0.23, "learning_rate": 4.619190509935926e-05, "loss": 0.727, "step": 20635 }, { "epoch": 0.23, "learning_rate": 4.619098237222075e-05, "loss": 0.8342, "step": 20640 }, { "epoch": 0.23, "learning_rate": 4.619005964508223e-05, "loss": 0.8171, "step": 20645 }, { "epoch": 0.23, "learning_rate": 4.618913691794372e-05, "loss": 0.7691, "step": 20650 }, { "epoch": 0.23, "learning_rate": 4.6188214190805214e-05, "loss": 0.7441, "step": 20655 }, { "epoch": 0.23, "learning_rate": 4.6187291463666695e-05, "loss": 0.7985, "step": 20660 }, { "epoch": 0.23, "learning_rate": 4.618636873652818e-05, "loss": 0.7361, "step": 20665 }, { "epoch": 0.23, "learning_rate": 4.618544600938967e-05, "loss": 0.7725, "step": 20670 }, { "epoch": 0.23, "learning_rate": 4.6184523282251166e-05, "loss": 0.7742, "step": 20675 }, { "epoch": 0.23, "learning_rate": 4.618360055511265e-05, "loss": 0.7898, "step": 20680 }, { "epoch": 0.23, "learning_rate": 4.6182677827974135e-05, "loss": 0.8172, "step": 20685 }, { "epoch": 0.23, "learning_rate": 4.618175510083562e-05, "loss": 0.7627, "step": 20690 }, { "epoch": 0.23, "learning_rate": 4.618083237369712e-05, "loss": 0.8571, "step": 20695 }, { "epoch": 0.23, "learning_rate": 4.61799096465586e-05, "loss": 0.8212, "step": 20700 }, { "epoch": 0.23, "learning_rate": 4.6178986919420086e-05, "loss": 0.8267, "step": 20705 }, { "epoch": 0.23, "learning_rate": 4.6178064192281574e-05, "loss": 0.8236, "step": 20710 }, { "epoch": 0.23, "learning_rate": 4.617714146514306e-05, "loss": 0.7726, "step": 20715 }, { "epoch": 0.23, "learning_rate": 4.617621873800455e-05, "loss": 0.8384, "step": 20720 }, { "epoch": 0.23, "learning_rate": 4.617529601086604e-05, "loss": 0.788, "step": 20725 }, { "epoch": 0.23, "learning_rate": 4.6174373283727525e-05, "loss": 0.7761, "step": 20730 }, { "epoch": 0.23, "learning_rate": 4.6173450556589006e-05, "loss": 0.8798, "step": 20735 }, { "epoch": 0.23, "learning_rate": 4.61725278294505e-05, "loss": 0.8067, "step": 20740 }, { "epoch": 0.23, "learning_rate": 4.617160510231199e-05, "loss": 0.8271, "step": 20745 }, { "epoch": 0.23, "learning_rate": 4.617068237517348e-05, "loss": 0.7962, "step": 20750 }, { "epoch": 0.23, "learning_rate": 4.616975964803496e-05, "loss": 0.8139, "step": 20755 }, { "epoch": 0.23, "learning_rate": 4.616883692089645e-05, "loss": 0.7674, "step": 20760 }, { "epoch": 0.23, "learning_rate": 4.616791419375794e-05, "loss": 0.7753, "step": 20765 }, { "epoch": 0.23, "learning_rate": 4.616699146661943e-05, "loss": 0.7852, "step": 20770 }, { "epoch": 0.23, "learning_rate": 4.616606873948091e-05, "loss": 0.7779, "step": 20775 }, { "epoch": 0.23, "learning_rate": 4.6165146012342404e-05, "loss": 0.8314, "step": 20780 }, { "epoch": 0.23, "learning_rate": 4.616422328520389e-05, "loss": 0.8023, "step": 20785 }, { "epoch": 0.23, "learning_rate": 4.616330055806537e-05, "loss": 0.8607, "step": 20790 }, { "epoch": 0.23, "learning_rate": 4.616237783092686e-05, "loss": 0.8606, "step": 20795 }, { "epoch": 0.23, "learning_rate": 4.616145510378835e-05, "loss": 0.7604, "step": 20800 }, { "epoch": 0.23, "learning_rate": 4.616053237664984e-05, "loss": 0.7955, "step": 20805 }, { "epoch": 0.23, "learning_rate": 4.6159609649511324e-05, "loss": 0.7712, "step": 20810 }, { "epoch": 0.23, "learning_rate": 4.615868692237281e-05, "loss": 0.7721, "step": 20815 }, { "epoch": 0.23, "learning_rate": 4.61577641952343e-05, "loss": 0.7921, "step": 20820 }, { "epoch": 0.23, "learning_rate": 4.615684146809579e-05, "loss": 0.788, "step": 20825 }, { "epoch": 0.23, "learning_rate": 4.6155918740957275e-05, "loss": 0.8246, "step": 20830 }, { "epoch": 0.23, "learning_rate": 4.615499601381876e-05, "loss": 0.8488, "step": 20835 }, { "epoch": 0.23, "learning_rate": 4.615407328668025e-05, "loss": 0.8481, "step": 20840 }, { "epoch": 0.23, "learning_rate": 4.615315055954174e-05, "loss": 0.7548, "step": 20845 }, { "epoch": 0.23, "learning_rate": 4.615222783240323e-05, "loss": 0.856, "step": 20850 }, { "epoch": 0.23, "learning_rate": 4.6151305105264715e-05, "loss": 0.8147, "step": 20855 }, { "epoch": 0.23, "learning_rate": 4.61503823781262e-05, "loss": 0.7656, "step": 20860 }, { "epoch": 0.23, "learning_rate": 4.614945965098769e-05, "loss": 0.8604, "step": 20865 }, { "epoch": 0.23, "learning_rate": 4.614853692384918e-05, "loss": 0.8303, "step": 20870 }, { "epoch": 0.23, "learning_rate": 4.6147614196710666e-05, "loss": 0.8169, "step": 20875 }, { "epoch": 0.23, "learning_rate": 4.6146691469572154e-05, "loss": 0.8057, "step": 20880 }, { "epoch": 0.23, "learning_rate": 4.6145768742433635e-05, "loss": 0.8187, "step": 20885 }, { "epoch": 0.23, "learning_rate": 4.614484601529513e-05, "loss": 0.8265, "step": 20890 }, { "epoch": 0.23, "learning_rate": 4.614392328815662e-05, "loss": 0.871, "step": 20895 }, { "epoch": 0.23, "learning_rate": 4.61430005610181e-05, "loss": 0.7996, "step": 20900 }, { "epoch": 0.23, "learning_rate": 4.6142077833879586e-05, "loss": 0.818, "step": 20905 }, { "epoch": 0.23, "learning_rate": 4.614115510674108e-05, "loss": 0.8201, "step": 20910 }, { "epoch": 0.23, "learning_rate": 4.614023237960257e-05, "loss": 0.7368, "step": 20915 }, { "epoch": 0.23, "learning_rate": 4.613930965246405e-05, "loss": 0.8071, "step": 20920 }, { "epoch": 0.23, "learning_rate": 4.613838692532554e-05, "loss": 0.8469, "step": 20925 }, { "epoch": 0.23, "learning_rate": 4.613746419818703e-05, "loss": 0.7665, "step": 20930 }, { "epoch": 0.23, "learning_rate": 4.6136541471048513e-05, "loss": 0.8123, "step": 20935 }, { "epoch": 0.23, "learning_rate": 4.613561874391e-05, "loss": 0.7428, "step": 20940 }, { "epoch": 0.23, "learning_rate": 4.613469601677149e-05, "loss": 0.8526, "step": 20945 }, { "epoch": 0.23, "learning_rate": 4.613377328963298e-05, "loss": 0.8078, "step": 20950 }, { "epoch": 0.23, "learning_rate": 4.6132850562494465e-05, "loss": 0.7804, "step": 20955 }, { "epoch": 0.23, "learning_rate": 4.613192783535595e-05, "loss": 0.8253, "step": 20960 }, { "epoch": 0.23, "learning_rate": 4.613100510821744e-05, "loss": 0.7759, "step": 20965 }, { "epoch": 0.23, "learning_rate": 4.613008238107893e-05, "loss": 0.8687, "step": 20970 }, { "epoch": 0.23, "learning_rate": 4.6129159653940416e-05, "loss": 0.8231, "step": 20975 }, { "epoch": 0.23, "learning_rate": 4.6128236926801904e-05, "loss": 0.7818, "step": 20980 }, { "epoch": 0.23, "learning_rate": 4.612731419966339e-05, "loss": 0.8406, "step": 20985 }, { "epoch": 0.23, "learning_rate": 4.612639147252488e-05, "loss": 0.8308, "step": 20990 }, { "epoch": 0.23, "learning_rate": 4.612546874538637e-05, "loss": 0.8801, "step": 20995 }, { "epoch": 0.23, "learning_rate": 4.6124546018247856e-05, "loss": 0.811, "step": 21000 }, { "epoch": 0.23, "eval_loss": 0.784591555595398, "eval_runtime": 69.3636, "eval_samples_per_second": 28.834, "eval_steps_per_second": 14.417, "step": 21000 }, { "epoch": 0.23, "learning_rate": 4.612362329110934e-05, "loss": 0.7743, "step": 21005 }, { "epoch": 0.23, "learning_rate": 4.6122700563970824e-05, "loss": 0.8391, "step": 21010 }, { "epoch": 0.23, "learning_rate": 4.612177783683232e-05, "loss": 0.7449, "step": 21015 }, { "epoch": 0.23, "learning_rate": 4.612085510969381e-05, "loss": 0.7866, "step": 21020 }, { "epoch": 0.23, "learning_rate": 4.6119932382555295e-05, "loss": 0.7869, "step": 21025 }, { "epoch": 0.23, "learning_rate": 4.6119009655416776e-05, "loss": 0.7854, "step": 21030 }, { "epoch": 0.23, "learning_rate": 4.6118086928278264e-05, "loss": 0.7841, "step": 21035 }, { "epoch": 0.23, "learning_rate": 4.611716420113976e-05, "loss": 0.7701, "step": 21040 }, { "epoch": 0.23, "learning_rate": 4.611624147400124e-05, "loss": 0.8006, "step": 21045 }, { "epoch": 0.23, "learning_rate": 4.611531874686273e-05, "loss": 0.8033, "step": 21050 }, { "epoch": 0.23, "learning_rate": 4.6114396019724215e-05, "loss": 0.7935, "step": 21055 }, { "epoch": 0.23, "learning_rate": 4.611347329258571e-05, "loss": 0.7782, "step": 21060 }, { "epoch": 0.23, "learning_rate": 4.611255056544719e-05, "loss": 0.7525, "step": 21065 }, { "epoch": 0.23, "learning_rate": 4.611162783830868e-05, "loss": 0.7961, "step": 21070 }, { "epoch": 0.23, "learning_rate": 4.6110705111170166e-05, "loss": 0.8281, "step": 21075 }, { "epoch": 0.23, "learning_rate": 4.610978238403166e-05, "loss": 0.823, "step": 21080 }, { "epoch": 0.23, "learning_rate": 4.610885965689314e-05, "loss": 0.7867, "step": 21085 }, { "epoch": 0.23, "learning_rate": 4.610793692975463e-05, "loss": 0.8673, "step": 21090 }, { "epoch": 0.23, "learning_rate": 4.610701420261612e-05, "loss": 0.7712, "step": 21095 }, { "epoch": 0.23, "learning_rate": 4.6106091475477606e-05, "loss": 0.7797, "step": 21100 }, { "epoch": 0.23, "learning_rate": 4.6105168748339094e-05, "loss": 0.7978, "step": 21105 }, { "epoch": 0.23, "learning_rate": 4.610424602120058e-05, "loss": 0.7796, "step": 21110 }, { "epoch": 0.23, "learning_rate": 4.610332329406207e-05, "loss": 0.8382, "step": 21115 }, { "epoch": 0.23, "learning_rate": 4.610240056692355e-05, "loss": 0.7775, "step": 21120 }, { "epoch": 0.23, "learning_rate": 4.6101477839785045e-05, "loss": 0.8617, "step": 21125 }, { "epoch": 0.23, "learning_rate": 4.610055511264653e-05, "loss": 0.7802, "step": 21130 }, { "epoch": 0.23, "learning_rate": 4.609963238550802e-05, "loss": 0.84, "step": 21135 }, { "epoch": 0.23, "learning_rate": 4.60987096583695e-05, "loss": 0.8567, "step": 21140 }, { "epoch": 0.23, "learning_rate": 4.6097786931230996e-05, "loss": 0.7647, "step": 21145 }, { "epoch": 0.23, "learning_rate": 4.6096864204092484e-05, "loss": 0.7575, "step": 21150 }, { "epoch": 0.23, "learning_rate": 4.609594147695397e-05, "loss": 0.7771, "step": 21155 }, { "epoch": 0.23, "learning_rate": 4.609501874981545e-05, "loss": 0.8223, "step": 21160 }, { "epoch": 0.23, "learning_rate": 4.609409602267695e-05, "loss": 0.7537, "step": 21165 }, { "epoch": 0.23, "learning_rate": 4.6093173295538436e-05, "loss": 0.7939, "step": 21170 }, { "epoch": 0.23, "learning_rate": 4.609225056839992e-05, "loss": 0.837, "step": 21175 }, { "epoch": 0.23, "learning_rate": 4.6091327841261405e-05, "loss": 0.7984, "step": 21180 }, { "epoch": 0.23, "learning_rate": 4.609040511412289e-05, "loss": 0.8018, "step": 21185 }, { "epoch": 0.23, "learning_rate": 4.608948238698439e-05, "loss": 0.7992, "step": 21190 }, { "epoch": 0.23, "learning_rate": 4.608855965984587e-05, "loss": 0.8008, "step": 21195 }, { "epoch": 0.23, "learning_rate": 4.6087636932707356e-05, "loss": 0.8227, "step": 21200 }, { "epoch": 0.23, "learning_rate": 4.6086714205568844e-05, "loss": 0.8103, "step": 21205 }, { "epoch": 0.23, "learning_rate": 4.608579147843033e-05, "loss": 0.7979, "step": 21210 }, { "epoch": 0.23, "learning_rate": 4.608486875129182e-05, "loss": 0.7701, "step": 21215 }, { "epoch": 0.23, "learning_rate": 4.608394602415331e-05, "loss": 0.781, "step": 21220 }, { "epoch": 0.24, "learning_rate": 4.6083023297014795e-05, "loss": 0.8327, "step": 21225 }, { "epoch": 0.24, "learning_rate": 4.608210056987628e-05, "loss": 0.8222, "step": 21230 }, { "epoch": 0.24, "learning_rate": 4.608117784273777e-05, "loss": 0.7914, "step": 21235 }, { "epoch": 0.24, "learning_rate": 4.608025511559926e-05, "loss": 0.816, "step": 21240 }, { "epoch": 0.24, "learning_rate": 4.6079332388460747e-05, "loss": 0.8215, "step": 21245 }, { "epoch": 0.24, "learning_rate": 4.6078409661322234e-05, "loss": 0.8275, "step": 21250 }, { "epoch": 0.24, "learning_rate": 4.607748693418372e-05, "loss": 0.7904, "step": 21255 }, { "epoch": 0.24, "learning_rate": 4.607656420704521e-05, "loss": 0.8411, "step": 21260 }, { "epoch": 0.24, "learning_rate": 4.60756414799067e-05, "loss": 0.8522, "step": 21265 }, { "epoch": 0.24, "learning_rate": 4.607471875276818e-05, "loss": 0.8354, "step": 21270 }, { "epoch": 0.24, "learning_rate": 4.6073796025629674e-05, "loss": 0.8123, "step": 21275 }, { "epoch": 0.24, "learning_rate": 4.607287329849116e-05, "loss": 0.8066, "step": 21280 }, { "epoch": 0.24, "learning_rate": 4.607195057135264e-05, "loss": 0.8308, "step": 21285 }, { "epoch": 0.24, "learning_rate": 4.607102784421413e-05, "loss": 0.8565, "step": 21290 }, { "epoch": 0.24, "learning_rate": 4.6070105117075625e-05, "loss": 0.8173, "step": 21295 }, { "epoch": 0.24, "learning_rate": 4.606918238993711e-05, "loss": 0.7739, "step": 21300 }, { "epoch": 0.24, "learning_rate": 4.6068259662798594e-05, "loss": 0.825, "step": 21305 }, { "epoch": 0.24, "learning_rate": 4.606733693566008e-05, "loss": 0.7653, "step": 21310 }, { "epoch": 0.24, "learning_rate": 4.6066414208521576e-05, "loss": 0.7658, "step": 21315 }, { "epoch": 0.24, "learning_rate": 4.606549148138306e-05, "loss": 0.7611, "step": 21320 }, { "epoch": 0.24, "learning_rate": 4.6064568754244545e-05, "loss": 0.7718, "step": 21325 }, { "epoch": 0.24, "learning_rate": 4.606364602710603e-05, "loss": 0.8352, "step": 21330 }, { "epoch": 0.24, "learning_rate": 4.606272329996752e-05, "loss": 0.834, "step": 21335 }, { "epoch": 0.24, "learning_rate": 4.606180057282901e-05, "loss": 0.7973, "step": 21340 }, { "epoch": 0.24, "learning_rate": 4.60608778456905e-05, "loss": 0.8621, "step": 21345 }, { "epoch": 0.24, "learning_rate": 4.6059955118551985e-05, "loss": 0.7661, "step": 21350 }, { "epoch": 0.24, "learning_rate": 4.605903239141347e-05, "loss": 0.7575, "step": 21355 }, { "epoch": 0.24, "learning_rate": 4.605810966427496e-05, "loss": 0.727, "step": 21360 }, { "epoch": 0.24, "learning_rate": 4.605718693713645e-05, "loss": 0.8247, "step": 21365 }, { "epoch": 0.24, "learning_rate": 4.6056264209997936e-05, "loss": 0.8243, "step": 21370 }, { "epoch": 0.24, "learning_rate": 4.6055341482859424e-05, "loss": 0.8057, "step": 21375 }, { "epoch": 0.24, "learning_rate": 4.605441875572091e-05, "loss": 0.7702, "step": 21380 }, { "epoch": 0.24, "learning_rate": 4.60534960285824e-05, "loss": 0.8131, "step": 21385 }, { "epoch": 0.24, "learning_rate": 4.605257330144389e-05, "loss": 0.832, "step": 21390 }, { "epoch": 0.24, "learning_rate": 4.605165057430537e-05, "loss": 0.8356, "step": 21395 }, { "epoch": 0.24, "learning_rate": 4.605072784716686e-05, "loss": 0.7448, "step": 21400 }, { "epoch": 0.24, "learning_rate": 4.604980512002835e-05, "loss": 0.7793, "step": 21405 }, { "epoch": 0.24, "learning_rate": 4.604888239288984e-05, "loss": 0.7734, "step": 21410 }, { "epoch": 0.24, "learning_rate": 4.604795966575132e-05, "loss": 0.7912, "step": 21415 }, { "epoch": 0.24, "learning_rate": 4.604703693861281e-05, "loss": 0.7587, "step": 21420 }, { "epoch": 0.24, "learning_rate": 4.60461142114743e-05, "loss": 0.8104, "step": 21425 }, { "epoch": 0.24, "learning_rate": 4.6045191484335783e-05, "loss": 0.8841, "step": 21430 }, { "epoch": 0.24, "learning_rate": 4.604426875719727e-05, "loss": 0.8711, "step": 21435 }, { "epoch": 0.24, "learning_rate": 4.604334603005876e-05, "loss": 0.7839, "step": 21440 }, { "epoch": 0.24, "learning_rate": 4.6042423302920254e-05, "loss": 0.8142, "step": 21445 }, { "epoch": 0.24, "learning_rate": 4.6041500575781735e-05, "loss": 0.7965, "step": 21450 }, { "epoch": 0.24, "learning_rate": 4.604057784864322e-05, "loss": 0.8002, "step": 21455 }, { "epoch": 0.24, "learning_rate": 4.603965512150471e-05, "loss": 0.8463, "step": 21460 }, { "epoch": 0.24, "learning_rate": 4.6038732394366205e-05, "loss": 0.8263, "step": 21465 }, { "epoch": 0.24, "learning_rate": 4.6037809667227686e-05, "loss": 0.8508, "step": 21470 }, { "epoch": 0.24, "learning_rate": 4.6036886940089174e-05, "loss": 0.8641, "step": 21475 }, { "epoch": 0.24, "learning_rate": 4.603596421295066e-05, "loss": 0.8119, "step": 21480 }, { "epoch": 0.24, "learning_rate": 4.603504148581215e-05, "loss": 0.7674, "step": 21485 }, { "epoch": 0.24, "learning_rate": 4.603411875867364e-05, "loss": 0.7792, "step": 21490 }, { "epoch": 0.24, "learning_rate": 4.6033196031535125e-05, "loss": 0.8077, "step": 21495 }, { "epoch": 0.24, "learning_rate": 4.603227330439661e-05, "loss": 0.841, "step": 21500 }, { "epoch": 0.24, "learning_rate": 4.6031350577258094e-05, "loss": 0.7968, "step": 21505 }, { "epoch": 0.24, "learning_rate": 4.603042785011959e-05, "loss": 0.7653, "step": 21510 }, { "epoch": 0.24, "learning_rate": 4.602950512298108e-05, "loss": 0.7661, "step": 21515 }, { "epoch": 0.24, "learning_rate": 4.6028582395842565e-05, "loss": 0.8533, "step": 21520 }, { "epoch": 0.24, "learning_rate": 4.6027659668704046e-05, "loss": 0.8456, "step": 21525 }, { "epoch": 0.24, "learning_rate": 4.602673694156554e-05, "loss": 0.8319, "step": 21530 }, { "epoch": 0.24, "learning_rate": 4.602581421442703e-05, "loss": 0.7915, "step": 21535 }, { "epoch": 0.24, "learning_rate": 4.6024891487288516e-05, "loss": 0.8134, "step": 21540 }, { "epoch": 0.24, "learning_rate": 4.602396876015e-05, "loss": 0.7406, "step": 21545 }, { "epoch": 0.24, "learning_rate": 4.602304603301149e-05, "loss": 0.8326, "step": 21550 }, { "epoch": 0.24, "learning_rate": 4.602212330587298e-05, "loss": 0.8174, "step": 21555 }, { "epoch": 0.24, "learning_rate": 4.602120057873446e-05, "loss": 0.7791, "step": 21560 }, { "epoch": 0.24, "learning_rate": 4.602027785159595e-05, "loss": 0.7922, "step": 21565 }, { "epoch": 0.24, "learning_rate": 4.6019355124457436e-05, "loss": 0.8457, "step": 21570 }, { "epoch": 0.24, "learning_rate": 4.601843239731893e-05, "loss": 0.7718, "step": 21575 }, { "epoch": 0.24, "learning_rate": 4.601750967018041e-05, "loss": 0.7978, "step": 21580 }, { "epoch": 0.24, "learning_rate": 4.60165869430419e-05, "loss": 0.7471, "step": 21585 }, { "epoch": 0.24, "learning_rate": 4.601566421590339e-05, "loss": 0.7534, "step": 21590 }, { "epoch": 0.24, "learning_rate": 4.6014741488764876e-05, "loss": 0.7923, "step": 21595 }, { "epoch": 0.24, "learning_rate": 4.6013818761626363e-05, "loss": 0.7874, "step": 21600 }, { "epoch": 0.24, "learning_rate": 4.601289603448785e-05, "loss": 0.7777, "step": 21605 }, { "epoch": 0.24, "learning_rate": 4.601197330734934e-05, "loss": 0.7389, "step": 21610 }, { "epoch": 0.24, "learning_rate": 4.601105058021083e-05, "loss": 0.7902, "step": 21615 }, { "epoch": 0.24, "learning_rate": 4.6010127853072315e-05, "loss": 0.7697, "step": 21620 }, { "epoch": 0.24, "learning_rate": 4.60092051259338e-05, "loss": 0.8476, "step": 21625 }, { "epoch": 0.24, "learning_rate": 4.600828239879529e-05, "loss": 0.7668, "step": 21630 }, { "epoch": 0.24, "learning_rate": 4.600735967165677e-05, "loss": 0.7949, "step": 21635 }, { "epoch": 0.24, "learning_rate": 4.6006436944518266e-05, "loss": 0.7373, "step": 21640 }, { "epoch": 0.24, "learning_rate": 4.6005514217379754e-05, "loss": 0.8122, "step": 21645 }, { "epoch": 0.24, "learning_rate": 4.600459149024124e-05, "loss": 0.772, "step": 21650 }, { "epoch": 0.24, "learning_rate": 4.600366876310272e-05, "loss": 0.8006, "step": 21655 }, { "epoch": 0.24, "learning_rate": 4.600274603596422e-05, "loss": 0.7217, "step": 21660 }, { "epoch": 0.24, "learning_rate": 4.6001823308825706e-05, "loss": 0.8059, "step": 21665 }, { "epoch": 0.24, "learning_rate": 4.6000900581687187e-05, "loss": 0.7893, "step": 21670 }, { "epoch": 0.24, "learning_rate": 4.5999977854548674e-05, "loss": 0.7857, "step": 21675 }, { "epoch": 0.24, "learning_rate": 4.599905512741017e-05, "loss": 0.7977, "step": 21680 }, { "epoch": 0.24, "learning_rate": 4.599813240027166e-05, "loss": 0.7655, "step": 21685 }, { "epoch": 0.24, "learning_rate": 4.599720967313314e-05, "loss": 0.8432, "step": 21690 }, { "epoch": 0.24, "learning_rate": 4.5996286945994626e-05, "loss": 0.8164, "step": 21695 }, { "epoch": 0.24, "learning_rate": 4.599536421885612e-05, "loss": 0.7649, "step": 21700 }, { "epoch": 0.24, "learning_rate": 4.59944414917176e-05, "loss": 0.7735, "step": 21705 }, { "epoch": 0.24, "learning_rate": 4.599351876457909e-05, "loss": 0.8438, "step": 21710 }, { "epoch": 0.24, "learning_rate": 4.599259603744058e-05, "loss": 0.8409, "step": 21715 }, { "epoch": 0.24, "learning_rate": 4.5991673310302065e-05, "loss": 0.8002, "step": 21720 }, { "epoch": 0.24, "learning_rate": 4.599075058316355e-05, "loss": 0.673, "step": 21725 }, { "epoch": 0.24, "learning_rate": 4.598982785602504e-05, "loss": 0.8193, "step": 21730 }, { "epoch": 0.24, "learning_rate": 4.598890512888653e-05, "loss": 0.8327, "step": 21735 }, { "epoch": 0.24, "learning_rate": 4.5987982401748016e-05, "loss": 0.7142, "step": 21740 }, { "epoch": 0.24, "learning_rate": 4.5987059674609504e-05, "loss": 0.9008, "step": 21745 }, { "epoch": 0.24, "learning_rate": 4.598613694747099e-05, "loss": 0.7689, "step": 21750 }, { "epoch": 0.24, "learning_rate": 4.598521422033248e-05, "loss": 0.8167, "step": 21755 }, { "epoch": 0.24, "learning_rate": 4.598429149319397e-05, "loss": 0.7684, "step": 21760 }, { "epoch": 0.24, "learning_rate": 4.5983368766055456e-05, "loss": 0.8581, "step": 21765 }, { "epoch": 0.24, "learning_rate": 4.5982446038916944e-05, "loss": 0.8187, "step": 21770 }, { "epoch": 0.24, "learning_rate": 4.598152331177843e-05, "loss": 0.8115, "step": 21775 }, { "epoch": 0.24, "learning_rate": 4.598060058463991e-05, "loss": 0.7952, "step": 21780 }, { "epoch": 0.24, "learning_rate": 4.59796778575014e-05, "loss": 0.8026, "step": 21785 }, { "epoch": 0.24, "learning_rate": 4.5978755130362895e-05, "loss": 0.7519, "step": 21790 }, { "epoch": 0.24, "learning_rate": 4.597783240322438e-05, "loss": 0.7651, "step": 21795 }, { "epoch": 0.24, "learning_rate": 4.5976909676085864e-05, "loss": 0.8163, "step": 21800 }, { "epoch": 0.24, "learning_rate": 4.597598694894735e-05, "loss": 0.916, "step": 21805 }, { "epoch": 0.24, "learning_rate": 4.5975064221808846e-05, "loss": 0.7763, "step": 21810 }, { "epoch": 0.24, "learning_rate": 4.5974141494670334e-05, "loss": 0.8724, "step": 21815 }, { "epoch": 0.24, "learning_rate": 4.5973218767531815e-05, "loss": 0.7955, "step": 21820 }, { "epoch": 0.24, "learning_rate": 4.59722960403933e-05, "loss": 0.8204, "step": 21825 }, { "epoch": 0.24, "learning_rate": 4.59713733132548e-05, "loss": 0.8113, "step": 21830 }, { "epoch": 0.24, "learning_rate": 4.597045058611628e-05, "loss": 0.8026, "step": 21835 }, { "epoch": 0.24, "learning_rate": 4.596952785897777e-05, "loss": 0.7453, "step": 21840 }, { "epoch": 0.24, "learning_rate": 4.5968605131839255e-05, "loss": 0.8076, "step": 21845 }, { "epoch": 0.24, "learning_rate": 4.596768240470075e-05, "loss": 0.7819, "step": 21850 }, { "epoch": 0.24, "learning_rate": 4.596675967756223e-05, "loss": 0.7798, "step": 21855 }, { "epoch": 0.24, "learning_rate": 4.596583695042372e-05, "loss": 0.8168, "step": 21860 }, { "epoch": 0.24, "learning_rate": 4.5964914223285206e-05, "loss": 0.7653, "step": 21865 }, { "epoch": 0.24, "learning_rate": 4.5963991496146694e-05, "loss": 0.7837, "step": 21870 }, { "epoch": 0.24, "learning_rate": 4.596306876900818e-05, "loss": 0.7778, "step": 21875 }, { "epoch": 0.24, "learning_rate": 4.596214604186967e-05, "loss": 0.7878, "step": 21880 }, { "epoch": 0.24, "learning_rate": 4.596122331473116e-05, "loss": 0.8264, "step": 21885 }, { "epoch": 0.24, "learning_rate": 4.5960300587592645e-05, "loss": 0.8219, "step": 21890 }, { "epoch": 0.24, "learning_rate": 4.595937786045413e-05, "loss": 0.798, "step": 21895 }, { "epoch": 0.24, "learning_rate": 4.595845513331562e-05, "loss": 0.757, "step": 21900 }, { "epoch": 0.24, "learning_rate": 4.595753240617711e-05, "loss": 0.7716, "step": 21905 }, { "epoch": 0.24, "learning_rate": 4.595660967903859e-05, "loss": 0.8593, "step": 21910 }, { "epoch": 0.24, "learning_rate": 4.5955686951900084e-05, "loss": 0.836, "step": 21915 }, { "epoch": 0.24, "learning_rate": 4.595476422476157e-05, "loss": 0.7564, "step": 21920 }, { "epoch": 0.24, "learning_rate": 4.595384149762306e-05, "loss": 0.8478, "step": 21925 }, { "epoch": 0.24, "learning_rate": 4.595291877048454e-05, "loss": 0.7964, "step": 21930 }, { "epoch": 0.24, "learning_rate": 4.5951996043346036e-05, "loss": 0.8089, "step": 21935 }, { "epoch": 0.24, "learning_rate": 4.5951073316207524e-05, "loss": 0.8207, "step": 21940 }, { "epoch": 0.24, "learning_rate": 4.5950150589069005e-05, "loss": 0.8131, "step": 21945 }, { "epoch": 0.24, "learning_rate": 4.594922786193049e-05, "loss": 0.7743, "step": 21950 }, { "epoch": 0.24, "learning_rate": 4.594830513479198e-05, "loss": 0.8066, "step": 21955 }, { "epoch": 0.24, "learning_rate": 4.5947382407653475e-05, "loss": 0.8415, "step": 21960 }, { "epoch": 0.24, "learning_rate": 4.5946459680514956e-05, "loss": 0.7518, "step": 21965 }, { "epoch": 0.24, "learning_rate": 4.5945536953376444e-05, "loss": 0.8162, "step": 21970 }, { "epoch": 0.24, "learning_rate": 4.594461422623793e-05, "loss": 0.8055, "step": 21975 }, { "epoch": 0.24, "learning_rate": 4.594369149909942e-05, "loss": 0.803, "step": 21980 }, { "epoch": 0.24, "learning_rate": 4.594276877196091e-05, "loss": 0.818, "step": 21985 }, { "epoch": 0.24, "learning_rate": 4.5941846044822395e-05, "loss": 0.8417, "step": 21990 }, { "epoch": 0.24, "learning_rate": 4.594092331768388e-05, "loss": 0.772, "step": 21995 }, { "epoch": 0.24, "learning_rate": 4.594000059054537e-05, "loss": 0.7762, "step": 22000 }, { "epoch": 0.24, "eval_loss": 0.7753146886825562, "eval_runtime": 69.262, "eval_samples_per_second": 28.876, "eval_steps_per_second": 14.438, "step": 22000 }, { "epoch": 0.24, "learning_rate": 4.593907786340686e-05, "loss": 0.8635, "step": 22005 }, { "epoch": 0.24, "learning_rate": 4.593815513626835e-05, "loss": 0.8011, "step": 22010 }, { "epoch": 0.24, "learning_rate": 4.5937232409129835e-05, "loss": 0.7675, "step": 22015 }, { "epoch": 0.24, "learning_rate": 4.5936309681991316e-05, "loss": 0.8212, "step": 22020 }, { "epoch": 0.24, "learning_rate": 4.593538695485281e-05, "loss": 0.7824, "step": 22025 }, { "epoch": 0.24, "learning_rate": 4.59344642277143e-05, "loss": 0.8269, "step": 22030 }, { "epoch": 0.24, "learning_rate": 4.5933541500575786e-05, "loss": 0.7811, "step": 22035 }, { "epoch": 0.24, "learning_rate": 4.593261877343727e-05, "loss": 0.8244, "step": 22040 }, { "epoch": 0.24, "learning_rate": 4.593169604629876e-05, "loss": 0.7822, "step": 22045 }, { "epoch": 0.24, "learning_rate": 4.593077331916025e-05, "loss": 0.7639, "step": 22050 }, { "epoch": 0.24, "learning_rate": 4.592985059202173e-05, "loss": 0.7846, "step": 22055 }, { "epoch": 0.24, "learning_rate": 4.592892786488322e-05, "loss": 0.7426, "step": 22060 }, { "epoch": 0.24, "learning_rate": 4.592800513774471e-05, "loss": 0.8933, "step": 22065 }, { "epoch": 0.24, "learning_rate": 4.59270824106062e-05, "loss": 0.7455, "step": 22070 }, { "epoch": 0.24, "learning_rate": 4.592615968346768e-05, "loss": 0.8093, "step": 22075 }, { "epoch": 0.24, "learning_rate": 4.592523695632917e-05, "loss": 0.8077, "step": 22080 }, { "epoch": 0.24, "learning_rate": 4.5924314229190664e-05, "loss": 0.8227, "step": 22085 }, { "epoch": 0.24, "learning_rate": 4.5923391502052146e-05, "loss": 0.7767, "step": 22090 }, { "epoch": 0.24, "learning_rate": 4.5922468774913633e-05, "loss": 0.787, "step": 22095 }, { "epoch": 0.24, "learning_rate": 4.592154604777512e-05, "loss": 0.8333, "step": 22100 }, { "epoch": 0.24, "learning_rate": 4.592062332063661e-05, "loss": 0.7869, "step": 22105 }, { "epoch": 0.24, "learning_rate": 4.59197005934981e-05, "loss": 0.7935, "step": 22110 }, { "epoch": 0.24, "learning_rate": 4.5918777866359585e-05, "loss": 0.7846, "step": 22115 }, { "epoch": 0.24, "learning_rate": 4.591785513922107e-05, "loss": 0.8042, "step": 22120 }, { "epoch": 0.24, "learning_rate": 4.591693241208256e-05, "loss": 0.8172, "step": 22125 }, { "epoch": 0.25, "learning_rate": 4.591600968494405e-05, "loss": 0.787, "step": 22130 }, { "epoch": 0.25, "learning_rate": 4.5915086957805536e-05, "loss": 0.8132, "step": 22135 }, { "epoch": 0.25, "learning_rate": 4.5914164230667024e-05, "loss": 0.7789, "step": 22140 }, { "epoch": 0.25, "learning_rate": 4.591324150352851e-05, "loss": 0.8323, "step": 22145 }, { "epoch": 0.25, "learning_rate": 4.591231877639e-05, "loss": 0.7721, "step": 22150 }, { "epoch": 0.25, "learning_rate": 4.591139604925149e-05, "loss": 0.7488, "step": 22155 }, { "epoch": 0.25, "learning_rate": 4.5910473322112975e-05, "loss": 0.8246, "step": 22160 }, { "epoch": 0.25, "learning_rate": 4.5909550594974457e-05, "loss": 0.7799, "step": 22165 }, { "epoch": 0.25, "learning_rate": 4.5908627867835944e-05, "loss": 0.7261, "step": 22170 }, { "epoch": 0.25, "learning_rate": 4.590770514069744e-05, "loss": 0.7549, "step": 22175 }, { "epoch": 0.25, "learning_rate": 4.590678241355893e-05, "loss": 0.8425, "step": 22180 }, { "epoch": 0.25, "learning_rate": 4.590585968642041e-05, "loss": 0.8014, "step": 22185 }, { "epoch": 0.25, "learning_rate": 4.5904936959281896e-05, "loss": 0.8596, "step": 22190 }, { "epoch": 0.25, "learning_rate": 4.590401423214339e-05, "loss": 0.7626, "step": 22195 }, { "epoch": 0.25, "learning_rate": 4.590309150500488e-05, "loss": 0.8078, "step": 22200 }, { "epoch": 0.25, "learning_rate": 4.590216877786636e-05, "loss": 0.7846, "step": 22205 }, { "epoch": 0.25, "learning_rate": 4.590124605072785e-05, "loss": 0.7829, "step": 22210 }, { "epoch": 0.25, "learning_rate": 4.590032332358934e-05, "loss": 0.8301, "step": 22215 }, { "epoch": 0.25, "learning_rate": 4.589940059645082e-05, "loss": 0.8003, "step": 22220 }, { "epoch": 0.25, "learning_rate": 4.589847786931231e-05, "loss": 0.8146, "step": 22225 }, { "epoch": 0.25, "learning_rate": 4.58975551421738e-05, "loss": 0.7845, "step": 22230 }, { "epoch": 0.25, "learning_rate": 4.589663241503529e-05, "loss": 0.7893, "step": 22235 }, { "epoch": 0.25, "learning_rate": 4.5895709687896774e-05, "loss": 0.7671, "step": 22240 }, { "epoch": 0.25, "learning_rate": 4.589478696075826e-05, "loss": 0.8515, "step": 22245 }, { "epoch": 0.25, "learning_rate": 4.589386423361975e-05, "loss": 0.8211, "step": 22250 }, { "epoch": 0.25, "learning_rate": 4.589294150648124e-05, "loss": 0.8112, "step": 22255 }, { "epoch": 0.25, "learning_rate": 4.5892018779342726e-05, "loss": 0.8529, "step": 22260 }, { "epoch": 0.25, "learning_rate": 4.5891096052204213e-05, "loss": 0.7784, "step": 22265 }, { "epoch": 0.25, "learning_rate": 4.58901733250657e-05, "loss": 0.8211, "step": 22270 }, { "epoch": 0.25, "learning_rate": 4.588925059792719e-05, "loss": 0.8679, "step": 22275 }, { "epoch": 0.25, "learning_rate": 4.588832787078868e-05, "loss": 0.8187, "step": 22280 }, { "epoch": 0.25, "learning_rate": 4.5887405143650165e-05, "loss": 0.8332, "step": 22285 }, { "epoch": 0.25, "learning_rate": 4.588648241651165e-05, "loss": 0.8766, "step": 22290 }, { "epoch": 0.25, "learning_rate": 4.5885559689373134e-05, "loss": 0.7953, "step": 22295 }, { "epoch": 0.25, "learning_rate": 4.588463696223463e-05, "loss": 0.7655, "step": 22300 }, { "epoch": 0.25, "learning_rate": 4.5883714235096116e-05, "loss": 0.7699, "step": 22305 }, { "epoch": 0.25, "learning_rate": 4.5882791507957604e-05, "loss": 0.7461, "step": 22310 }, { "epoch": 0.25, "learning_rate": 4.5881868780819085e-05, "loss": 0.8366, "step": 22315 }, { "epoch": 0.25, "learning_rate": 4.588094605368057e-05, "loss": 0.8219, "step": 22320 }, { "epoch": 0.25, "learning_rate": 4.588002332654207e-05, "loss": 0.8155, "step": 22325 }, { "epoch": 0.25, "learning_rate": 4.587910059940355e-05, "loss": 0.8104, "step": 22330 }, { "epoch": 0.25, "learning_rate": 4.5878177872265037e-05, "loss": 0.7881, "step": 22335 }, { "epoch": 0.25, "learning_rate": 4.5877255145126524e-05, "loss": 0.753, "step": 22340 }, { "epoch": 0.25, "learning_rate": 4.587633241798802e-05, "loss": 0.8622, "step": 22345 }, { "epoch": 0.25, "learning_rate": 4.58754096908495e-05, "loss": 0.8108, "step": 22350 }, { "epoch": 0.25, "learning_rate": 4.587448696371099e-05, "loss": 0.7755, "step": 22355 }, { "epoch": 0.25, "learning_rate": 4.5873564236572476e-05, "loss": 0.838, "step": 22360 }, { "epoch": 0.25, "learning_rate": 4.5872641509433964e-05, "loss": 0.8155, "step": 22365 }, { "epoch": 0.25, "learning_rate": 4.587171878229545e-05, "loss": 0.7274, "step": 22370 }, { "epoch": 0.25, "learning_rate": 4.587079605515694e-05, "loss": 0.8217, "step": 22375 }, { "epoch": 0.25, "learning_rate": 4.586987332801843e-05, "loss": 0.8257, "step": 22380 }, { "epoch": 0.25, "learning_rate": 4.5868950600879915e-05, "loss": 0.7427, "step": 22385 }, { "epoch": 0.25, "learning_rate": 4.58680278737414e-05, "loss": 0.8619, "step": 22390 }, { "epoch": 0.25, "learning_rate": 4.586710514660289e-05, "loss": 0.8465, "step": 22395 }, { "epoch": 0.25, "learning_rate": 4.586618241946438e-05, "loss": 0.829, "step": 22400 }, { "epoch": 0.25, "learning_rate": 4.586525969232586e-05, "loss": 0.7823, "step": 22405 }, { "epoch": 0.25, "learning_rate": 4.5864336965187354e-05, "loss": 0.8521, "step": 22410 }, { "epoch": 0.25, "learning_rate": 4.586341423804884e-05, "loss": 0.8086, "step": 22415 }, { "epoch": 0.25, "learning_rate": 4.586249151091033e-05, "loss": 0.8004, "step": 22420 }, { "epoch": 0.25, "learning_rate": 4.586156878377181e-05, "loss": 0.8034, "step": 22425 }, { "epoch": 0.25, "learning_rate": 4.5860646056633306e-05, "loss": 0.7856, "step": 22430 }, { "epoch": 0.25, "learning_rate": 4.5859723329494794e-05, "loss": 0.8455, "step": 22435 }, { "epoch": 0.25, "learning_rate": 4.5858800602356275e-05, "loss": 0.8203, "step": 22440 }, { "epoch": 0.25, "learning_rate": 4.585787787521776e-05, "loss": 0.793, "step": 22445 }, { "epoch": 0.25, "learning_rate": 4.585695514807926e-05, "loss": 0.7682, "step": 22450 }, { "epoch": 0.25, "learning_rate": 4.5856032420940745e-05, "loss": 0.7531, "step": 22455 }, { "epoch": 0.25, "learning_rate": 4.5855109693802226e-05, "loss": 0.7989, "step": 22460 }, { "epoch": 0.25, "learning_rate": 4.5854186966663714e-05, "loss": 0.7903, "step": 22465 }, { "epoch": 0.25, "learning_rate": 4.58532642395252e-05, "loss": 0.8244, "step": 22470 }, { "epoch": 0.25, "learning_rate": 4.585234151238669e-05, "loss": 0.8036, "step": 22475 }, { "epoch": 0.25, "learning_rate": 4.585141878524818e-05, "loss": 0.7865, "step": 22480 }, { "epoch": 0.25, "learning_rate": 4.5850496058109665e-05, "loss": 0.7967, "step": 22485 }, { "epoch": 0.25, "learning_rate": 4.584957333097115e-05, "loss": 0.789, "step": 22490 }, { "epoch": 0.25, "learning_rate": 4.584865060383264e-05, "loss": 0.7666, "step": 22495 }, { "epoch": 0.25, "learning_rate": 4.584772787669413e-05, "loss": 0.7473, "step": 22500 }, { "epoch": 0.25, "learning_rate": 4.584680514955562e-05, "loss": 0.7827, "step": 22505 }, { "epoch": 0.25, "learning_rate": 4.5845882422417105e-05, "loss": 0.8097, "step": 22510 }, { "epoch": 0.25, "learning_rate": 4.584495969527859e-05, "loss": 0.7902, "step": 22515 }, { "epoch": 0.25, "learning_rate": 4.584403696814008e-05, "loss": 0.7441, "step": 22520 }, { "epoch": 0.25, "learning_rate": 4.584311424100157e-05, "loss": 0.7682, "step": 22525 }, { "epoch": 0.25, "learning_rate": 4.5842191513863056e-05, "loss": 0.7836, "step": 22530 }, { "epoch": 0.25, "learning_rate": 4.5841268786724544e-05, "loss": 0.7836, "step": 22535 }, { "epoch": 0.25, "learning_rate": 4.584034605958603e-05, "loss": 0.8035, "step": 22540 }, { "epoch": 0.25, "learning_rate": 4.583942333244752e-05, "loss": 0.8052, "step": 22545 }, { "epoch": 0.25, "learning_rate": 4.5838500605309e-05, "loss": 0.7987, "step": 22550 }, { "epoch": 0.25, "learning_rate": 4.583757787817049e-05, "loss": 0.7589, "step": 22555 }, { "epoch": 0.25, "learning_rate": 4.583665515103198e-05, "loss": 0.8089, "step": 22560 }, { "epoch": 0.25, "learning_rate": 4.583573242389347e-05, "loss": 0.8034, "step": 22565 }, { "epoch": 0.25, "learning_rate": 4.583480969675495e-05, "loss": 0.7561, "step": 22570 }, { "epoch": 0.25, "learning_rate": 4.583388696961644e-05, "loss": 0.759, "step": 22575 }, { "epoch": 0.25, "learning_rate": 4.5832964242477934e-05, "loss": 0.7819, "step": 22580 }, { "epoch": 0.25, "learning_rate": 4.583204151533942e-05, "loss": 0.8424, "step": 22585 }, { "epoch": 0.25, "learning_rate": 4.58311187882009e-05, "loss": 0.8682, "step": 22590 }, { "epoch": 0.25, "learning_rate": 4.583019606106239e-05, "loss": 0.7578, "step": 22595 }, { "epoch": 0.25, "learning_rate": 4.5829273333923886e-05, "loss": 0.8384, "step": 22600 }, { "epoch": 0.25, "learning_rate": 4.582835060678537e-05, "loss": 0.785, "step": 22605 }, { "epoch": 0.25, "learning_rate": 4.5827427879646855e-05, "loss": 0.8192, "step": 22610 }, { "epoch": 0.25, "learning_rate": 4.582650515250834e-05, "loss": 0.763, "step": 22615 }, { "epoch": 0.25, "learning_rate": 4.582558242536983e-05, "loss": 0.8125, "step": 22620 }, { "epoch": 0.25, "learning_rate": 4.582465969823132e-05, "loss": 0.8464, "step": 22625 }, { "epoch": 0.25, "learning_rate": 4.5823736971092806e-05, "loss": 0.7569, "step": 22630 }, { "epoch": 0.25, "learning_rate": 4.5822814243954294e-05, "loss": 0.7728, "step": 22635 }, { "epoch": 0.25, "learning_rate": 4.582189151681578e-05, "loss": 0.7915, "step": 22640 }, { "epoch": 0.25, "learning_rate": 4.582096878967727e-05, "loss": 0.7629, "step": 22645 }, { "epoch": 0.25, "learning_rate": 4.582004606253876e-05, "loss": 0.8153, "step": 22650 }, { "epoch": 0.25, "learning_rate": 4.5819123335400245e-05, "loss": 0.8353, "step": 22655 }, { "epoch": 0.25, "learning_rate": 4.581820060826173e-05, "loss": 0.8207, "step": 22660 }, { "epoch": 0.25, "learning_rate": 4.581727788112322e-05, "loss": 0.7741, "step": 22665 }, { "epoch": 0.25, "learning_rate": 4.581635515398471e-05, "loss": 0.7336, "step": 22670 }, { "epoch": 0.25, "learning_rate": 4.58154324268462e-05, "loss": 0.8401, "step": 22675 }, { "epoch": 0.25, "learning_rate": 4.581450969970768e-05, "loss": 0.7846, "step": 22680 }, { "epoch": 0.25, "learning_rate": 4.581358697256917e-05, "loss": 0.7976, "step": 22685 }, { "epoch": 0.25, "learning_rate": 4.581266424543066e-05, "loss": 0.8255, "step": 22690 }, { "epoch": 0.25, "learning_rate": 4.581174151829215e-05, "loss": 0.8229, "step": 22695 }, { "epoch": 0.25, "learning_rate": 4.581081879115363e-05, "loss": 0.7876, "step": 22700 }, { "epoch": 0.25, "learning_rate": 4.580989606401512e-05, "loss": 0.7706, "step": 22705 }, { "epoch": 0.25, "learning_rate": 4.580897333687661e-05, "loss": 0.7655, "step": 22710 }, { "epoch": 0.25, "learning_rate": 4.580805060973809e-05, "loss": 0.8238, "step": 22715 }, { "epoch": 0.25, "learning_rate": 4.580712788259958e-05, "loss": 0.7928, "step": 22720 }, { "epoch": 0.25, "learning_rate": 4.580620515546107e-05, "loss": 0.8076, "step": 22725 }, { "epoch": 0.25, "learning_rate": 4.580528242832256e-05, "loss": 0.7819, "step": 22730 }, { "epoch": 0.25, "learning_rate": 4.5804359701184044e-05, "loss": 0.7867, "step": 22735 }, { "epoch": 0.25, "learning_rate": 4.580343697404553e-05, "loss": 0.7195, "step": 22740 }, { "epoch": 0.25, "learning_rate": 4.580251424690702e-05, "loss": 0.7872, "step": 22745 }, { "epoch": 0.25, "learning_rate": 4.580159151976851e-05, "loss": 0.827, "step": 22750 }, { "epoch": 0.25, "learning_rate": 4.5800668792629996e-05, "loss": 0.8045, "step": 22755 }, { "epoch": 0.25, "learning_rate": 4.5799746065491483e-05, "loss": 0.8201, "step": 22760 }, { "epoch": 0.25, "learning_rate": 4.579882333835297e-05, "loss": 0.7919, "step": 22765 }, { "epoch": 0.25, "learning_rate": 4.579790061121446e-05, "loss": 0.7944, "step": 22770 }, { "epoch": 0.25, "learning_rate": 4.579697788407595e-05, "loss": 0.7972, "step": 22775 }, { "epoch": 0.25, "learning_rate": 4.5796055156937435e-05, "loss": 0.8885, "step": 22780 }, { "epoch": 0.25, "learning_rate": 4.579513242979892e-05, "loss": 0.8123, "step": 22785 }, { "epoch": 0.25, "learning_rate": 4.5794209702660404e-05, "loss": 0.8157, "step": 22790 }, { "epoch": 0.25, "learning_rate": 4.57932869755219e-05, "loss": 0.8335, "step": 22795 }, { "epoch": 0.25, "learning_rate": 4.5792364248383386e-05, "loss": 0.7731, "step": 22800 }, { "epoch": 0.25, "learning_rate": 4.5791441521244874e-05, "loss": 0.8616, "step": 22805 }, { "epoch": 0.25, "learning_rate": 4.5790518794106355e-05, "loss": 0.7674, "step": 22810 }, { "epoch": 0.25, "learning_rate": 4.578959606696785e-05, "loss": 0.778, "step": 22815 }, { "epoch": 0.25, "learning_rate": 4.578867333982934e-05, "loss": 0.8069, "step": 22820 }, { "epoch": 0.25, "learning_rate": 4.578775061269082e-05, "loss": 0.7492, "step": 22825 }, { "epoch": 0.25, "learning_rate": 4.5786827885552307e-05, "loss": 0.7863, "step": 22830 }, { "epoch": 0.25, "learning_rate": 4.57859051584138e-05, "loss": 0.7872, "step": 22835 }, { "epoch": 0.25, "learning_rate": 4.578498243127529e-05, "loss": 0.7542, "step": 22840 }, { "epoch": 0.25, "learning_rate": 4.578405970413677e-05, "loss": 0.7493, "step": 22845 }, { "epoch": 0.25, "learning_rate": 4.578313697699826e-05, "loss": 0.8043, "step": 22850 }, { "epoch": 0.25, "learning_rate": 4.5782214249859746e-05, "loss": 0.757, "step": 22855 }, { "epoch": 0.25, "learning_rate": 4.5781291522721234e-05, "loss": 0.7506, "step": 22860 }, { "epoch": 0.25, "learning_rate": 4.578036879558272e-05, "loss": 0.82, "step": 22865 }, { "epoch": 0.25, "learning_rate": 4.577944606844421e-05, "loss": 0.8064, "step": 22870 }, { "epoch": 0.25, "learning_rate": 4.57785233413057e-05, "loss": 0.8297, "step": 22875 }, { "epoch": 0.25, "learning_rate": 4.5777600614167185e-05, "loss": 0.9088, "step": 22880 }, { "epoch": 0.25, "learning_rate": 4.577667788702867e-05, "loss": 0.7694, "step": 22885 }, { "epoch": 0.25, "learning_rate": 4.577575515989016e-05, "loss": 0.811, "step": 22890 }, { "epoch": 0.25, "learning_rate": 4.577483243275165e-05, "loss": 0.7567, "step": 22895 }, { "epoch": 0.25, "learning_rate": 4.5773909705613136e-05, "loss": 0.8051, "step": 22900 }, { "epoch": 0.25, "learning_rate": 4.5772986978474624e-05, "loss": 0.7731, "step": 22905 }, { "epoch": 0.25, "learning_rate": 4.577206425133611e-05, "loss": 0.7981, "step": 22910 }, { "epoch": 0.25, "learning_rate": 4.57711415241976e-05, "loss": 0.8786, "step": 22915 }, { "epoch": 0.25, "learning_rate": 4.577021879705909e-05, "loss": 0.7929, "step": 22920 }, { "epoch": 0.25, "learning_rate": 4.5769296069920576e-05, "loss": 0.7693, "step": 22925 }, { "epoch": 0.25, "learning_rate": 4.5768373342782063e-05, "loss": 0.8008, "step": 22930 }, { "epoch": 0.25, "learning_rate": 4.5767450615643545e-05, "loss": 0.797, "step": 22935 }, { "epoch": 0.25, "learning_rate": 4.576652788850503e-05, "loss": 0.8438, "step": 22940 }, { "epoch": 0.25, "learning_rate": 4.576560516136653e-05, "loss": 0.7833, "step": 22945 }, { "epoch": 0.25, "learning_rate": 4.5764682434228015e-05, "loss": 0.7675, "step": 22950 }, { "epoch": 0.25, "learning_rate": 4.5763759707089496e-05, "loss": 0.8064, "step": 22955 }, { "epoch": 0.25, "learning_rate": 4.5762836979950984e-05, "loss": 0.7945, "step": 22960 }, { "epoch": 0.25, "learning_rate": 4.576191425281248e-05, "loss": 0.858, "step": 22965 }, { "epoch": 0.25, "learning_rate": 4.5760991525673966e-05, "loss": 0.7574, "step": 22970 }, { "epoch": 0.25, "learning_rate": 4.576006879853545e-05, "loss": 0.7968, "step": 22975 }, { "epoch": 0.25, "learning_rate": 4.5759146071396935e-05, "loss": 0.7404, "step": 22980 }, { "epoch": 0.25, "learning_rate": 4.575822334425843e-05, "loss": 0.7718, "step": 22985 }, { "epoch": 0.25, "learning_rate": 4.575730061711991e-05, "loss": 0.7934, "step": 22990 }, { "epoch": 0.25, "learning_rate": 4.57563778899814e-05, "loss": 0.7603, "step": 22995 }, { "epoch": 0.25, "learning_rate": 4.5755455162842887e-05, "loss": 0.7778, "step": 23000 }, { "epoch": 0.25, "eval_loss": 0.7877804636955261, "eval_runtime": 69.2421, "eval_samples_per_second": 28.884, "eval_steps_per_second": 14.442, "step": 23000 }, { "epoch": 0.25, "learning_rate": 4.5754532435704374e-05, "loss": 0.8131, "step": 23005 }, { "epoch": 0.25, "learning_rate": 4.575360970856586e-05, "loss": 0.7881, "step": 23010 }, { "epoch": 0.25, "learning_rate": 4.575268698142735e-05, "loss": 0.8156, "step": 23015 }, { "epoch": 0.25, "learning_rate": 4.575176425428884e-05, "loss": 0.8357, "step": 23020 }, { "epoch": 0.25, "learning_rate": 4.5750841527150326e-05, "loss": 0.8594, "step": 23025 }, { "epoch": 0.26, "learning_rate": 4.5749918800011814e-05, "loss": 0.8027, "step": 23030 }, { "epoch": 0.26, "learning_rate": 4.57489960728733e-05, "loss": 0.7358, "step": 23035 }, { "epoch": 0.26, "learning_rate": 4.574807334573479e-05, "loss": 0.8478, "step": 23040 }, { "epoch": 0.26, "learning_rate": 4.574715061859628e-05, "loss": 0.7695, "step": 23045 }, { "epoch": 0.26, "learning_rate": 4.5746227891457765e-05, "loss": 0.7589, "step": 23050 }, { "epoch": 0.26, "learning_rate": 4.574530516431925e-05, "loss": 0.8135, "step": 23055 }, { "epoch": 0.26, "learning_rate": 4.574438243718074e-05, "loss": 0.8359, "step": 23060 }, { "epoch": 0.26, "learning_rate": 4.574345971004222e-05, "loss": 0.8365, "step": 23065 }, { "epoch": 0.26, "learning_rate": 4.5742536982903716e-05, "loss": 0.7531, "step": 23070 }, { "epoch": 0.26, "learning_rate": 4.5741614255765204e-05, "loss": 0.8271, "step": 23075 }, { "epoch": 0.26, "learning_rate": 4.574069152862669e-05, "loss": 0.764, "step": 23080 }, { "epoch": 0.26, "learning_rate": 4.573976880148817e-05, "loss": 0.7405, "step": 23085 }, { "epoch": 0.26, "learning_rate": 4.573884607434966e-05, "loss": 0.8095, "step": 23090 }, { "epoch": 0.26, "learning_rate": 4.5737923347211156e-05, "loss": 0.7954, "step": 23095 }, { "epoch": 0.26, "learning_rate": 4.573700062007264e-05, "loss": 0.7905, "step": 23100 }, { "epoch": 0.26, "learning_rate": 4.5736077892934125e-05, "loss": 0.8839, "step": 23105 }, { "epoch": 0.26, "learning_rate": 4.573515516579561e-05, "loss": 0.7684, "step": 23110 }, { "epoch": 0.26, "learning_rate": 4.573423243865711e-05, "loss": 0.7789, "step": 23115 }, { "epoch": 0.26, "learning_rate": 4.573330971151859e-05, "loss": 0.7642, "step": 23120 }, { "epoch": 0.26, "learning_rate": 4.5732386984380076e-05, "loss": 0.7731, "step": 23125 }, { "epoch": 0.26, "learning_rate": 4.5731464257241564e-05, "loss": 0.7897, "step": 23130 }, { "epoch": 0.26, "learning_rate": 4.573054153010305e-05, "loss": 0.8148, "step": 23135 }, { "epoch": 0.26, "learning_rate": 4.572961880296454e-05, "loss": 0.809, "step": 23140 }, { "epoch": 0.26, "learning_rate": 4.572869607582603e-05, "loss": 0.7851, "step": 23145 }, { "epoch": 0.26, "learning_rate": 4.5727773348687515e-05, "loss": 0.7819, "step": 23150 }, { "epoch": 0.26, "learning_rate": 4.5726850621549e-05, "loss": 0.7998, "step": 23155 }, { "epoch": 0.26, "learning_rate": 4.572592789441049e-05, "loss": 0.7858, "step": 23160 }, { "epoch": 0.26, "learning_rate": 4.572500516727198e-05, "loss": 0.7522, "step": 23165 }, { "epoch": 0.26, "learning_rate": 4.572408244013347e-05, "loss": 0.8169, "step": 23170 }, { "epoch": 0.26, "learning_rate": 4.572315971299495e-05, "loss": 0.7784, "step": 23175 }, { "epoch": 0.26, "learning_rate": 4.572223698585644e-05, "loss": 0.8586, "step": 23180 }, { "epoch": 0.26, "learning_rate": 4.572131425871793e-05, "loss": 0.7301, "step": 23185 }, { "epoch": 0.26, "learning_rate": 4.572039153157942e-05, "loss": 0.8326, "step": 23190 }, { "epoch": 0.26, "learning_rate": 4.57194688044409e-05, "loss": 0.8247, "step": 23195 }, { "epoch": 0.26, "learning_rate": 4.5718546077302394e-05, "loss": 0.7619, "step": 23200 }, { "epoch": 0.26, "learning_rate": 4.571762335016388e-05, "loss": 0.7959, "step": 23205 }, { "epoch": 0.26, "learning_rate": 4.571670062302536e-05, "loss": 0.8506, "step": 23210 }, { "epoch": 0.26, "learning_rate": 4.571577789588685e-05, "loss": 0.8768, "step": 23215 }, { "epoch": 0.26, "learning_rate": 4.5714855168748345e-05, "loss": 0.7628, "step": 23220 }, { "epoch": 0.26, "learning_rate": 4.571393244160983e-05, "loss": 0.7822, "step": 23225 }, { "epoch": 0.26, "learning_rate": 4.5713009714471314e-05, "loss": 0.7694, "step": 23230 }, { "epoch": 0.26, "learning_rate": 4.57120869873328e-05, "loss": 0.7797, "step": 23235 }, { "epoch": 0.26, "learning_rate": 4.571116426019429e-05, "loss": 0.8003, "step": 23240 }, { "epoch": 0.26, "learning_rate": 4.571024153305578e-05, "loss": 0.8048, "step": 23245 }, { "epoch": 0.26, "learning_rate": 4.5709318805917265e-05, "loss": 0.7938, "step": 23250 }, { "epoch": 0.26, "learning_rate": 4.570839607877875e-05, "loss": 0.7708, "step": 23255 }, { "epoch": 0.26, "learning_rate": 4.570747335164024e-05, "loss": 0.8227, "step": 23260 }, { "epoch": 0.26, "learning_rate": 4.570655062450173e-05, "loss": 0.8181, "step": 23265 }, { "epoch": 0.26, "learning_rate": 4.570562789736322e-05, "loss": 0.7699, "step": 23270 }, { "epoch": 0.26, "learning_rate": 4.5704705170224705e-05, "loss": 0.7876, "step": 23275 }, { "epoch": 0.26, "learning_rate": 4.570378244308619e-05, "loss": 0.7549, "step": 23280 }, { "epoch": 0.26, "learning_rate": 4.570285971594768e-05, "loss": 0.8647, "step": 23285 }, { "epoch": 0.26, "learning_rate": 4.570193698880917e-05, "loss": 0.7946, "step": 23290 }, { "epoch": 0.26, "learning_rate": 4.5701014261670656e-05, "loss": 0.8438, "step": 23295 }, { "epoch": 0.26, "learning_rate": 4.5700091534532144e-05, "loss": 0.7635, "step": 23300 }, { "epoch": 0.26, "learning_rate": 4.5699168807393625e-05, "loss": 0.8017, "step": 23305 }, { "epoch": 0.26, "learning_rate": 4.569824608025512e-05, "loss": 0.7812, "step": 23310 }, { "epoch": 0.26, "learning_rate": 4.569732335311661e-05, "loss": 0.7295, "step": 23315 }, { "epoch": 0.26, "learning_rate": 4.569640062597809e-05, "loss": 0.7665, "step": 23320 }, { "epoch": 0.26, "learning_rate": 4.5695477898839576e-05, "loss": 0.77, "step": 23325 }, { "epoch": 0.26, "learning_rate": 4.569455517170107e-05, "loss": 0.8141, "step": 23330 }, { "epoch": 0.26, "learning_rate": 4.569363244456256e-05, "loss": 0.7779, "step": 23335 }, { "epoch": 0.26, "learning_rate": 4.569270971742404e-05, "loss": 0.8018, "step": 23340 }, { "epoch": 0.26, "learning_rate": 4.569178699028553e-05, "loss": 0.8232, "step": 23345 }, { "epoch": 0.26, "learning_rate": 4.569086426314702e-05, "loss": 0.8556, "step": 23350 }, { "epoch": 0.26, "learning_rate": 4.568994153600851e-05, "loss": 0.7759, "step": 23355 }, { "epoch": 0.26, "learning_rate": 4.568901880886999e-05, "loss": 0.7855, "step": 23360 }, { "epoch": 0.26, "learning_rate": 4.568809608173148e-05, "loss": 0.7721, "step": 23365 }, { "epoch": 0.26, "learning_rate": 4.5687173354592974e-05, "loss": 0.7974, "step": 23370 }, { "epoch": 0.26, "learning_rate": 4.5686250627454455e-05, "loss": 0.7713, "step": 23375 }, { "epoch": 0.26, "learning_rate": 4.568532790031594e-05, "loss": 0.7555, "step": 23380 }, { "epoch": 0.26, "learning_rate": 4.568440517317743e-05, "loss": 0.8567, "step": 23385 }, { "epoch": 0.26, "learning_rate": 4.568348244603892e-05, "loss": 0.7682, "step": 23390 }, { "epoch": 0.26, "learning_rate": 4.5682559718900406e-05, "loss": 0.7615, "step": 23395 }, { "epoch": 0.26, "learning_rate": 4.5681636991761894e-05, "loss": 0.8274, "step": 23400 }, { "epoch": 0.26, "learning_rate": 4.568071426462338e-05, "loss": 0.8622, "step": 23405 }, { "epoch": 0.26, "learning_rate": 4.567979153748487e-05, "loss": 0.8212, "step": 23410 }, { "epoch": 0.26, "learning_rate": 4.567886881034636e-05, "loss": 0.8074, "step": 23415 }, { "epoch": 0.26, "learning_rate": 4.5677946083207846e-05, "loss": 0.8528, "step": 23420 }, { "epoch": 0.26, "learning_rate": 4.5677023356069333e-05, "loss": 0.8044, "step": 23425 }, { "epoch": 0.26, "learning_rate": 4.567610062893082e-05, "loss": 0.7281, "step": 23430 }, { "epoch": 0.26, "learning_rate": 4.567517790179231e-05, "loss": 0.7164, "step": 23435 }, { "epoch": 0.26, "learning_rate": 4.56742551746538e-05, "loss": 0.7287, "step": 23440 }, { "epoch": 0.26, "learning_rate": 4.5673332447515285e-05, "loss": 0.7863, "step": 23445 }, { "epoch": 0.26, "learning_rate": 4.5672409720376766e-05, "loss": 0.7675, "step": 23450 }, { "epoch": 0.26, "learning_rate": 4.5671486993238254e-05, "loss": 0.7801, "step": 23455 }, { "epoch": 0.26, "learning_rate": 4.567056426609975e-05, "loss": 0.7917, "step": 23460 }, { "epoch": 0.26, "learning_rate": 4.5669641538961236e-05, "loss": 0.7937, "step": 23465 }, { "epoch": 0.26, "learning_rate": 4.566871881182272e-05, "loss": 0.7409, "step": 23470 }, { "epoch": 0.26, "learning_rate": 4.5667796084684205e-05, "loss": 0.7648, "step": 23475 }, { "epoch": 0.26, "learning_rate": 4.56668733575457e-05, "loss": 0.7892, "step": 23480 }, { "epoch": 0.26, "learning_rate": 4.566595063040718e-05, "loss": 0.7921, "step": 23485 }, { "epoch": 0.26, "learning_rate": 4.566502790326867e-05, "loss": 0.8023, "step": 23490 }, { "epoch": 0.26, "learning_rate": 4.5664105176130156e-05, "loss": 0.748, "step": 23495 }, { "epoch": 0.26, "learning_rate": 4.566318244899165e-05, "loss": 0.8399, "step": 23500 }, { "epoch": 0.26, "learning_rate": 4.566225972185313e-05, "loss": 0.7757, "step": 23505 }, { "epoch": 0.26, "learning_rate": 4.566133699471462e-05, "loss": 0.8881, "step": 23510 }, { "epoch": 0.26, "learning_rate": 4.566041426757611e-05, "loss": 0.8295, "step": 23515 }, { "epoch": 0.26, "learning_rate": 4.5659491540437596e-05, "loss": 0.8097, "step": 23520 }, { "epoch": 0.26, "learning_rate": 4.5658568813299084e-05, "loss": 0.7929, "step": 23525 }, { "epoch": 0.26, "learning_rate": 4.565764608616057e-05, "loss": 0.8374, "step": 23530 }, { "epoch": 0.26, "learning_rate": 4.565672335902206e-05, "loss": 0.791, "step": 23535 }, { "epoch": 0.26, "learning_rate": 4.565580063188355e-05, "loss": 0.8144, "step": 23540 }, { "epoch": 0.26, "learning_rate": 4.5654877904745035e-05, "loss": 0.8153, "step": 23545 }, { "epoch": 0.26, "learning_rate": 4.565395517760652e-05, "loss": 0.8342, "step": 23550 }, { "epoch": 0.26, "learning_rate": 4.565303245046801e-05, "loss": 0.7604, "step": 23555 }, { "epoch": 0.26, "learning_rate": 4.565210972332949e-05, "loss": 0.8261, "step": 23560 }, { "epoch": 0.26, "learning_rate": 4.5651186996190986e-05, "loss": 0.7857, "step": 23565 }, { "epoch": 0.26, "learning_rate": 4.5650264269052474e-05, "loss": 0.8341, "step": 23570 }, { "epoch": 0.26, "learning_rate": 4.564934154191396e-05, "loss": 0.8103, "step": 23575 }, { "epoch": 0.26, "learning_rate": 4.564841881477544e-05, "loss": 0.8232, "step": 23580 }, { "epoch": 0.26, "learning_rate": 4.564749608763694e-05, "loss": 0.8639, "step": 23585 }, { "epoch": 0.26, "learning_rate": 4.5646573360498426e-05, "loss": 0.8186, "step": 23590 }, { "epoch": 0.26, "learning_rate": 4.564565063335991e-05, "loss": 0.7602, "step": 23595 }, { "epoch": 0.26, "learning_rate": 4.5644727906221395e-05, "loss": 0.7538, "step": 23600 }, { "epoch": 0.26, "learning_rate": 4.564380517908289e-05, "loss": 0.7761, "step": 23605 }, { "epoch": 0.26, "learning_rate": 4.564288245194438e-05, "loss": 0.8325, "step": 23610 }, { "epoch": 0.26, "learning_rate": 4.564195972480586e-05, "loss": 0.8684, "step": 23615 }, { "epoch": 0.26, "learning_rate": 4.5641036997667346e-05, "loss": 0.7359, "step": 23620 }, { "epoch": 0.26, "learning_rate": 4.5640114270528834e-05, "loss": 0.7737, "step": 23625 }, { "epoch": 0.26, "learning_rate": 4.563919154339032e-05, "loss": 0.7908, "step": 23630 }, { "epoch": 0.26, "learning_rate": 4.563826881625181e-05, "loss": 0.8095, "step": 23635 }, { "epoch": 0.26, "learning_rate": 4.56373460891133e-05, "loss": 0.8414, "step": 23640 }, { "epoch": 0.26, "learning_rate": 4.5636423361974785e-05, "loss": 0.8109, "step": 23645 }, { "epoch": 0.26, "learning_rate": 4.563550063483627e-05, "loss": 0.8064, "step": 23650 }, { "epoch": 0.26, "learning_rate": 4.563457790769776e-05, "loss": 0.7208, "step": 23655 }, { "epoch": 0.26, "learning_rate": 4.563365518055925e-05, "loss": 0.8095, "step": 23660 }, { "epoch": 0.26, "learning_rate": 4.5632732453420737e-05, "loss": 0.8119, "step": 23665 }, { "epoch": 0.26, "learning_rate": 4.5631809726282224e-05, "loss": 0.7941, "step": 23670 }, { "epoch": 0.26, "learning_rate": 4.563088699914371e-05, "loss": 0.7992, "step": 23675 }, { "epoch": 0.26, "learning_rate": 4.56299642720052e-05, "loss": 0.8111, "step": 23680 }, { "epoch": 0.26, "learning_rate": 4.562904154486669e-05, "loss": 0.7387, "step": 23685 }, { "epoch": 0.26, "learning_rate": 4.562811881772817e-05, "loss": 0.8075, "step": 23690 }, { "epoch": 0.26, "learning_rate": 4.5627196090589664e-05, "loss": 0.7729, "step": 23695 }, { "epoch": 0.26, "learning_rate": 4.562627336345115e-05, "loss": 0.7696, "step": 23700 }, { "epoch": 0.26, "learning_rate": 4.562535063631263e-05, "loss": 0.8325, "step": 23705 }, { "epoch": 0.26, "learning_rate": 4.562442790917412e-05, "loss": 0.7994, "step": 23710 }, { "epoch": 0.26, "learning_rate": 4.5623505182035615e-05, "loss": 0.812, "step": 23715 }, { "epoch": 0.26, "learning_rate": 4.56225824548971e-05, "loss": 0.8021, "step": 23720 }, { "epoch": 0.26, "learning_rate": 4.5621659727758584e-05, "loss": 0.8154, "step": 23725 }, { "epoch": 0.26, "learning_rate": 4.562073700062007e-05, "loss": 0.7816, "step": 23730 }, { "epoch": 0.26, "learning_rate": 4.5619814273481566e-05, "loss": 0.8168, "step": 23735 }, { "epoch": 0.26, "learning_rate": 4.5618891546343054e-05, "loss": 0.8549, "step": 23740 }, { "epoch": 0.26, "learning_rate": 4.5617968819204535e-05, "loss": 0.7438, "step": 23745 }, { "epoch": 0.26, "learning_rate": 4.561704609206602e-05, "loss": 0.75, "step": 23750 }, { "epoch": 0.26, "learning_rate": 4.561612336492752e-05, "loss": 0.7996, "step": 23755 }, { "epoch": 0.26, "learning_rate": 4.5615200637789e-05, "loss": 0.8286, "step": 23760 }, { "epoch": 0.26, "learning_rate": 4.561427791065049e-05, "loss": 0.8214, "step": 23765 }, { "epoch": 0.26, "learning_rate": 4.5613355183511975e-05, "loss": 0.8231, "step": 23770 }, { "epoch": 0.26, "learning_rate": 4.561243245637346e-05, "loss": 0.8441, "step": 23775 }, { "epoch": 0.26, "learning_rate": 4.561150972923495e-05, "loss": 0.7853, "step": 23780 }, { "epoch": 0.26, "learning_rate": 4.561058700209644e-05, "loss": 0.8536, "step": 23785 }, { "epoch": 0.26, "learning_rate": 4.5609664274957926e-05, "loss": 0.7889, "step": 23790 }, { "epoch": 0.26, "learning_rate": 4.5608741547819414e-05, "loss": 0.8164, "step": 23795 }, { "epoch": 0.26, "learning_rate": 4.56078188206809e-05, "loss": 0.8134, "step": 23800 }, { "epoch": 0.26, "learning_rate": 4.560689609354239e-05, "loss": 0.7604, "step": 23805 }, { "epoch": 0.26, "learning_rate": 4.560597336640388e-05, "loss": 0.7804, "step": 23810 }, { "epoch": 0.26, "learning_rate": 4.5605050639265365e-05, "loss": 0.7587, "step": 23815 }, { "epoch": 0.26, "learning_rate": 4.560412791212685e-05, "loss": 0.7948, "step": 23820 }, { "epoch": 0.26, "learning_rate": 4.560320518498834e-05, "loss": 0.78, "step": 23825 }, { "epoch": 0.26, "learning_rate": 4.560228245784983e-05, "loss": 0.7918, "step": 23830 }, { "epoch": 0.26, "learning_rate": 4.560135973071131e-05, "loss": 0.8455, "step": 23835 }, { "epoch": 0.26, "learning_rate": 4.56004370035728e-05, "loss": 0.7993, "step": 23840 }, { "epoch": 0.26, "learning_rate": 4.559951427643429e-05, "loss": 0.7662, "step": 23845 }, { "epoch": 0.26, "learning_rate": 4.559859154929578e-05, "loss": 0.7483, "step": 23850 }, { "epoch": 0.26, "learning_rate": 4.559766882215726e-05, "loss": 0.7088, "step": 23855 }, { "epoch": 0.26, "learning_rate": 4.559674609501875e-05, "loss": 0.8029, "step": 23860 }, { "epoch": 0.26, "learning_rate": 4.5595823367880244e-05, "loss": 0.7261, "step": 23865 }, { "epoch": 0.26, "learning_rate": 4.5594900640741725e-05, "loss": 0.7386, "step": 23870 }, { "epoch": 0.26, "learning_rate": 4.559397791360321e-05, "loss": 0.7552, "step": 23875 }, { "epoch": 0.26, "learning_rate": 4.55930551864647e-05, "loss": 0.7855, "step": 23880 }, { "epoch": 0.26, "learning_rate": 4.5592132459326195e-05, "loss": 0.8346, "step": 23885 }, { "epoch": 0.26, "learning_rate": 4.5591209732187676e-05, "loss": 0.7976, "step": 23890 }, { "epoch": 0.26, "learning_rate": 4.5590287005049164e-05, "loss": 0.8029, "step": 23895 }, { "epoch": 0.26, "learning_rate": 4.558936427791065e-05, "loss": 0.7622, "step": 23900 }, { "epoch": 0.26, "learning_rate": 4.558844155077214e-05, "loss": 0.791, "step": 23905 }, { "epoch": 0.26, "learning_rate": 4.558751882363363e-05, "loss": 0.8047, "step": 23910 }, { "epoch": 0.26, "learning_rate": 4.5586596096495115e-05, "loss": 0.8535, "step": 23915 }, { "epoch": 0.26, "learning_rate": 4.55856733693566e-05, "loss": 0.7491, "step": 23920 }, { "epoch": 0.26, "learning_rate": 4.558475064221809e-05, "loss": 0.7713, "step": 23925 }, { "epoch": 0.26, "learning_rate": 4.558382791507958e-05, "loss": 0.8014, "step": 23930 }, { "epoch": 0.27, "learning_rate": 4.558290518794107e-05, "loss": 0.8122, "step": 23935 }, { "epoch": 0.27, "learning_rate": 4.5581982460802555e-05, "loss": 0.7699, "step": 23940 }, { "epoch": 0.27, "learning_rate": 4.5581059733664036e-05, "loss": 0.7574, "step": 23945 }, { "epoch": 0.27, "learning_rate": 4.558013700652553e-05, "loss": 0.7923, "step": 23950 }, { "epoch": 0.27, "learning_rate": 4.557921427938702e-05, "loss": 0.8147, "step": 23955 }, { "epoch": 0.27, "learning_rate": 4.5578291552248506e-05, "loss": 0.7627, "step": 23960 }, { "epoch": 0.27, "learning_rate": 4.557736882510999e-05, "loss": 0.8178, "step": 23965 }, { "epoch": 0.27, "learning_rate": 4.557644609797148e-05, "loss": 0.7321, "step": 23970 }, { "epoch": 0.27, "learning_rate": 4.557552337083297e-05, "loss": 0.8174, "step": 23975 }, { "epoch": 0.27, "learning_rate": 4.557460064369445e-05, "loss": 0.8204, "step": 23980 }, { "epoch": 0.27, "learning_rate": 4.557367791655594e-05, "loss": 0.8693, "step": 23985 }, { "epoch": 0.27, "learning_rate": 4.5572755189417426e-05, "loss": 0.7599, "step": 23990 }, { "epoch": 0.27, "learning_rate": 4.557183246227892e-05, "loss": 0.7692, "step": 23995 }, { "epoch": 0.27, "learning_rate": 4.55709097351404e-05, "loss": 0.737, "step": 24000 }, { "epoch": 0.27, "eval_loss": 0.747295081615448, "eval_runtime": 69.3343, "eval_samples_per_second": 28.846, "eval_steps_per_second": 14.423, "step": 24000 }, { "epoch": 0.27, "learning_rate": 4.556998700800189e-05, "loss": 0.8069, "step": 24005 }, { "epoch": 0.27, "learning_rate": 4.556906428086338e-05, "loss": 0.7975, "step": 24010 }, { "epoch": 0.27, "learning_rate": 4.5568141553724866e-05, "loss": 0.8405, "step": 24015 }, { "epoch": 0.27, "learning_rate": 4.5567218826586353e-05, "loss": 0.8488, "step": 24020 }, { "epoch": 0.27, "learning_rate": 4.556629609944784e-05, "loss": 0.7995, "step": 24025 }, { "epoch": 0.27, "learning_rate": 4.556537337230933e-05, "loss": 0.8402, "step": 24030 }, { "epoch": 0.27, "learning_rate": 4.556445064517082e-05, "loss": 0.7782, "step": 24035 }, { "epoch": 0.27, "learning_rate": 4.5563527918032305e-05, "loss": 0.7517, "step": 24040 }, { "epoch": 0.27, "learning_rate": 4.556260519089379e-05, "loss": 0.8315, "step": 24045 }, { "epoch": 0.27, "learning_rate": 4.556168246375528e-05, "loss": 0.8467, "step": 24050 }, { "epoch": 0.27, "learning_rate": 4.556075973661677e-05, "loss": 0.6738, "step": 24055 }, { "epoch": 0.27, "learning_rate": 4.5559837009478256e-05, "loss": 0.7906, "step": 24060 }, { "epoch": 0.27, "learning_rate": 4.5558914282339744e-05, "loss": 0.761, "step": 24065 }, { "epoch": 0.27, "learning_rate": 4.555799155520123e-05, "loss": 0.818, "step": 24070 }, { "epoch": 0.27, "learning_rate": 4.555706882806271e-05, "loss": 0.8419, "step": 24075 }, { "epoch": 0.27, "learning_rate": 4.555614610092421e-05, "loss": 0.7673, "step": 24080 }, { "epoch": 0.27, "learning_rate": 4.5555223373785696e-05, "loss": 0.8153, "step": 24085 }, { "epoch": 0.27, "learning_rate": 4.555430064664718e-05, "loss": 0.7618, "step": 24090 }, { "epoch": 0.27, "learning_rate": 4.5553377919508664e-05, "loss": 0.8778, "step": 24095 }, { "epoch": 0.27, "learning_rate": 4.555245519237016e-05, "loss": 0.8386, "step": 24100 }, { "epoch": 0.27, "learning_rate": 4.555153246523165e-05, "loss": 0.7048, "step": 24105 }, { "epoch": 0.27, "learning_rate": 4.555060973809313e-05, "loss": 0.8225, "step": 24110 }, { "epoch": 0.27, "learning_rate": 4.5549687010954616e-05, "loss": 0.8402, "step": 24115 }, { "epoch": 0.27, "learning_rate": 4.554876428381611e-05, "loss": 0.8085, "step": 24120 }, { "epoch": 0.27, "learning_rate": 4.55478415566776e-05, "loss": 0.8084, "step": 24125 }, { "epoch": 0.27, "learning_rate": 4.554691882953908e-05, "loss": 0.8549, "step": 24130 }, { "epoch": 0.27, "learning_rate": 4.554599610240057e-05, "loss": 0.7679, "step": 24135 }, { "epoch": 0.27, "learning_rate": 4.5545073375262055e-05, "loss": 0.781, "step": 24140 }, { "epoch": 0.27, "learning_rate": 4.554415064812354e-05, "loss": 0.8217, "step": 24145 }, { "epoch": 0.27, "learning_rate": 4.554322792098503e-05, "loss": 0.8024, "step": 24150 }, { "epoch": 0.27, "learning_rate": 4.554230519384652e-05, "loss": 0.8097, "step": 24155 }, { "epoch": 0.27, "learning_rate": 4.5541382466708006e-05, "loss": 0.8382, "step": 24160 }, { "epoch": 0.27, "learning_rate": 4.5540459739569494e-05, "loss": 0.8135, "step": 24165 }, { "epoch": 0.27, "learning_rate": 4.553953701243098e-05, "loss": 0.7721, "step": 24170 }, { "epoch": 0.27, "learning_rate": 4.553861428529247e-05, "loss": 0.8064, "step": 24175 }, { "epoch": 0.27, "learning_rate": 4.553769155815396e-05, "loss": 0.7957, "step": 24180 }, { "epoch": 0.27, "learning_rate": 4.5536768831015446e-05, "loss": 0.8095, "step": 24185 }, { "epoch": 0.27, "learning_rate": 4.5535846103876934e-05, "loss": 0.7912, "step": 24190 }, { "epoch": 0.27, "learning_rate": 4.553492337673842e-05, "loss": 0.7777, "step": 24195 }, { "epoch": 0.27, "learning_rate": 4.553400064959991e-05, "loss": 0.8239, "step": 24200 }, { "epoch": 0.27, "learning_rate": 4.55330779224614e-05, "loss": 0.8306, "step": 24205 }, { "epoch": 0.27, "learning_rate": 4.5532155195322885e-05, "loss": 0.7955, "step": 24210 }, { "epoch": 0.27, "learning_rate": 4.553123246818437e-05, "loss": 0.7827, "step": 24215 }, { "epoch": 0.27, "learning_rate": 4.5530309741045854e-05, "loss": 0.7805, "step": 24220 }, { "epoch": 0.27, "learning_rate": 4.552938701390734e-05, "loss": 0.7715, "step": 24225 }, { "epoch": 0.27, "learning_rate": 4.5528464286768836e-05, "loss": 0.7662, "step": 24230 }, { "epoch": 0.27, "learning_rate": 4.5527541559630324e-05, "loss": 0.8282, "step": 24235 }, { "epoch": 0.27, "learning_rate": 4.5526618832491805e-05, "loss": 0.8181, "step": 24240 }, { "epoch": 0.27, "learning_rate": 4.552569610535329e-05, "loss": 0.8081, "step": 24245 }, { "epoch": 0.27, "learning_rate": 4.552477337821479e-05, "loss": 0.8608, "step": 24250 }, { "epoch": 0.27, "learning_rate": 4.552385065107627e-05, "loss": 0.8356, "step": 24255 }, { "epoch": 0.27, "learning_rate": 4.552292792393776e-05, "loss": 0.7325, "step": 24260 }, { "epoch": 0.27, "learning_rate": 4.5522005196799245e-05, "loss": 0.7576, "step": 24265 }, { "epoch": 0.27, "learning_rate": 4.552108246966074e-05, "loss": 0.77, "step": 24270 }, { "epoch": 0.27, "learning_rate": 4.552015974252222e-05, "loss": 0.8423, "step": 24275 }, { "epoch": 0.27, "learning_rate": 4.551923701538371e-05, "loss": 0.7464, "step": 24280 }, { "epoch": 0.27, "learning_rate": 4.5518314288245196e-05, "loss": 0.8239, "step": 24285 }, { "epoch": 0.27, "learning_rate": 4.5517391561106684e-05, "loss": 0.8117, "step": 24290 }, { "epoch": 0.27, "learning_rate": 4.551646883396817e-05, "loss": 0.7915, "step": 24295 }, { "epoch": 0.27, "learning_rate": 4.551554610682966e-05, "loss": 0.8063, "step": 24300 }, { "epoch": 0.27, "learning_rate": 4.551462337969115e-05, "loss": 0.8304, "step": 24305 }, { "epoch": 0.27, "learning_rate": 4.5513700652552635e-05, "loss": 0.8059, "step": 24310 }, { "epoch": 0.27, "learning_rate": 4.551277792541412e-05, "loss": 0.8421, "step": 24315 }, { "epoch": 0.27, "learning_rate": 4.551185519827561e-05, "loss": 0.8312, "step": 24320 }, { "epoch": 0.27, "learning_rate": 4.55109324711371e-05, "loss": 0.7909, "step": 24325 }, { "epoch": 0.27, "learning_rate": 4.551000974399858e-05, "loss": 0.7993, "step": 24330 }, { "epoch": 0.27, "learning_rate": 4.5509087016860074e-05, "loss": 0.8225, "step": 24335 }, { "epoch": 0.27, "learning_rate": 4.550816428972156e-05, "loss": 0.7759, "step": 24340 }, { "epoch": 0.27, "learning_rate": 4.550724156258305e-05, "loss": 0.7657, "step": 24345 }, { "epoch": 0.27, "learning_rate": 4.550631883544453e-05, "loss": 0.7866, "step": 24350 }, { "epoch": 0.27, "learning_rate": 4.5505396108306026e-05, "loss": 0.7637, "step": 24355 }, { "epoch": 0.27, "learning_rate": 4.5504473381167514e-05, "loss": 0.8546, "step": 24360 }, { "epoch": 0.27, "learning_rate": 4.5503550654028995e-05, "loss": 0.7918, "step": 24365 }, { "epoch": 0.27, "learning_rate": 4.550262792689048e-05, "loss": 0.778, "step": 24370 }, { "epoch": 0.27, "learning_rate": 4.550170519975197e-05, "loss": 0.878, "step": 24375 }, { "epoch": 0.27, "learning_rate": 4.5500782472613465e-05, "loss": 0.7959, "step": 24380 }, { "epoch": 0.27, "learning_rate": 4.5499859745474946e-05, "loss": 0.7985, "step": 24385 }, { "epoch": 0.27, "learning_rate": 4.5498937018336434e-05, "loss": 0.7621, "step": 24390 }, { "epoch": 0.27, "learning_rate": 4.549801429119792e-05, "loss": 0.7685, "step": 24395 }, { "epoch": 0.27, "learning_rate": 4.5497091564059416e-05, "loss": 0.8702, "step": 24400 }, { "epoch": 0.27, "learning_rate": 4.54961688369209e-05, "loss": 0.8243, "step": 24405 }, { "epoch": 0.27, "learning_rate": 4.5495246109782385e-05, "loss": 0.8034, "step": 24410 }, { "epoch": 0.27, "learning_rate": 4.549432338264387e-05, "loss": 0.8191, "step": 24415 }, { "epoch": 0.27, "learning_rate": 4.549340065550536e-05, "loss": 0.7663, "step": 24420 }, { "epoch": 0.27, "learning_rate": 4.549247792836685e-05, "loss": 0.8361, "step": 24425 }, { "epoch": 0.27, "learning_rate": 4.549155520122834e-05, "loss": 0.8276, "step": 24430 }, { "epoch": 0.27, "learning_rate": 4.5490632474089825e-05, "loss": 0.7968, "step": 24435 }, { "epoch": 0.27, "learning_rate": 4.548970974695131e-05, "loss": 0.7472, "step": 24440 }, { "epoch": 0.27, "learning_rate": 4.54887870198128e-05, "loss": 0.7564, "step": 24445 }, { "epoch": 0.27, "learning_rate": 4.548786429267429e-05, "loss": 0.7514, "step": 24450 }, { "epoch": 0.27, "learning_rate": 4.5486941565535776e-05, "loss": 0.8366, "step": 24455 }, { "epoch": 0.27, "learning_rate": 4.548601883839726e-05, "loss": 0.7657, "step": 24460 }, { "epoch": 0.27, "learning_rate": 4.548509611125875e-05, "loss": 0.7375, "step": 24465 }, { "epoch": 0.27, "learning_rate": 4.548417338412024e-05, "loss": 0.7905, "step": 24470 }, { "epoch": 0.27, "learning_rate": 4.548325065698173e-05, "loss": 0.8194, "step": 24475 }, { "epoch": 0.27, "learning_rate": 4.548232792984321e-05, "loss": 0.7643, "step": 24480 }, { "epoch": 0.27, "learning_rate": 4.54814052027047e-05, "loss": 0.7814, "step": 24485 }, { "epoch": 0.27, "learning_rate": 4.548048247556619e-05, "loss": 0.8224, "step": 24490 }, { "epoch": 0.27, "learning_rate": 4.547955974842767e-05, "loss": 0.7858, "step": 24495 }, { "epoch": 0.27, "learning_rate": 4.547863702128916e-05, "loss": 0.8042, "step": 24500 }, { "epoch": 0.27, "learning_rate": 4.5477714294150654e-05, "loss": 0.8382, "step": 24505 }, { "epoch": 0.27, "learning_rate": 4.547679156701214e-05, "loss": 0.7763, "step": 24510 }, { "epoch": 0.27, "learning_rate": 4.5475868839873623e-05, "loss": 0.766, "step": 24515 }, { "epoch": 0.27, "learning_rate": 4.547494611273511e-05, "loss": 0.8111, "step": 24520 }, { "epoch": 0.27, "learning_rate": 4.54740233855966e-05, "loss": 0.7771, "step": 24525 }, { "epoch": 0.27, "learning_rate": 4.547310065845809e-05, "loss": 0.7372, "step": 24530 }, { "epoch": 0.27, "learning_rate": 4.5472177931319575e-05, "loss": 0.7898, "step": 24535 }, { "epoch": 0.27, "learning_rate": 4.547125520418106e-05, "loss": 0.8104, "step": 24540 }, { "epoch": 0.27, "learning_rate": 4.547033247704255e-05, "loss": 0.8273, "step": 24545 }, { "epoch": 0.27, "learning_rate": 4.546940974990404e-05, "loss": 0.7562, "step": 24550 }, { "epoch": 0.27, "learning_rate": 4.5468487022765526e-05, "loss": 0.7633, "step": 24555 }, { "epoch": 0.27, "learning_rate": 4.5467564295627014e-05, "loss": 0.8197, "step": 24560 }, { "epoch": 0.27, "learning_rate": 4.54666415684885e-05, "loss": 0.708, "step": 24565 }, { "epoch": 0.27, "learning_rate": 4.546571884134999e-05, "loss": 0.795, "step": 24570 }, { "epoch": 0.27, "learning_rate": 4.546479611421148e-05, "loss": 0.7191, "step": 24575 }, { "epoch": 0.27, "learning_rate": 4.5463873387072965e-05, "loss": 0.7321, "step": 24580 }, { "epoch": 0.27, "learning_rate": 4.546295065993445e-05, "loss": 0.8507, "step": 24585 }, { "epoch": 0.27, "learning_rate": 4.546202793279594e-05, "loss": 0.8055, "step": 24590 }, { "epoch": 0.27, "learning_rate": 4.546110520565743e-05, "loss": 0.7215, "step": 24595 }, { "epoch": 0.27, "learning_rate": 4.546018247851892e-05, "loss": 0.8678, "step": 24600 }, { "epoch": 0.27, "learning_rate": 4.54592597513804e-05, "loss": 0.7989, "step": 24605 }, { "epoch": 0.27, "learning_rate": 4.5458337024241886e-05, "loss": 0.7813, "step": 24610 }, { "epoch": 0.27, "learning_rate": 4.545741429710338e-05, "loss": 0.8183, "step": 24615 }, { "epoch": 0.27, "learning_rate": 4.545649156996487e-05, "loss": 0.8675, "step": 24620 }, { "epoch": 0.27, "learning_rate": 4.545556884282635e-05, "loss": 0.7883, "step": 24625 }, { "epoch": 0.27, "learning_rate": 4.545464611568784e-05, "loss": 0.7946, "step": 24630 }, { "epoch": 0.27, "learning_rate": 4.545372338854933e-05, "loss": 0.7749, "step": 24635 }, { "epoch": 0.27, "learning_rate": 4.545280066141081e-05, "loss": 0.7672, "step": 24640 }, { "epoch": 0.27, "learning_rate": 4.54518779342723e-05, "loss": 0.7871, "step": 24645 }, { "epoch": 0.27, "learning_rate": 4.545095520713379e-05, "loss": 0.8326, "step": 24650 }, { "epoch": 0.27, "learning_rate": 4.545003247999528e-05, "loss": 0.764, "step": 24655 }, { "epoch": 0.27, "learning_rate": 4.5449109752856764e-05, "loss": 0.7536, "step": 24660 }, { "epoch": 0.27, "learning_rate": 4.544818702571825e-05, "loss": 0.743, "step": 24665 }, { "epoch": 0.27, "learning_rate": 4.544726429857974e-05, "loss": 0.8597, "step": 24670 }, { "epoch": 0.27, "learning_rate": 4.544634157144123e-05, "loss": 0.8216, "step": 24675 }, { "epoch": 0.27, "learning_rate": 4.5445418844302716e-05, "loss": 0.7681, "step": 24680 }, { "epoch": 0.27, "learning_rate": 4.5444496117164203e-05, "loss": 0.7743, "step": 24685 }, { "epoch": 0.27, "learning_rate": 4.544357339002569e-05, "loss": 0.7329, "step": 24690 }, { "epoch": 0.27, "learning_rate": 4.544265066288718e-05, "loss": 0.7603, "step": 24695 }, { "epoch": 0.27, "learning_rate": 4.544172793574867e-05, "loss": 0.7214, "step": 24700 }, { "epoch": 0.27, "learning_rate": 4.5440805208610155e-05, "loss": 0.7953, "step": 24705 }, { "epoch": 0.27, "learning_rate": 4.543988248147164e-05, "loss": 0.7797, "step": 24710 }, { "epoch": 0.27, "learning_rate": 4.5438959754333124e-05, "loss": 0.7065, "step": 24715 }, { "epoch": 0.27, "learning_rate": 4.543803702719462e-05, "loss": 0.7282, "step": 24720 }, { "epoch": 0.27, "learning_rate": 4.5437114300056106e-05, "loss": 0.7604, "step": 24725 }, { "epoch": 0.27, "learning_rate": 4.5436191572917594e-05, "loss": 0.7677, "step": 24730 }, { "epoch": 0.27, "learning_rate": 4.5435268845779075e-05, "loss": 0.7772, "step": 24735 }, { "epoch": 0.27, "learning_rate": 4.543434611864057e-05, "loss": 0.7631, "step": 24740 }, { "epoch": 0.27, "learning_rate": 4.543342339150206e-05, "loss": 0.7831, "step": 24745 }, { "epoch": 0.27, "learning_rate": 4.543250066436354e-05, "loss": 0.7759, "step": 24750 }, { "epoch": 0.27, "learning_rate": 4.5431577937225027e-05, "loss": 0.8717, "step": 24755 }, { "epoch": 0.27, "learning_rate": 4.5430655210086514e-05, "loss": 0.7696, "step": 24760 }, { "epoch": 0.27, "learning_rate": 4.542973248294801e-05, "loss": 0.858, "step": 24765 }, { "epoch": 0.27, "learning_rate": 4.542880975580949e-05, "loss": 0.7872, "step": 24770 }, { "epoch": 0.27, "learning_rate": 4.542788702867098e-05, "loss": 0.7775, "step": 24775 }, { "epoch": 0.27, "learning_rate": 4.5426964301532466e-05, "loss": 0.8311, "step": 24780 }, { "epoch": 0.27, "learning_rate": 4.542604157439396e-05, "loss": 0.8499, "step": 24785 }, { "epoch": 0.27, "learning_rate": 4.542511884725544e-05, "loss": 0.8638, "step": 24790 }, { "epoch": 0.27, "learning_rate": 4.542419612011693e-05, "loss": 0.7715, "step": 24795 }, { "epoch": 0.27, "learning_rate": 4.542327339297842e-05, "loss": 0.7422, "step": 24800 }, { "epoch": 0.27, "learning_rate": 4.5422350665839905e-05, "loss": 0.754, "step": 24805 }, { "epoch": 0.27, "learning_rate": 4.542142793870139e-05, "loss": 0.8228, "step": 24810 }, { "epoch": 0.27, "learning_rate": 4.542050521156288e-05, "loss": 0.7661, "step": 24815 }, { "epoch": 0.27, "learning_rate": 4.541958248442437e-05, "loss": 0.8206, "step": 24820 }, { "epoch": 0.27, "learning_rate": 4.541865975728585e-05, "loss": 0.7589, "step": 24825 }, { "epoch": 0.27, "learning_rate": 4.5417737030147344e-05, "loss": 0.7725, "step": 24830 }, { "epoch": 0.27, "learning_rate": 4.541681430300883e-05, "loss": 0.7888, "step": 24835 }, { "epoch": 0.28, "learning_rate": 4.541589157587032e-05, "loss": 0.8104, "step": 24840 }, { "epoch": 0.28, "learning_rate": 4.54149688487318e-05, "loss": 0.7865, "step": 24845 }, { "epoch": 0.28, "learning_rate": 4.5414046121593296e-05, "loss": 0.8086, "step": 24850 }, { "epoch": 0.28, "learning_rate": 4.5413123394454784e-05, "loss": 0.7698, "step": 24855 }, { "epoch": 0.28, "learning_rate": 4.541220066731627e-05, "loss": 0.7218, "step": 24860 }, { "epoch": 0.28, "learning_rate": 4.541127794017775e-05, "loss": 0.8091, "step": 24865 }, { "epoch": 0.28, "learning_rate": 4.541035521303925e-05, "loss": 0.7648, "step": 24870 }, { "epoch": 0.28, "learning_rate": 4.5409432485900735e-05, "loss": 0.7912, "step": 24875 }, { "epoch": 0.28, "learning_rate": 4.5408509758762216e-05, "loss": 0.7441, "step": 24880 }, { "epoch": 0.28, "learning_rate": 4.5407587031623704e-05, "loss": 0.7534, "step": 24885 }, { "epoch": 0.28, "learning_rate": 4.54066643044852e-05, "loss": 0.8144, "step": 24890 }, { "epoch": 0.28, "learning_rate": 4.5405741577346686e-05, "loss": 0.7897, "step": 24895 }, { "epoch": 0.28, "learning_rate": 4.540481885020817e-05, "loss": 0.8094, "step": 24900 }, { "epoch": 0.28, "learning_rate": 4.5403896123069655e-05, "loss": 0.7751, "step": 24905 }, { "epoch": 0.28, "learning_rate": 4.540297339593114e-05, "loss": 0.7926, "step": 24910 }, { "epoch": 0.28, "learning_rate": 4.540205066879263e-05, "loss": 0.8345, "step": 24915 }, { "epoch": 0.28, "learning_rate": 4.540112794165412e-05, "loss": 0.7524, "step": 24920 }, { "epoch": 0.28, "learning_rate": 4.540020521451561e-05, "loss": 0.7633, "step": 24925 }, { "epoch": 0.28, "learning_rate": 4.5399282487377095e-05, "loss": 0.828, "step": 24930 }, { "epoch": 0.28, "learning_rate": 4.539835976023858e-05, "loss": 0.7263, "step": 24935 }, { "epoch": 0.28, "learning_rate": 4.539743703310007e-05, "loss": 0.7713, "step": 24940 }, { "epoch": 0.28, "learning_rate": 4.539651430596156e-05, "loss": 0.7516, "step": 24945 }, { "epoch": 0.28, "learning_rate": 4.5395591578823046e-05, "loss": 0.83, "step": 24950 }, { "epoch": 0.28, "learning_rate": 4.5394668851684534e-05, "loss": 0.8154, "step": 24955 }, { "epoch": 0.28, "learning_rate": 4.539374612454602e-05, "loss": 0.7933, "step": 24960 }, { "epoch": 0.28, "learning_rate": 4.539282339740751e-05, "loss": 0.73, "step": 24965 }, { "epoch": 0.28, "learning_rate": 4.5391900670269e-05, "loss": 0.8198, "step": 24970 }, { "epoch": 0.28, "learning_rate": 4.539097794313048e-05, "loss": 0.7502, "step": 24975 }, { "epoch": 0.28, "learning_rate": 4.539005521599197e-05, "loss": 0.797, "step": 24980 }, { "epoch": 0.28, "learning_rate": 4.538913248885346e-05, "loss": 0.7974, "step": 24985 }, { "epoch": 0.28, "learning_rate": 4.538820976171494e-05, "loss": 0.7814, "step": 24990 }, { "epoch": 0.28, "learning_rate": 4.538728703457643e-05, "loss": 0.7952, "step": 24995 }, { "epoch": 0.28, "learning_rate": 4.5386364307437924e-05, "loss": 0.8451, "step": 25000 }, { "epoch": 0.28, "eval_loss": 0.7460049986839294, "eval_runtime": 69.2649, "eval_samples_per_second": 28.875, "eval_steps_per_second": 14.437, "step": 25000 }, { "epoch": 0.28, "learning_rate": 4.538544158029941e-05, "loss": 0.6974, "step": 25005 }, { "epoch": 0.28, "learning_rate": 4.538451885316089e-05, "loss": 0.7967, "step": 25010 }, { "epoch": 0.28, "learning_rate": 4.538359612602238e-05, "loss": 0.7232, "step": 25015 }, { "epoch": 0.28, "learning_rate": 4.5382673398883876e-05, "loss": 0.8284, "step": 25020 }, { "epoch": 0.28, "learning_rate": 4.538175067174536e-05, "loss": 0.8222, "step": 25025 }, { "epoch": 0.28, "learning_rate": 4.5380827944606845e-05, "loss": 0.7843, "step": 25030 }, { "epoch": 0.28, "learning_rate": 4.537990521746833e-05, "loss": 0.748, "step": 25035 }, { "epoch": 0.28, "learning_rate": 4.537898249032983e-05, "loss": 0.7714, "step": 25040 }, { "epoch": 0.28, "learning_rate": 4.537805976319131e-05, "loss": 0.8076, "step": 25045 }, { "epoch": 0.28, "learning_rate": 4.5377137036052796e-05, "loss": 0.7597, "step": 25050 }, { "epoch": 0.28, "learning_rate": 4.5376214308914284e-05, "loss": 0.7824, "step": 25055 }, { "epoch": 0.28, "learning_rate": 4.537529158177577e-05, "loss": 0.7442, "step": 25060 }, { "epoch": 0.28, "learning_rate": 4.537436885463726e-05, "loss": 0.7704, "step": 25065 }, { "epoch": 0.28, "learning_rate": 4.537344612749875e-05, "loss": 0.7996, "step": 25070 }, { "epoch": 0.28, "learning_rate": 4.5372523400360235e-05, "loss": 0.7479, "step": 25075 }, { "epoch": 0.28, "learning_rate": 4.537160067322172e-05, "loss": 0.7793, "step": 25080 }, { "epoch": 0.28, "learning_rate": 4.537067794608321e-05, "loss": 0.7818, "step": 25085 }, { "epoch": 0.28, "learning_rate": 4.53697552189447e-05, "loss": 0.8253, "step": 25090 }, { "epoch": 0.28, "learning_rate": 4.536883249180619e-05, "loss": 0.766, "step": 25095 }, { "epoch": 0.28, "learning_rate": 4.536790976466767e-05, "loss": 0.8615, "step": 25100 }, { "epoch": 0.28, "learning_rate": 4.536698703752916e-05, "loss": 0.764, "step": 25105 }, { "epoch": 0.28, "learning_rate": 4.536606431039065e-05, "loss": 0.8488, "step": 25110 }, { "epoch": 0.28, "learning_rate": 4.536514158325214e-05, "loss": 0.8394, "step": 25115 }, { "epoch": 0.28, "learning_rate": 4.536421885611362e-05, "loss": 0.8059, "step": 25120 }, { "epoch": 0.28, "learning_rate": 4.536329612897511e-05, "loss": 0.7289, "step": 25125 }, { "epoch": 0.28, "learning_rate": 4.53623734018366e-05, "loss": 0.7711, "step": 25130 }, { "epoch": 0.28, "learning_rate": 4.536145067469808e-05, "loss": 0.783, "step": 25135 }, { "epoch": 0.28, "learning_rate": 4.536052794755957e-05, "loss": 0.7465, "step": 25140 }, { "epoch": 0.28, "learning_rate": 4.535960522042106e-05, "loss": 0.7973, "step": 25145 }, { "epoch": 0.28, "learning_rate": 4.535868249328255e-05, "loss": 0.8106, "step": 25150 }, { "epoch": 0.28, "learning_rate": 4.5357759766144034e-05, "loss": 0.7728, "step": 25155 }, { "epoch": 0.28, "learning_rate": 4.535683703900552e-05, "loss": 0.8119, "step": 25160 }, { "epoch": 0.28, "learning_rate": 4.535591431186701e-05, "loss": 0.7594, "step": 25165 }, { "epoch": 0.28, "learning_rate": 4.5354991584728504e-05, "loss": 0.8151, "step": 25170 }, { "epoch": 0.28, "learning_rate": 4.5354068857589986e-05, "loss": 0.7907, "step": 25175 }, { "epoch": 0.28, "learning_rate": 4.5353146130451473e-05, "loss": 0.8173, "step": 25180 }, { "epoch": 0.28, "learning_rate": 4.535222340331296e-05, "loss": 0.8279, "step": 25185 }, { "epoch": 0.28, "learning_rate": 4.535130067617445e-05, "loss": 0.7935, "step": 25190 }, { "epoch": 0.28, "learning_rate": 4.535037794903594e-05, "loss": 0.8019, "step": 25195 }, { "epoch": 0.28, "learning_rate": 4.5349455221897425e-05, "loss": 0.8773, "step": 25200 }, { "epoch": 0.28, "learning_rate": 4.534853249475891e-05, "loss": 0.8096, "step": 25205 }, { "epoch": 0.28, "learning_rate": 4.5347609767620394e-05, "loss": 0.8069, "step": 25210 }, { "epoch": 0.28, "learning_rate": 4.534668704048189e-05, "loss": 0.8181, "step": 25215 }, { "epoch": 0.28, "learning_rate": 4.5345764313343376e-05, "loss": 0.7988, "step": 25220 }, { "epoch": 0.28, "learning_rate": 4.5344841586204864e-05, "loss": 0.7785, "step": 25225 }, { "epoch": 0.28, "learning_rate": 4.5343918859066345e-05, "loss": 0.8854, "step": 25230 }, { "epoch": 0.28, "learning_rate": 4.534299613192784e-05, "loss": 0.7776, "step": 25235 }, { "epoch": 0.28, "learning_rate": 4.534207340478933e-05, "loss": 0.792, "step": 25240 }, { "epoch": 0.28, "learning_rate": 4.5341150677650815e-05, "loss": 0.8089, "step": 25245 }, { "epoch": 0.28, "learning_rate": 4.5340227950512297e-05, "loss": 0.8536, "step": 25250 }, { "epoch": 0.28, "learning_rate": 4.533930522337379e-05, "loss": 0.8022, "step": 25255 }, { "epoch": 0.28, "learning_rate": 4.533838249623528e-05, "loss": 0.797, "step": 25260 }, { "epoch": 0.28, "learning_rate": 4.533745976909676e-05, "loss": 0.8245, "step": 25265 }, { "epoch": 0.28, "learning_rate": 4.533653704195825e-05, "loss": 0.7995, "step": 25270 }, { "epoch": 0.28, "learning_rate": 4.533561431481974e-05, "loss": 0.9075, "step": 25275 }, { "epoch": 0.28, "learning_rate": 4.533469158768123e-05, "loss": 0.743, "step": 25280 }, { "epoch": 0.28, "learning_rate": 4.533376886054271e-05, "loss": 0.8229, "step": 25285 }, { "epoch": 0.28, "learning_rate": 4.53328461334042e-05, "loss": 0.8223, "step": 25290 }, { "epoch": 0.28, "learning_rate": 4.533192340626569e-05, "loss": 0.7657, "step": 25295 }, { "epoch": 0.28, "learning_rate": 4.5331000679127175e-05, "loss": 0.7954, "step": 25300 }, { "epoch": 0.28, "learning_rate": 4.533007795198866e-05, "loss": 0.8185, "step": 25305 }, { "epoch": 0.28, "learning_rate": 4.532915522485015e-05, "loss": 0.7954, "step": 25310 }, { "epoch": 0.28, "learning_rate": 4.532823249771164e-05, "loss": 0.8314, "step": 25315 }, { "epoch": 0.28, "learning_rate": 4.5327309770573126e-05, "loss": 0.7797, "step": 25320 }, { "epoch": 0.28, "learning_rate": 4.5326387043434614e-05, "loss": 0.7723, "step": 25325 }, { "epoch": 0.28, "learning_rate": 4.53254643162961e-05, "loss": 0.8312, "step": 25330 }, { "epoch": 0.28, "learning_rate": 4.532454158915759e-05, "loss": 0.7466, "step": 25335 }, { "epoch": 0.28, "learning_rate": 4.532361886201908e-05, "loss": 0.7743, "step": 25340 }, { "epoch": 0.28, "learning_rate": 4.5322696134880566e-05, "loss": 0.7775, "step": 25345 }, { "epoch": 0.28, "learning_rate": 4.5321773407742053e-05, "loss": 0.7443, "step": 25350 }, { "epoch": 0.28, "learning_rate": 4.532085068060354e-05, "loss": 0.7687, "step": 25355 }, { "epoch": 0.28, "learning_rate": 4.531992795346502e-05, "loss": 0.7882, "step": 25360 }, { "epoch": 0.28, "learning_rate": 4.531900522632652e-05, "loss": 0.851, "step": 25365 }, { "epoch": 0.28, "learning_rate": 4.5318082499188005e-05, "loss": 0.7829, "step": 25370 }, { "epoch": 0.28, "learning_rate": 4.5317159772049486e-05, "loss": 0.7509, "step": 25375 }, { "epoch": 0.28, "learning_rate": 4.5316237044910974e-05, "loss": 0.754, "step": 25380 }, { "epoch": 0.28, "learning_rate": 4.531531431777247e-05, "loss": 0.8052, "step": 25385 }, { "epoch": 0.28, "learning_rate": 4.5314391590633956e-05, "loss": 0.7219, "step": 25390 }, { "epoch": 0.28, "learning_rate": 4.531346886349544e-05, "loss": 0.7918, "step": 25395 }, { "epoch": 0.28, "learning_rate": 4.5312546136356925e-05, "loss": 0.8137, "step": 25400 }, { "epoch": 0.28, "learning_rate": 4.531162340921842e-05, "loss": 0.7485, "step": 25405 }, { "epoch": 0.28, "learning_rate": 4.53107006820799e-05, "loss": 0.7422, "step": 25410 }, { "epoch": 0.28, "learning_rate": 4.530977795494139e-05, "loss": 0.7915, "step": 25415 }, { "epoch": 0.28, "learning_rate": 4.5308855227802877e-05, "loss": 0.7716, "step": 25420 }, { "epoch": 0.28, "learning_rate": 4.530793250066437e-05, "loss": 0.7903, "step": 25425 }, { "epoch": 0.28, "learning_rate": 4.530700977352585e-05, "loss": 0.7825, "step": 25430 }, { "epoch": 0.28, "learning_rate": 4.530608704638734e-05, "loss": 0.8392, "step": 25435 }, { "epoch": 0.28, "learning_rate": 4.530516431924883e-05, "loss": 0.7701, "step": 25440 }, { "epoch": 0.28, "learning_rate": 4.5304241592110316e-05, "loss": 0.7795, "step": 25445 }, { "epoch": 0.28, "learning_rate": 4.5303318864971804e-05, "loss": 0.7546, "step": 25450 }, { "epoch": 0.28, "learning_rate": 4.530239613783329e-05, "loss": 0.7799, "step": 25455 }, { "epoch": 0.28, "learning_rate": 4.530147341069478e-05, "loss": 0.7277, "step": 25460 }, { "epoch": 0.28, "learning_rate": 4.530055068355627e-05, "loss": 0.7742, "step": 25465 }, { "epoch": 0.28, "learning_rate": 4.5299627956417755e-05, "loss": 0.7703, "step": 25470 }, { "epoch": 0.28, "learning_rate": 4.529870522927924e-05, "loss": 0.7743, "step": 25475 }, { "epoch": 0.28, "learning_rate": 4.529778250214073e-05, "loss": 0.816, "step": 25480 }, { "epoch": 0.28, "learning_rate": 4.529685977500221e-05, "loss": 0.7861, "step": 25485 }, { "epoch": 0.28, "learning_rate": 4.5295937047863706e-05, "loss": 0.8022, "step": 25490 }, { "epoch": 0.28, "learning_rate": 4.5295014320725194e-05, "loss": 0.8074, "step": 25495 }, { "epoch": 0.28, "learning_rate": 4.529409159358668e-05, "loss": 0.8492, "step": 25500 }, { "epoch": 0.28, "learning_rate": 4.529316886644816e-05, "loss": 0.813, "step": 25505 }, { "epoch": 0.28, "learning_rate": 4.529224613930965e-05, "loss": 0.833, "step": 25510 }, { "epoch": 0.28, "learning_rate": 4.5291323412171146e-05, "loss": 0.8095, "step": 25515 }, { "epoch": 0.28, "learning_rate": 4.529040068503263e-05, "loss": 0.7979, "step": 25520 }, { "epoch": 0.28, "learning_rate": 4.5289477957894115e-05, "loss": 0.7711, "step": 25525 }, { "epoch": 0.28, "learning_rate": 4.52885552307556e-05, "loss": 0.7561, "step": 25530 }, { "epoch": 0.28, "learning_rate": 4.52876325036171e-05, "loss": 0.8769, "step": 25535 }, { "epoch": 0.28, "learning_rate": 4.528670977647858e-05, "loss": 0.8189, "step": 25540 }, { "epoch": 0.28, "learning_rate": 4.5285787049340066e-05, "loss": 0.7869, "step": 25545 }, { "epoch": 0.28, "learning_rate": 4.5284864322201554e-05, "loss": 0.8297, "step": 25550 }, { "epoch": 0.28, "learning_rate": 4.528394159506305e-05, "loss": 0.8256, "step": 25555 }, { "epoch": 0.28, "learning_rate": 4.528301886792453e-05, "loss": 0.779, "step": 25560 }, { "epoch": 0.28, "learning_rate": 4.528209614078602e-05, "loss": 0.8481, "step": 25565 }, { "epoch": 0.28, "learning_rate": 4.5281173413647505e-05, "loss": 0.8225, "step": 25570 }, { "epoch": 0.28, "learning_rate": 4.528025068650899e-05, "loss": 0.7628, "step": 25575 }, { "epoch": 0.28, "learning_rate": 4.527932795937048e-05, "loss": 0.7435, "step": 25580 }, { "epoch": 0.28, "learning_rate": 4.527840523223197e-05, "loss": 0.766, "step": 25585 }, { "epoch": 0.28, "learning_rate": 4.527748250509346e-05, "loss": 0.7667, "step": 25590 }, { "epoch": 0.28, "learning_rate": 4.527655977795494e-05, "loss": 0.7949, "step": 25595 }, { "epoch": 0.28, "learning_rate": 4.527563705081643e-05, "loss": 0.7677, "step": 25600 }, { "epoch": 0.28, "learning_rate": 4.527471432367792e-05, "loss": 0.8017, "step": 25605 }, { "epoch": 0.28, "learning_rate": 4.527379159653941e-05, "loss": 0.8248, "step": 25610 }, { "epoch": 0.28, "learning_rate": 4.527286886940089e-05, "loss": 0.806, "step": 25615 }, { "epoch": 0.28, "learning_rate": 4.5271946142262384e-05, "loss": 0.8242, "step": 25620 }, { "epoch": 0.28, "learning_rate": 4.527102341512387e-05, "loss": 0.8462, "step": 25625 }, { "epoch": 0.28, "learning_rate": 4.527010068798536e-05, "loss": 0.7855, "step": 25630 }, { "epoch": 0.28, "learning_rate": 4.526917796084684e-05, "loss": 0.818, "step": 25635 }, { "epoch": 0.28, "learning_rate": 4.5268255233708335e-05, "loss": 0.7771, "step": 25640 }, { "epoch": 0.28, "learning_rate": 4.526733250656982e-05, "loss": 0.7795, "step": 25645 }, { "epoch": 0.28, "learning_rate": 4.5266409779431304e-05, "loss": 0.7451, "step": 25650 }, { "epoch": 0.28, "learning_rate": 4.526548705229279e-05, "loss": 0.8316, "step": 25655 }, { "epoch": 0.28, "learning_rate": 4.526456432515428e-05, "loss": 0.778, "step": 25660 }, { "epoch": 0.28, "learning_rate": 4.5263641598015774e-05, "loss": 0.7301, "step": 25665 }, { "epoch": 0.28, "learning_rate": 4.5262718870877255e-05, "loss": 0.8174, "step": 25670 }, { "epoch": 0.28, "learning_rate": 4.526179614373874e-05, "loss": 0.7907, "step": 25675 }, { "epoch": 0.28, "learning_rate": 4.526087341660023e-05, "loss": 0.7906, "step": 25680 }, { "epoch": 0.28, "learning_rate": 4.525995068946172e-05, "loss": 0.7957, "step": 25685 }, { "epoch": 0.28, "learning_rate": 4.525902796232321e-05, "loss": 0.7553, "step": 25690 }, { "epoch": 0.28, "learning_rate": 4.5258105235184695e-05, "loss": 0.8506, "step": 25695 }, { "epoch": 0.28, "learning_rate": 4.525718250804618e-05, "loss": 0.7779, "step": 25700 }, { "epoch": 0.28, "learning_rate": 4.525625978090767e-05, "loss": 0.7618, "step": 25705 }, { "epoch": 0.28, "learning_rate": 4.525533705376916e-05, "loss": 0.8007, "step": 25710 }, { "epoch": 0.28, "learning_rate": 4.5254414326630646e-05, "loss": 0.7388, "step": 25715 }, { "epoch": 0.28, "learning_rate": 4.5253491599492134e-05, "loss": 0.787, "step": 25720 }, { "epoch": 0.28, "learning_rate": 4.525256887235362e-05, "loss": 0.7646, "step": 25725 }, { "epoch": 0.28, "learning_rate": 4.525164614521511e-05, "loss": 0.8279, "step": 25730 }, { "epoch": 0.28, "learning_rate": 4.52507234180766e-05, "loss": 0.7705, "step": 25735 }, { "epoch": 0.29, "learning_rate": 4.5249800690938085e-05, "loss": 0.7761, "step": 25740 }, { "epoch": 0.29, "learning_rate": 4.5248877963799566e-05, "loss": 0.8244, "step": 25745 }, { "epoch": 0.29, "learning_rate": 4.524795523666106e-05, "loss": 0.769, "step": 25750 }, { "epoch": 0.29, "learning_rate": 4.524703250952255e-05, "loss": 0.7765, "step": 25755 }, { "epoch": 0.29, "learning_rate": 4.524610978238403e-05, "loss": 0.8082, "step": 25760 }, { "epoch": 0.29, "learning_rate": 4.524518705524552e-05, "loss": 0.7882, "step": 25765 }, { "epoch": 0.29, "learning_rate": 4.524426432810701e-05, "loss": 0.7859, "step": 25770 }, { "epoch": 0.29, "learning_rate": 4.52433416009685e-05, "loss": 0.747, "step": 25775 }, { "epoch": 0.29, "learning_rate": 4.524241887382998e-05, "loss": 0.749, "step": 25780 }, { "epoch": 0.29, "learning_rate": 4.524149614669147e-05, "loss": 0.7493, "step": 25785 }, { "epoch": 0.29, "learning_rate": 4.5240573419552964e-05, "loss": 0.8399, "step": 25790 }, { "epoch": 0.29, "learning_rate": 4.5239650692414445e-05, "loss": 0.7971, "step": 25795 }, { "epoch": 0.29, "learning_rate": 4.523872796527593e-05, "loss": 0.7569, "step": 25800 }, { "epoch": 0.29, "learning_rate": 4.523780523813742e-05, "loss": 0.7646, "step": 25805 }, { "epoch": 0.29, "learning_rate": 4.523688251099891e-05, "loss": 0.771, "step": 25810 }, { "epoch": 0.29, "learning_rate": 4.5235959783860396e-05, "loss": 0.7739, "step": 25815 }, { "epoch": 0.29, "learning_rate": 4.5235037056721884e-05, "loss": 0.7758, "step": 25820 }, { "epoch": 0.29, "learning_rate": 4.523411432958337e-05, "loss": 0.7581, "step": 25825 }, { "epoch": 0.29, "learning_rate": 4.523319160244486e-05, "loss": 0.7973, "step": 25830 }, { "epoch": 0.29, "learning_rate": 4.523226887530635e-05, "loss": 0.808, "step": 25835 }, { "epoch": 0.29, "learning_rate": 4.5231346148167836e-05, "loss": 0.7636, "step": 25840 }, { "epoch": 0.29, "learning_rate": 4.5230423421029323e-05, "loss": 0.8082, "step": 25845 }, { "epoch": 0.29, "learning_rate": 4.522950069389081e-05, "loss": 0.8727, "step": 25850 }, { "epoch": 0.29, "learning_rate": 4.52285779667523e-05, "loss": 0.7449, "step": 25855 }, { "epoch": 0.29, "learning_rate": 4.522765523961379e-05, "loss": 0.8243, "step": 25860 }, { "epoch": 0.29, "learning_rate": 4.5226732512475275e-05, "loss": 0.7984, "step": 25865 }, { "epoch": 0.29, "learning_rate": 4.5225809785336756e-05, "loss": 0.769, "step": 25870 }, { "epoch": 0.29, "learning_rate": 4.522488705819825e-05, "loss": 0.6983, "step": 25875 }, { "epoch": 0.29, "learning_rate": 4.522396433105974e-05, "loss": 0.8465, "step": 25880 }, { "epoch": 0.29, "learning_rate": 4.5223041603921226e-05, "loss": 0.7783, "step": 25885 }, { "epoch": 0.29, "learning_rate": 4.522211887678271e-05, "loss": 0.8192, "step": 25890 }, { "epoch": 0.29, "learning_rate": 4.5221196149644195e-05, "loss": 0.8466, "step": 25895 }, { "epoch": 0.29, "learning_rate": 4.522027342250569e-05, "loss": 0.7692, "step": 25900 }, { "epoch": 0.29, "learning_rate": 4.521935069536717e-05, "loss": 0.8269, "step": 25905 }, { "epoch": 0.29, "learning_rate": 4.521842796822866e-05, "loss": 0.8214, "step": 25910 }, { "epoch": 0.29, "learning_rate": 4.5217505241090147e-05, "loss": 0.7916, "step": 25915 }, { "epoch": 0.29, "learning_rate": 4.521658251395164e-05, "loss": 0.8159, "step": 25920 }, { "epoch": 0.29, "learning_rate": 4.521565978681312e-05, "loss": 0.7389, "step": 25925 }, { "epoch": 0.29, "learning_rate": 4.521473705967461e-05, "loss": 0.832, "step": 25930 }, { "epoch": 0.29, "learning_rate": 4.52138143325361e-05, "loss": 0.7438, "step": 25935 }, { "epoch": 0.29, "learning_rate": 4.521289160539759e-05, "loss": 0.6833, "step": 25940 }, { "epoch": 0.29, "learning_rate": 4.5211968878259074e-05, "loss": 0.8202, "step": 25945 }, { "epoch": 0.29, "learning_rate": 4.521104615112056e-05, "loss": 0.8037, "step": 25950 }, { "epoch": 0.29, "learning_rate": 4.521012342398205e-05, "loss": 0.8213, "step": 25955 }, { "epoch": 0.29, "learning_rate": 4.520920069684354e-05, "loss": 0.7333, "step": 25960 }, { "epoch": 0.29, "learning_rate": 4.5208277969705025e-05, "loss": 0.735, "step": 25965 }, { "epoch": 0.29, "learning_rate": 4.520735524256651e-05, "loss": 0.8558, "step": 25970 }, { "epoch": 0.29, "learning_rate": 4.5206432515428e-05, "loss": 0.8878, "step": 25975 }, { "epoch": 0.29, "learning_rate": 4.520550978828948e-05, "loss": 0.7896, "step": 25980 }, { "epoch": 0.29, "learning_rate": 4.5204587061150976e-05, "loss": 0.8019, "step": 25985 }, { "epoch": 0.29, "learning_rate": 4.5203664334012464e-05, "loss": 0.795, "step": 25990 }, { "epoch": 0.29, "learning_rate": 4.520274160687395e-05, "loss": 0.8089, "step": 25995 }, { "epoch": 0.29, "learning_rate": 4.520181887973543e-05, "loss": 0.823, "step": 26000 }, { "epoch": 0.29, "eval_loss": 0.7299502491950989, "eval_runtime": 69.453, "eval_samples_per_second": 28.796, "eval_steps_per_second": 14.398, "step": 26000 }, { "epoch": 0.29, "learning_rate": 4.520089615259693e-05, "loss": 0.8926, "step": 26005 }, { "epoch": 0.29, "learning_rate": 4.5199973425458416e-05, "loss": 0.8264, "step": 26010 }, { "epoch": 0.29, "learning_rate": 4.5199050698319903e-05, "loss": 0.7948, "step": 26015 }, { "epoch": 0.29, "learning_rate": 4.5198127971181385e-05, "loss": 0.7681, "step": 26020 }, { "epoch": 0.29, "learning_rate": 4.519720524404288e-05, "loss": 0.8702, "step": 26025 }, { "epoch": 0.29, "learning_rate": 4.519628251690437e-05, "loss": 0.8215, "step": 26030 }, { "epoch": 0.29, "learning_rate": 4.519535978976585e-05, "loss": 0.7447, "step": 26035 }, { "epoch": 0.29, "learning_rate": 4.5194437062627336e-05, "loss": 0.7891, "step": 26040 }, { "epoch": 0.29, "learning_rate": 4.5193514335488824e-05, "loss": 0.7721, "step": 26045 }, { "epoch": 0.29, "learning_rate": 4.519259160835032e-05, "loss": 0.7914, "step": 26050 }, { "epoch": 0.29, "learning_rate": 4.51916688812118e-05, "loss": 0.8223, "step": 26055 }, { "epoch": 0.29, "learning_rate": 4.519074615407329e-05, "loss": 0.8727, "step": 26060 }, { "epoch": 0.29, "learning_rate": 4.5189823426934775e-05, "loss": 0.7748, "step": 26065 }, { "epoch": 0.29, "learning_rate": 4.518890069979626e-05, "loss": 0.8163, "step": 26070 }, { "epoch": 0.29, "learning_rate": 4.518797797265775e-05, "loss": 0.8496, "step": 26075 }, { "epoch": 0.29, "learning_rate": 4.518705524551924e-05, "loss": 0.8637, "step": 26080 }, { "epoch": 0.29, "learning_rate": 4.5186132518380727e-05, "loss": 0.7874, "step": 26085 }, { "epoch": 0.29, "learning_rate": 4.5185209791242214e-05, "loss": 0.7577, "step": 26090 }, { "epoch": 0.29, "learning_rate": 4.51842870641037e-05, "loss": 0.7645, "step": 26095 }, { "epoch": 0.29, "learning_rate": 4.518336433696519e-05, "loss": 0.7807, "step": 26100 }, { "epoch": 0.29, "learning_rate": 4.518244160982668e-05, "loss": 0.7621, "step": 26105 }, { "epoch": 0.29, "learning_rate": 4.518151888268816e-05, "loss": 0.7314, "step": 26110 }, { "epoch": 0.29, "learning_rate": 4.5180596155549654e-05, "loss": 0.8104, "step": 26115 }, { "epoch": 0.29, "learning_rate": 4.517967342841114e-05, "loss": 0.8098, "step": 26120 }, { "epoch": 0.29, "learning_rate": 4.517875070127263e-05, "loss": 0.8466, "step": 26125 }, { "epoch": 0.29, "learning_rate": 4.517782797413411e-05, "loss": 0.8127, "step": 26130 }, { "epoch": 0.29, "learning_rate": 4.5176905246995605e-05, "loss": 0.8173, "step": 26135 }, { "epoch": 0.29, "learning_rate": 4.517598251985709e-05, "loss": 0.7814, "step": 26140 }, { "epoch": 0.29, "learning_rate": 4.5175059792718574e-05, "loss": 0.7438, "step": 26145 }, { "epoch": 0.29, "learning_rate": 4.517413706558006e-05, "loss": 0.7442, "step": 26150 }, { "epoch": 0.29, "learning_rate": 4.5173214338441556e-05, "loss": 0.7571, "step": 26155 }, { "epoch": 0.29, "learning_rate": 4.5172291611303044e-05, "loss": 0.7544, "step": 26160 }, { "epoch": 0.29, "learning_rate": 4.5171368884164525e-05, "loss": 0.7866, "step": 26165 }, { "epoch": 0.29, "learning_rate": 4.517044615702601e-05, "loss": 0.7701, "step": 26170 }, { "epoch": 0.29, "learning_rate": 4.516952342988751e-05, "loss": 0.7772, "step": 26175 }, { "epoch": 0.29, "learning_rate": 4.516860070274899e-05, "loss": 0.8677, "step": 26180 }, { "epoch": 0.29, "learning_rate": 4.516767797561048e-05, "loss": 0.7367, "step": 26185 }, { "epoch": 0.29, "learning_rate": 4.5166755248471965e-05, "loss": 0.7594, "step": 26190 }, { "epoch": 0.29, "learning_rate": 4.516583252133345e-05, "loss": 0.7754, "step": 26195 }, { "epoch": 0.29, "learning_rate": 4.516490979419494e-05, "loss": 0.7984, "step": 26200 }, { "epoch": 0.29, "learning_rate": 4.516398706705643e-05, "loss": 0.8504, "step": 26205 }, { "epoch": 0.29, "learning_rate": 4.5163064339917916e-05, "loss": 0.8687, "step": 26210 }, { "epoch": 0.29, "learning_rate": 4.5162141612779404e-05, "loss": 0.7405, "step": 26215 }, { "epoch": 0.29, "learning_rate": 4.516121888564089e-05, "loss": 0.7655, "step": 26220 }, { "epoch": 0.29, "learning_rate": 4.516029615850238e-05, "loss": 0.7632, "step": 26225 }, { "epoch": 0.29, "learning_rate": 4.515937343136387e-05, "loss": 0.827, "step": 26230 }, { "epoch": 0.29, "learning_rate": 4.5158450704225355e-05, "loss": 0.7679, "step": 26235 }, { "epoch": 0.29, "learning_rate": 4.515752797708684e-05, "loss": 0.7835, "step": 26240 }, { "epoch": 0.29, "learning_rate": 4.515660524994833e-05, "loss": 0.8245, "step": 26245 }, { "epoch": 0.29, "learning_rate": 4.515568252280982e-05, "loss": 0.8274, "step": 26250 }, { "epoch": 0.29, "learning_rate": 4.51547597956713e-05, "loss": 0.7696, "step": 26255 }, { "epoch": 0.29, "learning_rate": 4.5153837068532795e-05, "loss": 0.7291, "step": 26260 }, { "epoch": 0.29, "learning_rate": 4.515291434139428e-05, "loss": 0.7409, "step": 26265 }, { "epoch": 0.29, "learning_rate": 4.515199161425577e-05, "loss": 0.7957, "step": 26270 }, { "epoch": 0.29, "learning_rate": 4.515106888711725e-05, "loss": 0.7847, "step": 26275 }, { "epoch": 0.29, "learning_rate": 4.515014615997874e-05, "loss": 0.8372, "step": 26280 }, { "epoch": 0.29, "learning_rate": 4.5149223432840234e-05, "loss": 0.7459, "step": 26285 }, { "epoch": 0.29, "learning_rate": 4.514830070570172e-05, "loss": 0.8096, "step": 26290 }, { "epoch": 0.29, "learning_rate": 4.51473779785632e-05, "loss": 0.7981, "step": 26295 }, { "epoch": 0.29, "learning_rate": 4.514645525142469e-05, "loss": 0.7844, "step": 26300 }, { "epoch": 0.29, "learning_rate": 4.5145532524286185e-05, "loss": 0.7434, "step": 26305 }, { "epoch": 0.29, "learning_rate": 4.5144609797147666e-05, "loss": 0.8171, "step": 26310 }, { "epoch": 0.29, "learning_rate": 4.5143687070009154e-05, "loss": 0.8311, "step": 26315 }, { "epoch": 0.29, "learning_rate": 4.514276434287064e-05, "loss": 0.7797, "step": 26320 }, { "epoch": 0.29, "learning_rate": 4.5141841615732137e-05, "loss": 0.7728, "step": 26325 }, { "epoch": 0.29, "learning_rate": 4.514091888859362e-05, "loss": 0.7768, "step": 26330 }, { "epoch": 0.29, "learning_rate": 4.5139996161455105e-05, "loss": 0.8148, "step": 26335 }, { "epoch": 0.29, "learning_rate": 4.513907343431659e-05, "loss": 0.7924, "step": 26340 }, { "epoch": 0.29, "learning_rate": 4.513815070717808e-05, "loss": 0.7932, "step": 26345 }, { "epoch": 0.29, "learning_rate": 4.513722798003957e-05, "loss": 0.7944, "step": 26350 }, { "epoch": 0.29, "learning_rate": 4.513630525290106e-05, "loss": 0.769, "step": 26355 }, { "epoch": 0.29, "learning_rate": 4.5135382525762545e-05, "loss": 0.7566, "step": 26360 }, { "epoch": 0.29, "learning_rate": 4.513445979862403e-05, "loss": 0.8312, "step": 26365 }, { "epoch": 0.29, "learning_rate": 4.513353707148552e-05, "loss": 0.74, "step": 26370 }, { "epoch": 0.29, "learning_rate": 4.513261434434701e-05, "loss": 0.7679, "step": 26375 }, { "epoch": 0.29, "learning_rate": 4.5131691617208496e-05, "loss": 0.7431, "step": 26380 }, { "epoch": 0.29, "learning_rate": 4.513076889006998e-05, "loss": 0.8155, "step": 26385 }, { "epoch": 0.29, "learning_rate": 4.512984616293147e-05, "loss": 0.7899, "step": 26390 }, { "epoch": 0.29, "learning_rate": 4.512892343579296e-05, "loss": 0.7812, "step": 26395 }, { "epoch": 0.29, "learning_rate": 4.512800070865445e-05, "loss": 0.7747, "step": 26400 }, { "epoch": 0.29, "learning_rate": 4.512707798151593e-05, "loss": 0.8025, "step": 26405 }, { "epoch": 0.29, "learning_rate": 4.512615525437742e-05, "loss": 0.755, "step": 26410 }, { "epoch": 0.29, "learning_rate": 4.512523252723891e-05, "loss": 0.7864, "step": 26415 }, { "epoch": 0.29, "learning_rate": 4.512430980010039e-05, "loss": 0.7402, "step": 26420 }, { "epoch": 0.29, "learning_rate": 4.512338707296188e-05, "loss": 0.7746, "step": 26425 }, { "epoch": 0.29, "learning_rate": 4.512246434582337e-05, "loss": 0.7285, "step": 26430 }, { "epoch": 0.29, "learning_rate": 4.512154161868486e-05, "loss": 0.7992, "step": 26435 }, { "epoch": 0.29, "learning_rate": 4.5120618891546344e-05, "loss": 0.7442, "step": 26440 }, { "epoch": 0.29, "learning_rate": 4.511969616440783e-05, "loss": 0.8036, "step": 26445 }, { "epoch": 0.29, "learning_rate": 4.511877343726932e-05, "loss": 0.8078, "step": 26450 }, { "epoch": 0.29, "learning_rate": 4.511785071013081e-05, "loss": 0.7421, "step": 26455 }, { "epoch": 0.29, "learning_rate": 4.5116927982992295e-05, "loss": 0.7525, "step": 26460 }, { "epoch": 0.29, "learning_rate": 4.511600525585378e-05, "loss": 0.7609, "step": 26465 }, { "epoch": 0.29, "learning_rate": 4.511508252871527e-05, "loss": 0.7592, "step": 26470 }, { "epoch": 0.29, "learning_rate": 4.511415980157676e-05, "loss": 0.7715, "step": 26475 }, { "epoch": 0.29, "learning_rate": 4.5113237074438246e-05, "loss": 0.7465, "step": 26480 }, { "epoch": 0.29, "learning_rate": 4.5112314347299734e-05, "loss": 0.8234, "step": 26485 }, { "epoch": 0.29, "learning_rate": 4.511139162016122e-05, "loss": 0.7935, "step": 26490 }, { "epoch": 0.29, "learning_rate": 4.51104688930227e-05, "loss": 0.7283, "step": 26495 }, { "epoch": 0.29, "learning_rate": 4.51095461658842e-05, "loss": 0.7532, "step": 26500 }, { "epoch": 0.29, "learning_rate": 4.5108623438745686e-05, "loss": 0.8227, "step": 26505 }, { "epoch": 0.29, "learning_rate": 4.5107700711607173e-05, "loss": 0.8271, "step": 26510 }, { "epoch": 0.29, "learning_rate": 4.5106777984468654e-05, "loss": 0.8169, "step": 26515 }, { "epoch": 0.29, "learning_rate": 4.510585525733015e-05, "loss": 0.755, "step": 26520 }, { "epoch": 0.29, "learning_rate": 4.510493253019164e-05, "loss": 0.7594, "step": 26525 }, { "epoch": 0.29, "learning_rate": 4.510400980305312e-05, "loss": 0.7604, "step": 26530 }, { "epoch": 0.29, "learning_rate": 4.5103087075914606e-05, "loss": 0.74, "step": 26535 }, { "epoch": 0.29, "learning_rate": 4.51021643487761e-05, "loss": 0.7853, "step": 26540 }, { "epoch": 0.29, "learning_rate": 4.510124162163759e-05, "loss": 0.7587, "step": 26545 }, { "epoch": 0.29, "learning_rate": 4.510031889449907e-05, "loss": 0.8247, "step": 26550 }, { "epoch": 0.29, "learning_rate": 4.509939616736056e-05, "loss": 0.8096, "step": 26555 }, { "epoch": 0.29, "learning_rate": 4.509847344022205e-05, "loss": 0.7872, "step": 26560 }, { "epoch": 0.29, "learning_rate": 4.509755071308353e-05, "loss": 0.8384, "step": 26565 }, { "epoch": 0.29, "learning_rate": 4.509662798594502e-05, "loss": 0.7467, "step": 26570 }, { "epoch": 0.29, "learning_rate": 4.509570525880651e-05, "loss": 0.7952, "step": 26575 }, { "epoch": 0.29, "learning_rate": 4.5094782531667996e-05, "loss": 0.8779, "step": 26580 }, { "epoch": 0.29, "learning_rate": 4.5093859804529484e-05, "loss": 0.7781, "step": 26585 }, { "epoch": 0.29, "learning_rate": 4.509293707739097e-05, "loss": 0.7525, "step": 26590 }, { "epoch": 0.29, "learning_rate": 4.509201435025246e-05, "loss": 0.7865, "step": 26595 }, { "epoch": 0.29, "learning_rate": 4.509109162311395e-05, "loss": 0.8341, "step": 26600 }, { "epoch": 0.29, "learning_rate": 4.5090168895975436e-05, "loss": 0.8033, "step": 26605 }, { "epoch": 0.29, "learning_rate": 4.5089246168836924e-05, "loss": 0.8012, "step": 26610 }, { "epoch": 0.29, "learning_rate": 4.508832344169841e-05, "loss": 0.761, "step": 26615 }, { "epoch": 0.29, "learning_rate": 4.50874007145599e-05, "loss": 0.8147, "step": 26620 }, { "epoch": 0.29, "learning_rate": 4.508647798742139e-05, "loss": 0.7691, "step": 26625 }, { "epoch": 0.29, "learning_rate": 4.5085555260282875e-05, "loss": 0.7626, "step": 26630 }, { "epoch": 0.29, "learning_rate": 4.508463253314436e-05, "loss": 0.8191, "step": 26635 }, { "epoch": 0.29, "learning_rate": 4.5083709806005844e-05, "loss": 0.8547, "step": 26640 }, { "epoch": 0.3, "learning_rate": 4.508278707886733e-05, "loss": 0.8273, "step": 26645 }, { "epoch": 0.3, "learning_rate": 4.5081864351728826e-05, "loss": 0.8195, "step": 26650 }, { "epoch": 0.3, "learning_rate": 4.5080941624590314e-05, "loss": 0.7861, "step": 26655 }, { "epoch": 0.3, "learning_rate": 4.5080018897451795e-05, "loss": 0.7573, "step": 26660 }, { "epoch": 0.3, "learning_rate": 4.507909617031328e-05, "loss": 0.7747, "step": 26665 }, { "epoch": 0.3, "learning_rate": 4.507817344317478e-05, "loss": 0.744, "step": 26670 }, { "epoch": 0.3, "learning_rate": 4.5077250716036266e-05, "loss": 0.7668, "step": 26675 }, { "epoch": 0.3, "learning_rate": 4.507632798889775e-05, "loss": 0.7578, "step": 26680 }, { "epoch": 0.3, "learning_rate": 4.5075405261759235e-05, "loss": 0.7865, "step": 26685 }, { "epoch": 0.3, "learning_rate": 4.507448253462073e-05, "loss": 0.8371, "step": 26690 }, { "epoch": 0.3, "learning_rate": 4.507355980748221e-05, "loss": 0.7478, "step": 26695 }, { "epoch": 0.3, "learning_rate": 4.50726370803437e-05, "loss": 0.7337, "step": 26700 }, { "epoch": 0.3, "learning_rate": 4.5071714353205186e-05, "loss": 0.813, "step": 26705 }, { "epoch": 0.3, "learning_rate": 4.507079162606668e-05, "loss": 0.7955, "step": 26710 }, { "epoch": 0.3, "learning_rate": 4.506986889892816e-05, "loss": 0.8496, "step": 26715 }, { "epoch": 0.3, "learning_rate": 4.506894617178965e-05, "loss": 0.722, "step": 26720 }, { "epoch": 0.3, "learning_rate": 4.506802344465114e-05, "loss": 0.7967, "step": 26725 }, { "epoch": 0.3, "learning_rate": 4.5067100717512625e-05, "loss": 0.82, "step": 26730 }, { "epoch": 0.3, "learning_rate": 4.506617799037411e-05, "loss": 0.7433, "step": 26735 }, { "epoch": 0.3, "learning_rate": 4.50652552632356e-05, "loss": 0.813, "step": 26740 }, { "epoch": 0.3, "learning_rate": 4.506433253609709e-05, "loss": 0.8379, "step": 26745 }, { "epoch": 0.3, "learning_rate": 4.5063409808958577e-05, "loss": 0.7812, "step": 26750 }, { "epoch": 0.3, "learning_rate": 4.5062487081820064e-05, "loss": 0.7335, "step": 26755 }, { "epoch": 0.3, "learning_rate": 4.506156435468155e-05, "loss": 0.714, "step": 26760 }, { "epoch": 0.3, "learning_rate": 4.506064162754304e-05, "loss": 0.8497, "step": 26765 }, { "epoch": 0.3, "learning_rate": 4.505971890040452e-05, "loss": 0.8068, "step": 26770 }, { "epoch": 0.3, "learning_rate": 4.5058796173266016e-05, "loss": 0.8089, "step": 26775 }, { "epoch": 0.3, "learning_rate": 4.5057873446127504e-05, "loss": 0.7877, "step": 26780 }, { "epoch": 0.3, "learning_rate": 4.505695071898899e-05, "loss": 0.7998, "step": 26785 }, { "epoch": 0.3, "learning_rate": 4.505602799185047e-05, "loss": 0.8201, "step": 26790 }, { "epoch": 0.3, "learning_rate": 4.505510526471196e-05, "loss": 0.7587, "step": 26795 }, { "epoch": 0.3, "learning_rate": 4.5054182537573455e-05, "loss": 0.7382, "step": 26800 }, { "epoch": 0.3, "learning_rate": 4.5053259810434936e-05, "loss": 0.7022, "step": 26805 }, { "epoch": 0.3, "learning_rate": 4.5052337083296424e-05, "loss": 0.7616, "step": 26810 }, { "epoch": 0.3, "learning_rate": 4.505141435615791e-05, "loss": 0.8177, "step": 26815 }, { "epoch": 0.3, "learning_rate": 4.5050491629019406e-05, "loss": 0.8202, "step": 26820 }, { "epoch": 0.3, "learning_rate": 4.504956890188089e-05, "loss": 0.7707, "step": 26825 }, { "epoch": 0.3, "learning_rate": 4.5048646174742375e-05, "loss": 0.7692, "step": 26830 }, { "epoch": 0.3, "learning_rate": 4.504772344760386e-05, "loss": 0.7817, "step": 26835 }, { "epoch": 0.3, "learning_rate": 4.504680072046535e-05, "loss": 0.7315, "step": 26840 }, { "epoch": 0.3, "learning_rate": 4.504587799332684e-05, "loss": 0.7673, "step": 26845 }, { "epoch": 0.3, "learning_rate": 4.504495526618833e-05, "loss": 0.7782, "step": 26850 }, { "epoch": 0.3, "learning_rate": 4.5044032539049815e-05, "loss": 0.8126, "step": 26855 }, { "epoch": 0.3, "learning_rate": 4.50431098119113e-05, "loss": 0.8156, "step": 26860 }, { "epoch": 0.3, "learning_rate": 4.504218708477279e-05, "loss": 0.7342, "step": 26865 }, { "epoch": 0.3, "learning_rate": 4.504126435763428e-05, "loss": 0.7718, "step": 26870 }, { "epoch": 0.3, "learning_rate": 4.5040341630495766e-05, "loss": 0.7075, "step": 26875 }, { "epoch": 0.3, "learning_rate": 4.503941890335725e-05, "loss": 0.8011, "step": 26880 }, { "epoch": 0.3, "learning_rate": 4.503849617621874e-05, "loss": 0.8375, "step": 26885 }, { "epoch": 0.3, "learning_rate": 4.503757344908023e-05, "loss": 0.8065, "step": 26890 }, { "epoch": 0.3, "learning_rate": 4.503665072194172e-05, "loss": 0.7789, "step": 26895 }, { "epoch": 0.3, "learning_rate": 4.50357279948032e-05, "loss": 0.7623, "step": 26900 }, { "epoch": 0.3, "learning_rate": 4.503480526766469e-05, "loss": 0.7612, "step": 26905 }, { "epoch": 0.3, "learning_rate": 4.503388254052618e-05, "loss": 0.7812, "step": 26910 }, { "epoch": 0.3, "learning_rate": 4.503295981338766e-05, "loss": 0.8036, "step": 26915 }, { "epoch": 0.3, "learning_rate": 4.503203708624915e-05, "loss": 0.7749, "step": 26920 }, { "epoch": 0.3, "learning_rate": 4.5031114359110645e-05, "loss": 0.7374, "step": 26925 }, { "epoch": 0.3, "learning_rate": 4.503019163197213e-05, "loss": 0.7657, "step": 26930 }, { "epoch": 0.3, "learning_rate": 4.5029268904833613e-05, "loss": 0.7799, "step": 26935 }, { "epoch": 0.3, "learning_rate": 4.50283461776951e-05, "loss": 0.7875, "step": 26940 }, { "epoch": 0.3, "learning_rate": 4.502742345055659e-05, "loss": 0.7431, "step": 26945 }, { "epoch": 0.3, "learning_rate": 4.502650072341808e-05, "loss": 0.7173, "step": 26950 }, { "epoch": 0.3, "learning_rate": 4.5025577996279565e-05, "loss": 0.8284, "step": 26955 }, { "epoch": 0.3, "learning_rate": 4.502465526914105e-05, "loss": 0.7999, "step": 26960 }, { "epoch": 0.3, "learning_rate": 4.502373254200254e-05, "loss": 0.7999, "step": 26965 }, { "epoch": 0.3, "learning_rate": 4.502280981486403e-05, "loss": 0.7337, "step": 26970 }, { "epoch": 0.3, "learning_rate": 4.5021887087725516e-05, "loss": 0.7653, "step": 26975 }, { "epoch": 0.3, "learning_rate": 4.5020964360587004e-05, "loss": 0.7927, "step": 26980 }, { "epoch": 0.3, "learning_rate": 4.502004163344849e-05, "loss": 0.8128, "step": 26985 }, { "epoch": 0.3, "learning_rate": 4.501911890630998e-05, "loss": 0.8214, "step": 26990 }, { "epoch": 0.3, "learning_rate": 4.501819617917147e-05, "loss": 0.8268, "step": 26995 }, { "epoch": 0.3, "learning_rate": 4.5017273452032955e-05, "loss": 0.7472, "step": 27000 }, { "epoch": 0.3, "eval_loss": 0.7292386889457703, "eval_runtime": 69.3045, "eval_samples_per_second": 28.858, "eval_steps_per_second": 14.429, "step": 27000 }, { "epoch": 0.3, "learning_rate": 4.501635072489444e-05, "loss": 0.7961, "step": 27005 }, { "epoch": 0.3, "learning_rate": 4.501542799775593e-05, "loss": 0.7553, "step": 27010 }, { "epoch": 0.3, "learning_rate": 4.501450527061742e-05, "loss": 0.7514, "step": 27015 }, { "epoch": 0.3, "learning_rate": 4.501358254347891e-05, "loss": 0.7522, "step": 27020 }, { "epoch": 0.3, "learning_rate": 4.501265981634039e-05, "loss": 0.829, "step": 27025 }, { "epoch": 0.3, "learning_rate": 4.5011737089201876e-05, "loss": 0.752, "step": 27030 }, { "epoch": 0.3, "learning_rate": 4.501081436206337e-05, "loss": 0.7873, "step": 27035 }, { "epoch": 0.3, "learning_rate": 4.500989163492486e-05, "loss": 0.7559, "step": 27040 }, { "epoch": 0.3, "learning_rate": 4.500896890778634e-05, "loss": 0.8782, "step": 27045 }, { "epoch": 0.3, "learning_rate": 4.500804618064783e-05, "loss": 0.789, "step": 27050 }, { "epoch": 0.3, "learning_rate": 4.500712345350932e-05, "loss": 0.8169, "step": 27055 }, { "epoch": 0.3, "learning_rate": 4.500620072637081e-05, "loss": 0.7875, "step": 27060 }, { "epoch": 0.3, "learning_rate": 4.500527799923229e-05, "loss": 0.7862, "step": 27065 }, { "epoch": 0.3, "learning_rate": 4.500435527209378e-05, "loss": 0.8189, "step": 27070 }, { "epoch": 0.3, "learning_rate": 4.500343254495527e-05, "loss": 0.8387, "step": 27075 }, { "epoch": 0.3, "learning_rate": 4.5002509817816754e-05, "loss": 0.8143, "step": 27080 }, { "epoch": 0.3, "learning_rate": 4.500158709067824e-05, "loss": 0.8108, "step": 27085 }, { "epoch": 0.3, "learning_rate": 4.500066436353973e-05, "loss": 0.783, "step": 27090 }, { "epoch": 0.3, "learning_rate": 4.4999741636401225e-05, "loss": 0.7152, "step": 27095 }, { "epoch": 0.3, "learning_rate": 4.4998818909262706e-05, "loss": 0.8284, "step": 27100 }, { "epoch": 0.3, "learning_rate": 4.4997896182124194e-05, "loss": 0.7824, "step": 27105 }, { "epoch": 0.3, "learning_rate": 4.499697345498568e-05, "loss": 0.7864, "step": 27110 }, { "epoch": 0.3, "learning_rate": 4.499605072784717e-05, "loss": 0.7883, "step": 27115 }, { "epoch": 0.3, "learning_rate": 4.499512800070866e-05, "loss": 0.7138, "step": 27120 }, { "epoch": 0.3, "learning_rate": 4.4994205273570145e-05, "loss": 0.7545, "step": 27125 }, { "epoch": 0.3, "learning_rate": 4.499328254643163e-05, "loss": 0.7815, "step": 27130 }, { "epoch": 0.3, "learning_rate": 4.499235981929312e-05, "loss": 0.7407, "step": 27135 }, { "epoch": 0.3, "learning_rate": 4.499143709215461e-05, "loss": 0.7795, "step": 27140 }, { "epoch": 0.3, "learning_rate": 4.4990514365016096e-05, "loss": 0.7578, "step": 27145 }, { "epoch": 0.3, "learning_rate": 4.4989591637877584e-05, "loss": 0.8114, "step": 27150 }, { "epoch": 0.3, "learning_rate": 4.4988668910739065e-05, "loss": 0.7479, "step": 27155 }, { "epoch": 0.3, "learning_rate": 4.498774618360056e-05, "loss": 0.7793, "step": 27160 }, { "epoch": 0.3, "learning_rate": 4.498682345646205e-05, "loss": 0.7684, "step": 27165 }, { "epoch": 0.3, "learning_rate": 4.4985900729323536e-05, "loss": 0.7745, "step": 27170 }, { "epoch": 0.3, "learning_rate": 4.4984978002185017e-05, "loss": 0.8305, "step": 27175 }, { "epoch": 0.3, "learning_rate": 4.4984055275046504e-05, "loss": 0.8121, "step": 27180 }, { "epoch": 0.3, "learning_rate": 4.4983132547908e-05, "loss": 0.7703, "step": 27185 }, { "epoch": 0.3, "learning_rate": 4.498220982076948e-05, "loss": 0.799, "step": 27190 }, { "epoch": 0.3, "learning_rate": 4.498128709363097e-05, "loss": 0.7536, "step": 27195 }, { "epoch": 0.3, "learning_rate": 4.4980364366492456e-05, "loss": 0.8096, "step": 27200 }, { "epoch": 0.3, "learning_rate": 4.497944163935395e-05, "loss": 0.7954, "step": 27205 }, { "epoch": 0.3, "learning_rate": 4.497851891221543e-05, "loss": 0.7942, "step": 27210 }, { "epoch": 0.3, "learning_rate": 4.497759618507692e-05, "loss": 0.7458, "step": 27215 }, { "epoch": 0.3, "learning_rate": 4.497667345793841e-05, "loss": 0.7695, "step": 27220 }, { "epoch": 0.3, "learning_rate": 4.4975750730799895e-05, "loss": 0.8625, "step": 27225 }, { "epoch": 0.3, "learning_rate": 4.497482800366138e-05, "loss": 0.8231, "step": 27230 }, { "epoch": 0.3, "learning_rate": 4.497390527652287e-05, "loss": 0.7822, "step": 27235 }, { "epoch": 0.3, "learning_rate": 4.497298254938436e-05, "loss": 0.785, "step": 27240 }, { "epoch": 0.3, "learning_rate": 4.4972059822245846e-05, "loss": 0.7578, "step": 27245 }, { "epoch": 0.3, "learning_rate": 4.4971137095107334e-05, "loss": 0.744, "step": 27250 }, { "epoch": 0.3, "learning_rate": 4.497021436796882e-05, "loss": 0.8404, "step": 27255 }, { "epoch": 0.3, "learning_rate": 4.496929164083031e-05, "loss": 0.8305, "step": 27260 }, { "epoch": 0.3, "learning_rate": 4.496836891369179e-05, "loss": 0.8172, "step": 27265 }, { "epoch": 0.3, "learning_rate": 4.4967446186553286e-05, "loss": 0.7324, "step": 27270 }, { "epoch": 0.3, "learning_rate": 4.4966523459414774e-05, "loss": 0.8014, "step": 27275 }, { "epoch": 0.3, "learning_rate": 4.496560073227626e-05, "loss": 0.7694, "step": 27280 }, { "epoch": 0.3, "learning_rate": 4.496467800513774e-05, "loss": 0.828, "step": 27285 }, { "epoch": 0.3, "learning_rate": 4.496375527799924e-05, "loss": 0.7696, "step": 27290 }, { "epoch": 0.3, "learning_rate": 4.4962832550860725e-05, "loss": 0.8588, "step": 27295 }, { "epoch": 0.3, "learning_rate": 4.4961909823722206e-05, "loss": 0.8542, "step": 27300 }, { "epoch": 0.3, "learning_rate": 4.4960987096583694e-05, "loss": 0.8321, "step": 27305 }, { "epoch": 0.3, "learning_rate": 4.496006436944519e-05, "loss": 0.8386, "step": 27310 }, { "epoch": 0.3, "learning_rate": 4.4959141642306676e-05, "loss": 0.7715, "step": 27315 }, { "epoch": 0.3, "learning_rate": 4.495821891516816e-05, "loss": 0.7364, "step": 27320 }, { "epoch": 0.3, "learning_rate": 4.4957296188029645e-05, "loss": 0.7288, "step": 27325 }, { "epoch": 0.3, "learning_rate": 4.495637346089113e-05, "loss": 0.7616, "step": 27330 }, { "epoch": 0.3, "learning_rate": 4.495545073375262e-05, "loss": 0.7477, "step": 27335 }, { "epoch": 0.3, "learning_rate": 4.495452800661411e-05, "loss": 0.7307, "step": 27340 }, { "epoch": 0.3, "learning_rate": 4.49536052794756e-05, "loss": 0.8005, "step": 27345 }, { "epoch": 0.3, "learning_rate": 4.4952682552337085e-05, "loss": 0.786, "step": 27350 }, { "epoch": 0.3, "learning_rate": 4.495175982519857e-05, "loss": 0.7746, "step": 27355 }, { "epoch": 0.3, "learning_rate": 4.495083709806006e-05, "loss": 0.769, "step": 27360 }, { "epoch": 0.3, "learning_rate": 4.494991437092155e-05, "loss": 0.7832, "step": 27365 }, { "epoch": 0.3, "learning_rate": 4.4948991643783036e-05, "loss": 0.7358, "step": 27370 }, { "epoch": 0.3, "learning_rate": 4.4948068916644524e-05, "loss": 0.7622, "step": 27375 }, { "epoch": 0.3, "learning_rate": 4.494714618950601e-05, "loss": 0.8301, "step": 27380 }, { "epoch": 0.3, "learning_rate": 4.49462234623675e-05, "loss": 0.7731, "step": 27385 }, { "epoch": 0.3, "learning_rate": 4.494530073522899e-05, "loss": 0.7531, "step": 27390 }, { "epoch": 0.3, "learning_rate": 4.4944378008090475e-05, "loss": 0.7421, "step": 27395 }, { "epoch": 0.3, "learning_rate": 4.494345528095196e-05, "loss": 0.7839, "step": 27400 }, { "epoch": 0.3, "learning_rate": 4.494253255381345e-05, "loss": 0.8281, "step": 27405 }, { "epoch": 0.3, "learning_rate": 4.494160982667493e-05, "loss": 0.7656, "step": 27410 }, { "epoch": 0.3, "learning_rate": 4.494068709953642e-05, "loss": 0.8212, "step": 27415 }, { "epoch": 0.3, "learning_rate": 4.4939764372397914e-05, "loss": 0.7397, "step": 27420 }, { "epoch": 0.3, "learning_rate": 4.49388416452594e-05, "loss": 0.8158, "step": 27425 }, { "epoch": 0.3, "learning_rate": 4.493791891812088e-05, "loss": 0.7977, "step": 27430 }, { "epoch": 0.3, "learning_rate": 4.493699619098237e-05, "loss": 0.7623, "step": 27435 }, { "epoch": 0.3, "learning_rate": 4.4936073463843866e-05, "loss": 0.8389, "step": 27440 }, { "epoch": 0.3, "learning_rate": 4.4935150736705354e-05, "loss": 0.8049, "step": 27445 }, { "epoch": 0.3, "learning_rate": 4.4934228009566835e-05, "loss": 0.7583, "step": 27450 }, { "epoch": 0.3, "learning_rate": 4.493330528242832e-05, "loss": 0.7844, "step": 27455 }, { "epoch": 0.3, "learning_rate": 4.493238255528982e-05, "loss": 0.7497, "step": 27460 }, { "epoch": 0.3, "learning_rate": 4.49314598281513e-05, "loss": 0.7799, "step": 27465 }, { "epoch": 0.3, "learning_rate": 4.4930537101012786e-05, "loss": 0.7648, "step": 27470 }, { "epoch": 0.3, "learning_rate": 4.4929614373874274e-05, "loss": 0.7222, "step": 27475 }, { "epoch": 0.3, "learning_rate": 4.492869164673576e-05, "loss": 0.7664, "step": 27480 }, { "epoch": 0.3, "learning_rate": 4.492776891959725e-05, "loss": 0.8727, "step": 27485 }, { "epoch": 0.3, "learning_rate": 4.492684619245874e-05, "loss": 0.7153, "step": 27490 }, { "epoch": 0.3, "learning_rate": 4.4925923465320225e-05, "loss": 0.7972, "step": 27495 }, { "epoch": 0.3, "learning_rate": 4.492500073818171e-05, "loss": 0.7757, "step": 27500 }, { "epoch": 0.3, "learning_rate": 4.49240780110432e-05, "loss": 0.788, "step": 27505 }, { "epoch": 0.3, "learning_rate": 4.492315528390469e-05, "loss": 0.7899, "step": 27510 }, { "epoch": 0.3, "learning_rate": 4.492223255676618e-05, "loss": 0.8078, "step": 27515 }, { "epoch": 0.3, "learning_rate": 4.4921309829627665e-05, "loss": 0.7964, "step": 27520 }, { "epoch": 0.3, "learning_rate": 4.492038710248915e-05, "loss": 0.8312, "step": 27525 }, { "epoch": 0.3, "learning_rate": 4.491946437535064e-05, "loss": 0.7932, "step": 27530 }, { "epoch": 0.3, "learning_rate": 4.491854164821213e-05, "loss": 0.7704, "step": 27535 }, { "epoch": 0.3, "learning_rate": 4.491761892107361e-05, "loss": 0.8188, "step": 27540 }, { "epoch": 0.3, "learning_rate": 4.4916696193935104e-05, "loss": 0.7622, "step": 27545 }, { "epoch": 0.31, "learning_rate": 4.491577346679659e-05, "loss": 0.7969, "step": 27550 }, { "epoch": 0.31, "learning_rate": 4.491485073965808e-05, "loss": 0.784, "step": 27555 }, { "epoch": 0.31, "learning_rate": 4.491392801251956e-05, "loss": 0.7245, "step": 27560 }, { "epoch": 0.31, "learning_rate": 4.491300528538105e-05, "loss": 0.8471, "step": 27565 }, { "epoch": 0.31, "learning_rate": 4.491208255824254e-05, "loss": 0.7894, "step": 27570 }, { "epoch": 0.31, "learning_rate": 4.4911159831104024e-05, "loss": 0.7918, "step": 27575 }, { "epoch": 0.31, "learning_rate": 4.491023710396551e-05, "loss": 0.8149, "step": 27580 }, { "epoch": 0.31, "learning_rate": 4.4909314376827e-05, "loss": 0.8328, "step": 27585 }, { "epoch": 0.31, "learning_rate": 4.4908391649688494e-05, "loss": 0.7882, "step": 27590 }, { "epoch": 0.31, "learning_rate": 4.4907468922549976e-05, "loss": 0.8154, "step": 27595 }, { "epoch": 0.31, "learning_rate": 4.4906546195411463e-05, "loss": 0.766, "step": 27600 }, { "epoch": 0.31, "learning_rate": 4.490562346827295e-05, "loss": 0.885, "step": 27605 }, { "epoch": 0.31, "learning_rate": 4.490470074113444e-05, "loss": 0.7874, "step": 27610 }, { "epoch": 0.31, "learning_rate": 4.490377801399593e-05, "loss": 0.7683, "step": 27615 }, { "epoch": 0.31, "learning_rate": 4.4902855286857415e-05, "loss": 0.7596, "step": 27620 }, { "epoch": 0.31, "learning_rate": 4.49019325597189e-05, "loss": 0.8432, "step": 27625 }, { "epoch": 0.31, "learning_rate": 4.490100983258039e-05, "loss": 0.8014, "step": 27630 }, { "epoch": 0.31, "learning_rate": 4.490008710544188e-05, "loss": 0.7849, "step": 27635 }, { "epoch": 0.31, "learning_rate": 4.4899164378303366e-05, "loss": 0.7452, "step": 27640 }, { "epoch": 0.31, "learning_rate": 4.4898241651164854e-05, "loss": 0.835, "step": 27645 }, { "epoch": 0.31, "learning_rate": 4.4897318924026335e-05, "loss": 0.7777, "step": 27650 }, { "epoch": 0.31, "learning_rate": 4.489639619688783e-05, "loss": 0.773, "step": 27655 }, { "epoch": 0.31, "learning_rate": 4.489547346974932e-05, "loss": 0.8349, "step": 27660 }, { "epoch": 0.31, "learning_rate": 4.4894550742610805e-05, "loss": 0.7357, "step": 27665 }, { "epoch": 0.31, "learning_rate": 4.4893628015472287e-05, "loss": 0.8355, "step": 27670 }, { "epoch": 0.31, "learning_rate": 4.489270528833378e-05, "loss": 0.7722, "step": 27675 }, { "epoch": 0.31, "learning_rate": 4.489178256119527e-05, "loss": 0.8034, "step": 27680 }, { "epoch": 0.31, "learning_rate": 4.489085983405675e-05, "loss": 0.8139, "step": 27685 }, { "epoch": 0.31, "learning_rate": 4.488993710691824e-05, "loss": 0.8152, "step": 27690 }, { "epoch": 0.31, "learning_rate": 4.488901437977973e-05, "loss": 0.7716, "step": 27695 }, { "epoch": 0.31, "learning_rate": 4.488809165264122e-05, "loss": 0.8477, "step": 27700 }, { "epoch": 0.31, "learning_rate": 4.48871689255027e-05, "loss": 0.6903, "step": 27705 }, { "epoch": 0.31, "learning_rate": 4.488624619836419e-05, "loss": 0.756, "step": 27710 }, { "epoch": 0.31, "learning_rate": 4.488532347122568e-05, "loss": 0.8637, "step": 27715 }, { "epoch": 0.31, "learning_rate": 4.4884400744087165e-05, "loss": 0.8127, "step": 27720 }, { "epoch": 0.31, "learning_rate": 4.488347801694865e-05, "loss": 0.7305, "step": 27725 }, { "epoch": 0.31, "learning_rate": 4.488255528981014e-05, "loss": 0.8039, "step": 27730 }, { "epoch": 0.31, "learning_rate": 4.488163256267163e-05, "loss": 0.7969, "step": 27735 }, { "epoch": 0.31, "learning_rate": 4.4880709835533116e-05, "loss": 0.8066, "step": 27740 }, { "epoch": 0.31, "learning_rate": 4.4879787108394604e-05, "loss": 0.7992, "step": 27745 }, { "epoch": 0.31, "learning_rate": 4.487886438125609e-05, "loss": 0.7746, "step": 27750 }, { "epoch": 0.31, "learning_rate": 4.487794165411758e-05, "loss": 0.7485, "step": 27755 }, { "epoch": 0.31, "learning_rate": 4.487701892697907e-05, "loss": 0.7802, "step": 27760 }, { "epoch": 0.31, "learning_rate": 4.4876096199840556e-05, "loss": 0.7601, "step": 27765 }, { "epoch": 0.31, "learning_rate": 4.4875173472702043e-05, "loss": 0.8066, "step": 27770 }, { "epoch": 0.31, "learning_rate": 4.487425074556353e-05, "loss": 0.8167, "step": 27775 }, { "epoch": 0.31, "learning_rate": 4.487332801842501e-05, "loss": 0.7903, "step": 27780 }, { "epoch": 0.31, "learning_rate": 4.487240529128651e-05, "loss": 0.8527, "step": 27785 }, { "epoch": 0.31, "learning_rate": 4.4871482564147995e-05, "loss": 0.8265, "step": 27790 }, { "epoch": 0.31, "learning_rate": 4.4870559837009476e-05, "loss": 0.7684, "step": 27795 }, { "epoch": 0.31, "learning_rate": 4.4869637109870964e-05, "loss": 0.7674, "step": 27800 }, { "epoch": 0.31, "learning_rate": 4.486871438273246e-05, "loss": 0.8082, "step": 27805 }, { "epoch": 0.31, "learning_rate": 4.4867791655593946e-05, "loss": 0.7302, "step": 27810 }, { "epoch": 0.31, "learning_rate": 4.486686892845543e-05, "loss": 0.7006, "step": 27815 }, { "epoch": 0.31, "learning_rate": 4.4865946201316915e-05, "loss": 0.7659, "step": 27820 }, { "epoch": 0.31, "learning_rate": 4.486502347417841e-05, "loss": 0.7998, "step": 27825 }, { "epoch": 0.31, "learning_rate": 4.48641007470399e-05, "loss": 0.8112, "step": 27830 }, { "epoch": 0.31, "learning_rate": 4.486317801990138e-05, "loss": 0.7866, "step": 27835 }, { "epoch": 0.31, "learning_rate": 4.4862255292762867e-05, "loss": 0.7893, "step": 27840 }, { "epoch": 0.31, "learning_rate": 4.486133256562436e-05, "loss": 0.7737, "step": 27845 }, { "epoch": 0.31, "learning_rate": 4.486040983848584e-05, "loss": 0.8028, "step": 27850 }, { "epoch": 0.31, "learning_rate": 4.485948711134733e-05, "loss": 0.8922, "step": 27855 }, { "epoch": 0.31, "learning_rate": 4.485856438420882e-05, "loss": 0.8244, "step": 27860 }, { "epoch": 0.31, "learning_rate": 4.4857641657070306e-05, "loss": 0.8052, "step": 27865 }, { "epoch": 0.31, "learning_rate": 4.4856718929931794e-05, "loss": 0.8066, "step": 27870 }, { "epoch": 0.31, "learning_rate": 4.485579620279328e-05, "loss": 0.863, "step": 27875 }, { "epoch": 0.31, "learning_rate": 4.485487347565477e-05, "loss": 0.7248, "step": 27880 }, { "epoch": 0.31, "learning_rate": 4.485395074851626e-05, "loss": 0.7979, "step": 27885 }, { "epoch": 0.31, "learning_rate": 4.4853028021377745e-05, "loss": 0.7973, "step": 27890 }, { "epoch": 0.31, "learning_rate": 4.485210529423923e-05, "loss": 0.8605, "step": 27895 }, { "epoch": 0.31, "learning_rate": 4.485118256710072e-05, "loss": 0.7969, "step": 27900 }, { "epoch": 0.31, "learning_rate": 4.485025983996221e-05, "loss": 0.7057, "step": 27905 }, { "epoch": 0.31, "learning_rate": 4.4849337112823696e-05, "loss": 0.7894, "step": 27910 }, { "epoch": 0.31, "learning_rate": 4.4848414385685184e-05, "loss": 0.7994, "step": 27915 }, { "epoch": 0.31, "learning_rate": 4.484749165854667e-05, "loss": 0.8039, "step": 27920 }, { "epoch": 0.31, "learning_rate": 4.484656893140815e-05, "loss": 0.7981, "step": 27925 }, { "epoch": 0.31, "learning_rate": 4.484564620426965e-05, "loss": 0.7849, "step": 27930 }, { "epoch": 0.31, "learning_rate": 4.4844723477131136e-05, "loss": 0.8147, "step": 27935 }, { "epoch": 0.31, "learning_rate": 4.4843800749992624e-05, "loss": 0.8059, "step": 27940 }, { "epoch": 0.31, "learning_rate": 4.4842878022854105e-05, "loss": 0.8203, "step": 27945 }, { "epoch": 0.31, "learning_rate": 4.484195529571559e-05, "loss": 0.6996, "step": 27950 }, { "epoch": 0.31, "learning_rate": 4.484103256857709e-05, "loss": 0.8158, "step": 27955 }, { "epoch": 0.31, "learning_rate": 4.484010984143857e-05, "loss": 0.7609, "step": 27960 }, { "epoch": 0.31, "learning_rate": 4.4839187114300056e-05, "loss": 0.758, "step": 27965 }, { "epoch": 0.31, "learning_rate": 4.4838264387161544e-05, "loss": 0.7674, "step": 27970 }, { "epoch": 0.31, "learning_rate": 4.483734166002304e-05, "loss": 0.8543, "step": 27975 }, { "epoch": 0.31, "learning_rate": 4.483641893288452e-05, "loss": 0.7468, "step": 27980 }, { "epoch": 0.31, "learning_rate": 4.483549620574601e-05, "loss": 0.7882, "step": 27985 }, { "epoch": 0.31, "learning_rate": 4.4834573478607495e-05, "loss": 0.7861, "step": 27990 }, { "epoch": 0.31, "learning_rate": 4.483365075146898e-05, "loss": 0.7319, "step": 27995 }, { "epoch": 0.31, "learning_rate": 4.483272802433047e-05, "loss": 0.8048, "step": 28000 }, { "epoch": 0.31, "eval_loss": 0.7697468400001526, "eval_runtime": 69.2888, "eval_samples_per_second": 28.865, "eval_steps_per_second": 14.432, "step": 28000 }, { "epoch": 0.31, "learning_rate": 4.483180529719196e-05, "loss": 0.77, "step": 28005 }, { "epoch": 0.31, "learning_rate": 4.483088257005345e-05, "loss": 0.7488, "step": 28010 }, { "epoch": 0.31, "learning_rate": 4.4829959842914935e-05, "loss": 0.7829, "step": 28015 }, { "epoch": 0.31, "learning_rate": 4.482903711577642e-05, "loss": 0.8301, "step": 28020 }, { "epoch": 0.31, "learning_rate": 4.482811438863791e-05, "loss": 0.8785, "step": 28025 }, { "epoch": 0.31, "learning_rate": 4.48271916614994e-05, "loss": 0.8225, "step": 28030 }, { "epoch": 0.31, "learning_rate": 4.482626893436088e-05, "loss": 0.8423, "step": 28035 }, { "epoch": 0.31, "learning_rate": 4.4825346207222374e-05, "loss": 0.7636, "step": 28040 }, { "epoch": 0.31, "learning_rate": 4.482442348008386e-05, "loss": 0.6888, "step": 28045 }, { "epoch": 0.31, "learning_rate": 4.482350075294535e-05, "loss": 0.7987, "step": 28050 }, { "epoch": 0.31, "learning_rate": 4.482257802580683e-05, "loss": 0.7584, "step": 28055 }, { "epoch": 0.31, "learning_rate": 4.4821655298668325e-05, "loss": 0.7717, "step": 28060 }, { "epoch": 0.31, "learning_rate": 4.482073257152981e-05, "loss": 0.8057, "step": 28065 }, { "epoch": 0.31, "learning_rate": 4.4819809844391294e-05, "loss": 0.7506, "step": 28070 }, { "epoch": 0.31, "learning_rate": 4.481888711725278e-05, "loss": 0.7838, "step": 28075 }, { "epoch": 0.31, "learning_rate": 4.4817964390114277e-05, "loss": 0.7998, "step": 28080 }, { "epoch": 0.31, "learning_rate": 4.4817041662975764e-05, "loss": 0.746, "step": 28085 }, { "epoch": 0.31, "learning_rate": 4.4816118935837245e-05, "loss": 0.7731, "step": 28090 }, { "epoch": 0.31, "learning_rate": 4.481519620869873e-05, "loss": 0.8482, "step": 28095 }, { "epoch": 0.31, "learning_rate": 4.481427348156022e-05, "loss": 0.7567, "step": 28100 }, { "epoch": 0.31, "learning_rate": 4.481335075442171e-05, "loss": 0.7399, "step": 28105 }, { "epoch": 0.31, "learning_rate": 4.48124280272832e-05, "loss": 0.753, "step": 28110 }, { "epoch": 0.31, "learning_rate": 4.4811505300144685e-05, "loss": 0.834, "step": 28115 }, { "epoch": 0.31, "learning_rate": 4.481058257300617e-05, "loss": 0.7732, "step": 28120 }, { "epoch": 0.31, "learning_rate": 4.480965984586766e-05, "loss": 0.7785, "step": 28125 }, { "epoch": 0.31, "learning_rate": 4.480873711872915e-05, "loss": 0.7774, "step": 28130 }, { "epoch": 0.31, "learning_rate": 4.4807814391590636e-05, "loss": 0.7835, "step": 28135 }, { "epoch": 0.31, "learning_rate": 4.4806891664452124e-05, "loss": 0.7884, "step": 28140 }, { "epoch": 0.31, "learning_rate": 4.480596893731361e-05, "loss": 0.7881, "step": 28145 }, { "epoch": 0.31, "learning_rate": 4.48050462101751e-05, "loss": 0.7723, "step": 28150 }, { "epoch": 0.31, "learning_rate": 4.480412348303659e-05, "loss": 0.7871, "step": 28155 }, { "epoch": 0.31, "learning_rate": 4.4803200755898075e-05, "loss": 0.7378, "step": 28160 }, { "epoch": 0.31, "learning_rate": 4.4802278028759556e-05, "loss": 0.7725, "step": 28165 }, { "epoch": 0.31, "learning_rate": 4.480135530162105e-05, "loss": 0.8417, "step": 28170 }, { "epoch": 0.31, "learning_rate": 4.480043257448254e-05, "loss": 0.786, "step": 28175 }, { "epoch": 0.31, "learning_rate": 4.479950984734402e-05, "loss": 0.8164, "step": 28180 }, { "epoch": 0.31, "learning_rate": 4.479858712020551e-05, "loss": 0.7561, "step": 28185 }, { "epoch": 0.31, "learning_rate": 4.4797664393067e-05, "loss": 0.79, "step": 28190 }, { "epoch": 0.31, "learning_rate": 4.479674166592849e-05, "loss": 0.8269, "step": 28195 }, { "epoch": 0.31, "learning_rate": 4.479581893878997e-05, "loss": 0.7754, "step": 28200 }, { "epoch": 0.31, "learning_rate": 4.479489621165146e-05, "loss": 0.7894, "step": 28205 }, { "epoch": 0.31, "learning_rate": 4.4793973484512954e-05, "loss": 0.7985, "step": 28210 }, { "epoch": 0.31, "learning_rate": 4.479305075737444e-05, "loss": 0.7548, "step": 28215 }, { "epoch": 0.31, "learning_rate": 4.479212803023592e-05, "loss": 0.7474, "step": 28220 }, { "epoch": 0.31, "learning_rate": 4.479120530309741e-05, "loss": 0.7812, "step": 28225 }, { "epoch": 0.31, "learning_rate": 4.4790282575958905e-05, "loss": 0.7469, "step": 28230 }, { "epoch": 0.31, "learning_rate": 4.4789359848820386e-05, "loss": 0.8018, "step": 28235 }, { "epoch": 0.31, "learning_rate": 4.4788437121681874e-05, "loss": 0.7746, "step": 28240 }, { "epoch": 0.31, "learning_rate": 4.478751439454336e-05, "loss": 0.7689, "step": 28245 }, { "epoch": 0.31, "learning_rate": 4.478659166740485e-05, "loss": 0.6819, "step": 28250 }, { "epoch": 0.31, "learning_rate": 4.478566894026634e-05, "loss": 0.6992, "step": 28255 }, { "epoch": 0.31, "learning_rate": 4.4784746213127826e-05, "loss": 0.7772, "step": 28260 }, { "epoch": 0.31, "learning_rate": 4.4783823485989313e-05, "loss": 0.8186, "step": 28265 }, { "epoch": 0.31, "learning_rate": 4.47829007588508e-05, "loss": 0.7161, "step": 28270 }, { "epoch": 0.31, "learning_rate": 4.478197803171229e-05, "loss": 0.7934, "step": 28275 }, { "epoch": 0.31, "learning_rate": 4.478105530457378e-05, "loss": 0.7588, "step": 28280 }, { "epoch": 0.31, "learning_rate": 4.4780132577435265e-05, "loss": 0.8015, "step": 28285 }, { "epoch": 0.31, "learning_rate": 4.477920985029675e-05, "loss": 0.7826, "step": 28290 }, { "epoch": 0.31, "learning_rate": 4.477828712315824e-05, "loss": 0.7963, "step": 28295 }, { "epoch": 0.31, "learning_rate": 4.477736439601973e-05, "loss": 0.7424, "step": 28300 }, { "epoch": 0.31, "learning_rate": 4.4776441668881216e-05, "loss": 0.737, "step": 28305 }, { "epoch": 0.31, "learning_rate": 4.47755189417427e-05, "loss": 0.7573, "step": 28310 }, { "epoch": 0.31, "learning_rate": 4.4774596214604185e-05, "loss": 0.7675, "step": 28315 }, { "epoch": 0.31, "learning_rate": 4.477367348746568e-05, "loss": 0.7515, "step": 28320 }, { "epoch": 0.31, "learning_rate": 4.477275076032717e-05, "loss": 0.8131, "step": 28325 }, { "epoch": 0.31, "learning_rate": 4.477182803318865e-05, "loss": 0.7982, "step": 28330 }, { "epoch": 0.31, "learning_rate": 4.4770905306050137e-05, "loss": 0.8193, "step": 28335 }, { "epoch": 0.31, "learning_rate": 4.476998257891163e-05, "loss": 0.7359, "step": 28340 }, { "epoch": 0.31, "learning_rate": 4.476905985177311e-05, "loss": 0.7517, "step": 28345 }, { "epoch": 0.31, "learning_rate": 4.47681371246346e-05, "loss": 0.8105, "step": 28350 }, { "epoch": 0.31, "learning_rate": 4.476721439749609e-05, "loss": 0.7658, "step": 28355 }, { "epoch": 0.31, "learning_rate": 4.476629167035758e-05, "loss": 0.8292, "step": 28360 }, { "epoch": 0.31, "learning_rate": 4.4765368943219064e-05, "loss": 0.7478, "step": 28365 }, { "epoch": 0.31, "learning_rate": 4.476444621608055e-05, "loss": 0.783, "step": 28370 }, { "epoch": 0.31, "learning_rate": 4.476352348894204e-05, "loss": 0.7298, "step": 28375 }, { "epoch": 0.31, "learning_rate": 4.476260076180353e-05, "loss": 0.8131, "step": 28380 }, { "epoch": 0.31, "learning_rate": 4.4761678034665015e-05, "loss": 0.7515, "step": 28385 }, { "epoch": 0.31, "learning_rate": 4.47607553075265e-05, "loss": 0.7885, "step": 28390 }, { "epoch": 0.31, "learning_rate": 4.475983258038799e-05, "loss": 0.774, "step": 28395 }, { "epoch": 0.31, "learning_rate": 4.475890985324948e-05, "loss": 0.7627, "step": 28400 }, { "epoch": 0.31, "learning_rate": 4.4757987126110966e-05, "loss": 0.729, "step": 28405 }, { "epoch": 0.31, "learning_rate": 4.4757064398972454e-05, "loss": 0.743, "step": 28410 }, { "epoch": 0.31, "learning_rate": 4.475614167183394e-05, "loss": 0.7591, "step": 28415 }, { "epoch": 0.31, "learning_rate": 4.475521894469542e-05, "loss": 0.8595, "step": 28420 }, { "epoch": 0.31, "learning_rate": 4.475429621755692e-05, "loss": 0.804, "step": 28425 }, { "epoch": 0.31, "learning_rate": 4.4753373490418406e-05, "loss": 0.7935, "step": 28430 }, { "epoch": 0.31, "learning_rate": 4.4752450763279893e-05, "loss": 0.7248, "step": 28435 }, { "epoch": 0.31, "learning_rate": 4.4751528036141375e-05, "loss": 0.7399, "step": 28440 }, { "epoch": 0.31, "learning_rate": 4.475060530900287e-05, "loss": 0.7975, "step": 28445 }, { "epoch": 0.32, "learning_rate": 4.474968258186436e-05, "loss": 0.7273, "step": 28450 }, { "epoch": 0.32, "learning_rate": 4.474875985472584e-05, "loss": 0.787, "step": 28455 }, { "epoch": 0.32, "learning_rate": 4.4747837127587326e-05, "loss": 0.8262, "step": 28460 }, { "epoch": 0.32, "learning_rate": 4.4746914400448814e-05, "loss": 0.7799, "step": 28465 }, { "epoch": 0.32, "learning_rate": 4.474599167331031e-05, "loss": 0.7855, "step": 28470 }, { "epoch": 0.32, "learning_rate": 4.474506894617179e-05, "loss": 0.8122, "step": 28475 }, { "epoch": 0.32, "learning_rate": 4.474414621903328e-05, "loss": 0.794, "step": 28480 }, { "epoch": 0.32, "learning_rate": 4.4743223491894765e-05, "loss": 0.8066, "step": 28485 }, { "epoch": 0.32, "learning_rate": 4.474230076475625e-05, "loss": 0.8603, "step": 28490 }, { "epoch": 0.32, "learning_rate": 4.474137803761774e-05, "loss": 0.7569, "step": 28495 }, { "epoch": 0.32, "learning_rate": 4.474045531047923e-05, "loss": 0.7888, "step": 28500 }, { "epoch": 0.32, "learning_rate": 4.4739532583340717e-05, "loss": 0.7651, "step": 28505 }, { "epoch": 0.32, "learning_rate": 4.4738609856202204e-05, "loss": 0.7959, "step": 28510 }, { "epoch": 0.32, "learning_rate": 4.473768712906369e-05, "loss": 0.7725, "step": 28515 }, { "epoch": 0.32, "learning_rate": 4.473676440192518e-05, "loss": 0.7956, "step": 28520 }, { "epoch": 0.32, "learning_rate": 4.473584167478667e-05, "loss": 0.771, "step": 28525 }, { "epoch": 0.32, "learning_rate": 4.4734918947648156e-05, "loss": 0.7484, "step": 28530 }, { "epoch": 0.32, "learning_rate": 4.4733996220509644e-05, "loss": 0.7907, "step": 28535 }, { "epoch": 0.32, "learning_rate": 4.473307349337113e-05, "loss": 0.7455, "step": 28540 }, { "epoch": 0.32, "learning_rate": 4.473215076623262e-05, "loss": 0.7203, "step": 28545 }, { "epoch": 0.32, "learning_rate": 4.47312280390941e-05, "loss": 0.7811, "step": 28550 }, { "epoch": 0.32, "learning_rate": 4.4730305311955595e-05, "loss": 0.916, "step": 28555 }, { "epoch": 0.32, "learning_rate": 4.472938258481708e-05, "loss": 0.8105, "step": 28560 }, { "epoch": 0.32, "learning_rate": 4.472845985767857e-05, "loss": 0.7498, "step": 28565 }, { "epoch": 0.32, "learning_rate": 4.472753713054005e-05, "loss": 0.8076, "step": 28570 }, { "epoch": 0.32, "learning_rate": 4.4726614403401546e-05, "loss": 0.8375, "step": 28575 }, { "epoch": 0.32, "learning_rate": 4.4725691676263034e-05, "loss": 0.7669, "step": 28580 }, { "epoch": 0.32, "learning_rate": 4.4724768949124515e-05, "loss": 0.8204, "step": 28585 }, { "epoch": 0.32, "learning_rate": 4.4723846221986e-05, "loss": 0.7655, "step": 28590 }, { "epoch": 0.32, "learning_rate": 4.47229234948475e-05, "loss": 0.8002, "step": 28595 }, { "epoch": 0.32, "learning_rate": 4.4722000767708986e-05, "loss": 0.808, "step": 28600 }, { "epoch": 0.32, "learning_rate": 4.472107804057047e-05, "loss": 0.7735, "step": 28605 }, { "epoch": 0.32, "learning_rate": 4.4720155313431955e-05, "loss": 0.8178, "step": 28610 }, { "epoch": 0.32, "learning_rate": 4.471923258629344e-05, "loss": 0.7314, "step": 28615 }, { "epoch": 0.32, "learning_rate": 4.471830985915493e-05, "loss": 0.7682, "step": 28620 }, { "epoch": 0.32, "learning_rate": 4.471738713201642e-05, "loss": 0.7496, "step": 28625 }, { "epoch": 0.32, "learning_rate": 4.4716464404877906e-05, "loss": 0.8168, "step": 28630 }, { "epoch": 0.32, "learning_rate": 4.4715541677739394e-05, "loss": 0.8064, "step": 28635 }, { "epoch": 0.32, "learning_rate": 4.471461895060088e-05, "loss": 0.7858, "step": 28640 }, { "epoch": 0.32, "learning_rate": 4.471369622346237e-05, "loss": 0.7504, "step": 28645 }, { "epoch": 0.32, "learning_rate": 4.471277349632386e-05, "loss": 0.7477, "step": 28650 }, { "epoch": 0.32, "learning_rate": 4.4711850769185345e-05, "loss": 0.8238, "step": 28655 }, { "epoch": 0.32, "learning_rate": 4.471092804204683e-05, "loss": 0.8394, "step": 28660 }, { "epoch": 0.32, "learning_rate": 4.471000531490832e-05, "loss": 0.8456, "step": 28665 }, { "epoch": 0.32, "learning_rate": 4.470908258776981e-05, "loss": 0.8155, "step": 28670 }, { "epoch": 0.32, "learning_rate": 4.47081598606313e-05, "loss": 0.6923, "step": 28675 }, { "epoch": 0.32, "learning_rate": 4.4707237133492785e-05, "loss": 0.7571, "step": 28680 }, { "epoch": 0.32, "learning_rate": 4.470631440635427e-05, "loss": 0.7437, "step": 28685 }, { "epoch": 0.32, "learning_rate": 4.470539167921576e-05, "loss": 0.7502, "step": 28690 }, { "epoch": 0.32, "learning_rate": 4.470446895207724e-05, "loss": 0.7966, "step": 28695 }, { "epoch": 0.32, "learning_rate": 4.470354622493873e-05, "loss": 0.7859, "step": 28700 }, { "epoch": 0.32, "learning_rate": 4.4702623497800224e-05, "loss": 0.7465, "step": 28705 }, { "epoch": 0.32, "learning_rate": 4.470170077066171e-05, "loss": 0.7834, "step": 28710 }, { "epoch": 0.32, "learning_rate": 4.470077804352319e-05, "loss": 0.75, "step": 28715 }, { "epoch": 0.32, "learning_rate": 4.469985531638468e-05, "loss": 0.6904, "step": 28720 }, { "epoch": 0.32, "learning_rate": 4.4698932589246175e-05, "loss": 0.7656, "step": 28725 }, { "epoch": 0.32, "learning_rate": 4.4698009862107656e-05, "loss": 0.7263, "step": 28730 }, { "epoch": 0.32, "learning_rate": 4.4697087134969144e-05, "loss": 0.8198, "step": 28735 }, { "epoch": 0.32, "learning_rate": 4.469616440783063e-05, "loss": 0.7586, "step": 28740 }, { "epoch": 0.32, "learning_rate": 4.4695241680692127e-05, "loss": 0.8186, "step": 28745 }, { "epoch": 0.32, "learning_rate": 4.469431895355361e-05, "loss": 0.7755, "step": 28750 }, { "epoch": 0.32, "learning_rate": 4.4693396226415095e-05, "loss": 0.745, "step": 28755 }, { "epoch": 0.32, "learning_rate": 4.469247349927658e-05, "loss": 0.8349, "step": 28760 }, { "epoch": 0.32, "learning_rate": 4.469155077213807e-05, "loss": 0.8301, "step": 28765 }, { "epoch": 0.32, "learning_rate": 4.469062804499956e-05, "loss": 0.7704, "step": 28770 }, { "epoch": 0.32, "learning_rate": 4.468970531786105e-05, "loss": 0.7627, "step": 28775 }, { "epoch": 0.32, "learning_rate": 4.4688782590722535e-05, "loss": 0.8104, "step": 28780 }, { "epoch": 0.32, "learning_rate": 4.468785986358402e-05, "loss": 0.7672, "step": 28785 }, { "epoch": 0.32, "learning_rate": 4.468693713644551e-05, "loss": 0.7827, "step": 28790 }, { "epoch": 0.32, "learning_rate": 4.4686014409307e-05, "loss": 0.7748, "step": 28795 }, { "epoch": 0.32, "learning_rate": 4.4685091682168486e-05, "loss": 0.8317, "step": 28800 }, { "epoch": 0.32, "learning_rate": 4.468416895502997e-05, "loss": 0.7412, "step": 28805 }, { "epoch": 0.32, "learning_rate": 4.468324622789146e-05, "loss": 0.802, "step": 28810 }, { "epoch": 0.32, "learning_rate": 4.468232350075295e-05, "loss": 0.7652, "step": 28815 }, { "epoch": 0.32, "learning_rate": 4.468140077361444e-05, "loss": 0.7998, "step": 28820 }, { "epoch": 0.32, "learning_rate": 4.468047804647592e-05, "loss": 0.8208, "step": 28825 }, { "epoch": 0.32, "learning_rate": 4.467955531933741e-05, "loss": 0.8179, "step": 28830 }, { "epoch": 0.32, "learning_rate": 4.46786325921989e-05, "loss": 0.8294, "step": 28835 }, { "epoch": 0.32, "learning_rate": 4.467770986506038e-05, "loss": 0.8085, "step": 28840 }, { "epoch": 0.32, "learning_rate": 4.467678713792187e-05, "loss": 0.7992, "step": 28845 }, { "epoch": 0.32, "learning_rate": 4.467586441078336e-05, "loss": 0.7442, "step": 28850 }, { "epoch": 0.32, "learning_rate": 4.467494168364485e-05, "loss": 0.7183, "step": 28855 }, { "epoch": 0.32, "learning_rate": 4.4674018956506334e-05, "loss": 0.7496, "step": 28860 }, { "epoch": 0.32, "learning_rate": 4.467309622936782e-05, "loss": 0.8113, "step": 28865 }, { "epoch": 0.32, "learning_rate": 4.467217350222931e-05, "loss": 0.7987, "step": 28870 }, { "epoch": 0.32, "learning_rate": 4.4671250775090804e-05, "loss": 0.7433, "step": 28875 }, { "epoch": 0.32, "learning_rate": 4.4670328047952285e-05, "loss": 0.7388, "step": 28880 }, { "epoch": 0.32, "learning_rate": 4.466940532081377e-05, "loss": 0.7046, "step": 28885 }, { "epoch": 0.32, "learning_rate": 4.466848259367526e-05, "loss": 0.7817, "step": 28890 }, { "epoch": 0.32, "learning_rate": 4.466755986653675e-05, "loss": 0.7934, "step": 28895 }, { "epoch": 0.32, "learning_rate": 4.4666637139398236e-05, "loss": 0.8475, "step": 28900 }, { "epoch": 0.32, "learning_rate": 4.4665714412259724e-05, "loss": 0.8126, "step": 28905 }, { "epoch": 0.32, "learning_rate": 4.466479168512121e-05, "loss": 0.7906, "step": 28910 }, { "epoch": 0.32, "learning_rate": 4.46638689579827e-05, "loss": 0.8185, "step": 28915 }, { "epoch": 0.32, "learning_rate": 4.466294623084419e-05, "loss": 0.8069, "step": 28920 }, { "epoch": 0.32, "learning_rate": 4.4662023503705676e-05, "loss": 0.797, "step": 28925 }, { "epoch": 0.32, "learning_rate": 4.4661100776567163e-05, "loss": 0.8106, "step": 28930 }, { "epoch": 0.32, "learning_rate": 4.4660178049428644e-05, "loss": 0.8214, "step": 28935 }, { "epoch": 0.32, "learning_rate": 4.465925532229014e-05, "loss": 0.8072, "step": 28940 }, { "epoch": 0.32, "learning_rate": 4.465833259515163e-05, "loss": 0.768, "step": 28945 }, { "epoch": 0.32, "learning_rate": 4.4657409868013115e-05, "loss": 0.7319, "step": 28950 }, { "epoch": 0.32, "learning_rate": 4.4656487140874596e-05, "loss": 0.8081, "step": 28955 }, { "epoch": 0.32, "learning_rate": 4.465556441373609e-05, "loss": 0.7971, "step": 28960 }, { "epoch": 0.32, "learning_rate": 4.465464168659758e-05, "loss": 0.7484, "step": 28965 }, { "epoch": 0.32, "learning_rate": 4.465371895945906e-05, "loss": 0.7233, "step": 28970 }, { "epoch": 0.32, "learning_rate": 4.465279623232055e-05, "loss": 0.8018, "step": 28975 }, { "epoch": 0.32, "learning_rate": 4.465187350518204e-05, "loss": 0.7466, "step": 28980 }, { "epoch": 0.32, "learning_rate": 4.465095077804353e-05, "loss": 0.7464, "step": 28985 }, { "epoch": 0.32, "learning_rate": 4.465002805090501e-05, "loss": 0.7711, "step": 28990 }, { "epoch": 0.32, "learning_rate": 4.46491053237665e-05, "loss": 0.779, "step": 28995 }, { "epoch": 0.32, "learning_rate": 4.4648182596627987e-05, "loss": 0.7962, "step": 29000 }, { "epoch": 0.32, "eval_loss": 0.735903799533844, "eval_runtime": 69.3295, "eval_samples_per_second": 28.848, "eval_steps_per_second": 14.424, "step": 29000 }, { "epoch": 0.32, "learning_rate": 4.4647259869489474e-05, "loss": 0.7701, "step": 29005 }, { "epoch": 0.32, "learning_rate": 4.464633714235096e-05, "loss": 0.7857, "step": 29010 }, { "epoch": 0.32, "learning_rate": 4.464541441521245e-05, "loss": 0.8017, "step": 29015 }, { "epoch": 0.32, "learning_rate": 4.464449168807394e-05, "loss": 0.7904, "step": 29020 }, { "epoch": 0.32, "learning_rate": 4.4643568960935426e-05, "loss": 0.8276, "step": 29025 }, { "epoch": 0.32, "learning_rate": 4.4642646233796914e-05, "loss": 0.7853, "step": 29030 }, { "epoch": 0.32, "learning_rate": 4.46417235066584e-05, "loss": 0.8213, "step": 29035 }, { "epoch": 0.32, "learning_rate": 4.464080077951989e-05, "loss": 0.7843, "step": 29040 }, { "epoch": 0.32, "learning_rate": 4.463987805238138e-05, "loss": 0.8006, "step": 29045 }, { "epoch": 0.32, "learning_rate": 4.4638955325242865e-05, "loss": 0.766, "step": 29050 }, { "epoch": 0.32, "learning_rate": 4.463803259810435e-05, "loss": 0.7097, "step": 29055 }, { "epoch": 0.32, "learning_rate": 4.463710987096584e-05, "loss": 0.7766, "step": 29060 }, { "epoch": 0.32, "learning_rate": 4.463618714382733e-05, "loss": 0.7232, "step": 29065 }, { "epoch": 0.32, "learning_rate": 4.4635264416688816e-05, "loss": 0.7876, "step": 29070 }, { "epoch": 0.32, "learning_rate": 4.4634341689550304e-05, "loss": 0.8066, "step": 29075 }, { "epoch": 0.32, "learning_rate": 4.4633418962411785e-05, "loss": 0.7301, "step": 29080 }, { "epoch": 0.32, "learning_rate": 4.463249623527327e-05, "loss": 0.8424, "step": 29085 }, { "epoch": 0.32, "learning_rate": 4.463157350813477e-05, "loss": 0.7407, "step": 29090 }, { "epoch": 0.32, "learning_rate": 4.4630650780996256e-05, "loss": 0.7283, "step": 29095 }, { "epoch": 0.32, "learning_rate": 4.462972805385774e-05, "loss": 0.7804, "step": 29100 }, { "epoch": 0.32, "learning_rate": 4.4628805326719225e-05, "loss": 0.6993, "step": 29105 }, { "epoch": 0.32, "learning_rate": 4.462788259958072e-05, "loss": 0.7859, "step": 29110 }, { "epoch": 0.32, "learning_rate": 4.46269598724422e-05, "loss": 0.8133, "step": 29115 }, { "epoch": 0.32, "learning_rate": 4.462603714530369e-05, "loss": 0.775, "step": 29120 }, { "epoch": 0.32, "learning_rate": 4.4625114418165176e-05, "loss": 0.8645, "step": 29125 }, { "epoch": 0.32, "learning_rate": 4.462419169102667e-05, "loss": 0.7497, "step": 29130 }, { "epoch": 0.32, "learning_rate": 4.462326896388815e-05, "loss": 0.7664, "step": 29135 }, { "epoch": 0.32, "learning_rate": 4.462234623674964e-05, "loss": 0.743, "step": 29140 }, { "epoch": 0.32, "learning_rate": 4.462142350961113e-05, "loss": 0.8447, "step": 29145 }, { "epoch": 0.32, "learning_rate": 4.4620500782472615e-05, "loss": 0.7718, "step": 29150 }, { "epoch": 0.32, "learning_rate": 4.46195780553341e-05, "loss": 0.7345, "step": 29155 }, { "epoch": 0.32, "learning_rate": 4.461865532819559e-05, "loss": 0.7581, "step": 29160 }, { "epoch": 0.32, "learning_rate": 4.461773260105708e-05, "loss": 0.8235, "step": 29165 }, { "epoch": 0.32, "learning_rate": 4.4616809873918567e-05, "loss": 0.7085, "step": 29170 }, { "epoch": 0.32, "learning_rate": 4.4615887146780054e-05, "loss": 0.7511, "step": 29175 }, { "epoch": 0.32, "learning_rate": 4.461496441964154e-05, "loss": 0.8098, "step": 29180 }, { "epoch": 0.32, "learning_rate": 4.461404169250303e-05, "loss": 0.8286, "step": 29185 }, { "epoch": 0.32, "learning_rate": 4.461311896536451e-05, "loss": 0.816, "step": 29190 }, { "epoch": 0.32, "learning_rate": 4.4612196238226006e-05, "loss": 0.798, "step": 29195 }, { "epoch": 0.32, "learning_rate": 4.4611273511087494e-05, "loss": 0.7997, "step": 29200 }, { "epoch": 0.32, "learning_rate": 4.461035078394898e-05, "loss": 0.8359, "step": 29205 }, { "epoch": 0.32, "learning_rate": 4.460942805681046e-05, "loss": 0.8126, "step": 29210 }, { "epoch": 0.32, "learning_rate": 4.460850532967196e-05, "loss": 0.7089, "step": 29215 }, { "epoch": 0.32, "learning_rate": 4.4607582602533445e-05, "loss": 0.8234, "step": 29220 }, { "epoch": 0.32, "learning_rate": 4.4606659875394926e-05, "loss": 0.8831, "step": 29225 }, { "epoch": 0.32, "learning_rate": 4.4605737148256414e-05, "loss": 0.7392, "step": 29230 }, { "epoch": 0.32, "learning_rate": 4.46048144211179e-05, "loss": 0.7965, "step": 29235 }, { "epoch": 0.32, "learning_rate": 4.4603891693979396e-05, "loss": 0.8336, "step": 29240 }, { "epoch": 0.32, "learning_rate": 4.460296896684088e-05, "loss": 0.8179, "step": 29245 }, { "epoch": 0.32, "learning_rate": 4.4602046239702365e-05, "loss": 0.7388, "step": 29250 }, { "epoch": 0.32, "learning_rate": 4.460112351256385e-05, "loss": 0.8222, "step": 29255 }, { "epoch": 0.32, "learning_rate": 4.460020078542535e-05, "loss": 0.759, "step": 29260 }, { "epoch": 0.32, "learning_rate": 4.459927805828683e-05, "loss": 0.7505, "step": 29265 }, { "epoch": 0.32, "learning_rate": 4.459835533114832e-05, "loss": 0.7809, "step": 29270 }, { "epoch": 0.32, "learning_rate": 4.4597432604009805e-05, "loss": 0.7925, "step": 29275 }, { "epoch": 0.32, "learning_rate": 4.459650987687129e-05, "loss": 0.7597, "step": 29280 }, { "epoch": 0.32, "learning_rate": 4.459558714973278e-05, "loss": 0.7939, "step": 29285 }, { "epoch": 0.32, "learning_rate": 4.459466442259427e-05, "loss": 0.7675, "step": 29290 }, { "epoch": 0.32, "learning_rate": 4.4593741695455756e-05, "loss": 0.7864, "step": 29295 }, { "epoch": 0.32, "learning_rate": 4.459281896831724e-05, "loss": 0.7919, "step": 29300 }, { "epoch": 0.32, "learning_rate": 4.459189624117873e-05, "loss": 0.7929, "step": 29305 }, { "epoch": 0.32, "learning_rate": 4.459097351404022e-05, "loss": 0.7984, "step": 29310 }, { "epoch": 0.32, "learning_rate": 4.459005078690171e-05, "loss": 0.831, "step": 29315 }, { "epoch": 0.32, "learning_rate": 4.458912805976319e-05, "loss": 0.7361, "step": 29320 }, { "epoch": 0.32, "learning_rate": 4.458820533262468e-05, "loss": 0.7443, "step": 29325 }, { "epoch": 0.32, "learning_rate": 4.458728260548617e-05, "loss": 0.7567, "step": 29330 }, { "epoch": 0.32, "learning_rate": 4.458635987834766e-05, "loss": 0.7562, "step": 29335 }, { "epoch": 0.32, "learning_rate": 4.458543715120914e-05, "loss": 0.8223, "step": 29340 }, { "epoch": 0.32, "learning_rate": 4.4584514424070635e-05, "loss": 0.7603, "step": 29345 }, { "epoch": 0.32, "learning_rate": 4.458359169693212e-05, "loss": 0.7813, "step": 29350 }, { "epoch": 0.33, "learning_rate": 4.4582668969793603e-05, "loss": 0.7856, "step": 29355 }, { "epoch": 0.33, "learning_rate": 4.458174624265509e-05, "loss": 0.7409, "step": 29360 }, { "epoch": 0.33, "learning_rate": 4.4580823515516586e-05, "loss": 0.8391, "step": 29365 }, { "epoch": 0.33, "learning_rate": 4.4579900788378074e-05, "loss": 0.7598, "step": 29370 }, { "epoch": 0.33, "learning_rate": 4.4578978061239555e-05, "loss": 0.744, "step": 29375 }, { "epoch": 0.33, "learning_rate": 4.457805533410104e-05, "loss": 0.7283, "step": 29380 }, { "epoch": 0.33, "learning_rate": 4.457713260696253e-05, "loss": 0.8284, "step": 29385 }, { "epoch": 0.33, "learning_rate": 4.457620987982402e-05, "loss": 0.7635, "step": 29390 }, { "epoch": 0.33, "learning_rate": 4.4575287152685506e-05, "loss": 0.7842, "step": 29395 }, { "epoch": 0.33, "learning_rate": 4.4574364425546994e-05, "loss": 0.7632, "step": 29400 }, { "epoch": 0.33, "learning_rate": 4.457344169840848e-05, "loss": 0.8188, "step": 29405 }, { "epoch": 0.33, "learning_rate": 4.457251897126997e-05, "loss": 0.789, "step": 29410 }, { "epoch": 0.33, "learning_rate": 4.457159624413146e-05, "loss": 0.8118, "step": 29415 }, { "epoch": 0.33, "learning_rate": 4.4570673516992945e-05, "loss": 0.7899, "step": 29420 }, { "epoch": 0.33, "learning_rate": 4.456975078985443e-05, "loss": 0.7878, "step": 29425 }, { "epoch": 0.33, "learning_rate": 4.456882806271592e-05, "loss": 0.8009, "step": 29430 }, { "epoch": 0.33, "learning_rate": 4.456790533557741e-05, "loss": 0.8018, "step": 29435 }, { "epoch": 0.33, "learning_rate": 4.45669826084389e-05, "loss": 0.7647, "step": 29440 }, { "epoch": 0.33, "learning_rate": 4.4566059881300385e-05, "loss": 0.7655, "step": 29445 }, { "epoch": 0.33, "learning_rate": 4.4565137154161866e-05, "loss": 0.7602, "step": 29450 }, { "epoch": 0.33, "learning_rate": 4.456421442702336e-05, "loss": 0.8204, "step": 29455 }, { "epoch": 0.33, "learning_rate": 4.456329169988485e-05, "loss": 0.7361, "step": 29460 }, { "epoch": 0.33, "learning_rate": 4.456236897274633e-05, "loss": 0.7951, "step": 29465 }, { "epoch": 0.33, "learning_rate": 4.456144624560782e-05, "loss": 0.808, "step": 29470 }, { "epoch": 0.33, "learning_rate": 4.456052351846931e-05, "loss": 0.7568, "step": 29475 }, { "epoch": 0.33, "learning_rate": 4.45596007913308e-05, "loss": 0.7703, "step": 29480 }, { "epoch": 0.33, "learning_rate": 4.455867806419228e-05, "loss": 0.7683, "step": 29485 }, { "epoch": 0.33, "learning_rate": 4.455775533705377e-05, "loss": 0.782, "step": 29490 }, { "epoch": 0.33, "learning_rate": 4.455683260991526e-05, "loss": 0.8018, "step": 29495 }, { "epoch": 0.33, "learning_rate": 4.4555909882776744e-05, "loss": 0.846, "step": 29500 }, { "epoch": 0.33, "learning_rate": 4.455498715563823e-05, "loss": 0.7688, "step": 29505 }, { "epoch": 0.33, "learning_rate": 4.455406442849972e-05, "loss": 0.7804, "step": 29510 }, { "epoch": 0.33, "learning_rate": 4.4553141701361215e-05, "loss": 0.8144, "step": 29515 }, { "epoch": 0.33, "learning_rate": 4.4552218974222696e-05, "loss": 0.8344, "step": 29520 }, { "epoch": 0.33, "learning_rate": 4.4551296247084184e-05, "loss": 0.7611, "step": 29525 }, { "epoch": 0.33, "learning_rate": 4.455037351994567e-05, "loss": 0.7245, "step": 29530 }, { "epoch": 0.33, "learning_rate": 4.454945079280716e-05, "loss": 0.7625, "step": 29535 }, { "epoch": 0.33, "learning_rate": 4.454852806566865e-05, "loss": 0.7786, "step": 29540 }, { "epoch": 0.33, "learning_rate": 4.4547605338530135e-05, "loss": 0.7464, "step": 29545 }, { "epoch": 0.33, "learning_rate": 4.454668261139162e-05, "loss": 0.8698, "step": 29550 }, { "epoch": 0.33, "learning_rate": 4.454575988425311e-05, "loss": 0.8171, "step": 29555 }, { "epoch": 0.33, "learning_rate": 4.45448371571146e-05, "loss": 0.8373, "step": 29560 }, { "epoch": 0.33, "learning_rate": 4.4543914429976086e-05, "loss": 0.7604, "step": 29565 }, { "epoch": 0.33, "learning_rate": 4.4542991702837574e-05, "loss": 0.8215, "step": 29570 }, { "epoch": 0.33, "learning_rate": 4.4542068975699055e-05, "loss": 0.7563, "step": 29575 }, { "epoch": 0.33, "learning_rate": 4.454114624856055e-05, "loss": 0.78, "step": 29580 }, { "epoch": 0.33, "learning_rate": 4.454022352142204e-05, "loss": 0.79, "step": 29585 }, { "epoch": 0.33, "learning_rate": 4.4539300794283526e-05, "loss": 0.7696, "step": 29590 }, { "epoch": 0.33, "learning_rate": 4.453837806714501e-05, "loss": 0.7815, "step": 29595 }, { "epoch": 0.33, "learning_rate": 4.45374553400065e-05, "loss": 0.8074, "step": 29600 }, { "epoch": 0.33, "learning_rate": 4.453653261286799e-05, "loss": 0.7484, "step": 29605 }, { "epoch": 0.33, "learning_rate": 4.453560988572947e-05, "loss": 0.8008, "step": 29610 }, { "epoch": 0.33, "learning_rate": 4.453468715859096e-05, "loss": 0.7773, "step": 29615 }, { "epoch": 0.33, "learning_rate": 4.4533764431452446e-05, "loss": 0.8131, "step": 29620 }, { "epoch": 0.33, "learning_rate": 4.453284170431394e-05, "loss": 0.7398, "step": 29625 }, { "epoch": 0.33, "learning_rate": 4.453191897717542e-05, "loss": 0.808, "step": 29630 }, { "epoch": 0.33, "learning_rate": 4.453099625003691e-05, "loss": 0.7926, "step": 29635 }, { "epoch": 0.33, "learning_rate": 4.45300735228984e-05, "loss": 0.8389, "step": 29640 }, { "epoch": 0.33, "learning_rate": 4.452915079575989e-05, "loss": 0.7702, "step": 29645 }, { "epoch": 0.33, "learning_rate": 4.452822806862137e-05, "loss": 0.8011, "step": 29650 }, { "epoch": 0.33, "learning_rate": 4.452730534148286e-05, "loss": 0.8118, "step": 29655 }, { "epoch": 0.33, "learning_rate": 4.452638261434435e-05, "loss": 0.7461, "step": 29660 }, { "epoch": 0.33, "learning_rate": 4.4525459887205836e-05, "loss": 0.7617, "step": 29665 }, { "epoch": 0.33, "learning_rate": 4.4524537160067324e-05, "loss": 0.7666, "step": 29670 }, { "epoch": 0.33, "learning_rate": 4.452361443292881e-05, "loss": 0.9429, "step": 29675 }, { "epoch": 0.33, "learning_rate": 4.45226917057903e-05, "loss": 0.7764, "step": 29680 }, { "epoch": 0.33, "learning_rate": 4.452176897865178e-05, "loss": 0.7181, "step": 29685 }, { "epoch": 0.33, "learning_rate": 4.4520846251513276e-05, "loss": 0.767, "step": 29690 }, { "epoch": 0.33, "learning_rate": 4.4519923524374764e-05, "loss": 0.7768, "step": 29695 }, { "epoch": 0.33, "learning_rate": 4.451900079723625e-05, "loss": 0.8037, "step": 29700 }, { "epoch": 0.33, "learning_rate": 4.451807807009773e-05, "loss": 0.7442, "step": 29705 }, { "epoch": 0.33, "learning_rate": 4.451715534295923e-05, "loss": 0.7215, "step": 29710 }, { "epoch": 0.33, "learning_rate": 4.4516232615820715e-05, "loss": 0.7827, "step": 29715 }, { "epoch": 0.33, "learning_rate": 4.45153098886822e-05, "loss": 0.6861, "step": 29720 }, { "epoch": 0.33, "learning_rate": 4.4514387161543684e-05, "loss": 0.7837, "step": 29725 }, { "epoch": 0.33, "learning_rate": 4.451346443440518e-05, "loss": 0.7979, "step": 29730 }, { "epoch": 0.33, "learning_rate": 4.4512541707266666e-05, "loss": 0.763, "step": 29735 }, { "epoch": 0.33, "learning_rate": 4.451161898012815e-05, "loss": 0.7198, "step": 29740 }, { "epoch": 0.33, "learning_rate": 4.4510696252989635e-05, "loss": 0.7735, "step": 29745 }, { "epoch": 0.33, "learning_rate": 4.450977352585113e-05, "loss": 0.8059, "step": 29750 }, { "epoch": 0.33, "learning_rate": 4.450885079871262e-05, "loss": 0.7655, "step": 29755 }, { "epoch": 0.33, "learning_rate": 4.45079280715741e-05, "loss": 0.7986, "step": 29760 }, { "epoch": 0.33, "learning_rate": 4.450700534443559e-05, "loss": 0.7935, "step": 29765 }, { "epoch": 0.33, "learning_rate": 4.4506082617297075e-05, "loss": 0.738, "step": 29770 }, { "epoch": 0.33, "learning_rate": 4.450515989015856e-05, "loss": 0.7304, "step": 29775 }, { "epoch": 0.33, "learning_rate": 4.450423716302005e-05, "loss": 0.7551, "step": 29780 }, { "epoch": 0.33, "learning_rate": 4.450331443588154e-05, "loss": 0.7284, "step": 29785 }, { "epoch": 0.33, "learning_rate": 4.4502391708743026e-05, "loss": 0.7737, "step": 29790 }, { "epoch": 0.33, "learning_rate": 4.4501468981604514e-05, "loss": 0.8273, "step": 29795 }, { "epoch": 0.33, "learning_rate": 4.4500546254466e-05, "loss": 0.7901, "step": 29800 }, { "epoch": 0.33, "learning_rate": 4.449962352732749e-05, "loss": 0.8324, "step": 29805 }, { "epoch": 0.33, "learning_rate": 4.449870080018898e-05, "loss": 0.7792, "step": 29810 }, { "epoch": 0.33, "learning_rate": 4.4497778073050465e-05, "loss": 0.7706, "step": 29815 }, { "epoch": 0.33, "learning_rate": 4.449685534591195e-05, "loss": 0.7537, "step": 29820 }, { "epoch": 0.33, "learning_rate": 4.449593261877344e-05, "loss": 0.7894, "step": 29825 }, { "epoch": 0.33, "learning_rate": 4.449500989163493e-05, "loss": 0.7402, "step": 29830 }, { "epoch": 0.33, "learning_rate": 4.449408716449641e-05, "loss": 0.832, "step": 29835 }, { "epoch": 0.33, "learning_rate": 4.4493164437357904e-05, "loss": 0.745, "step": 29840 }, { "epoch": 0.33, "learning_rate": 4.449224171021939e-05, "loss": 0.7306, "step": 29845 }, { "epoch": 0.33, "learning_rate": 4.449131898308087e-05, "loss": 0.7836, "step": 29850 }, { "epoch": 0.33, "learning_rate": 4.449039625594236e-05, "loss": 0.7084, "step": 29855 }, { "epoch": 0.33, "learning_rate": 4.4489473528803856e-05, "loss": 0.7753, "step": 29860 }, { "epoch": 0.33, "learning_rate": 4.4488550801665344e-05, "loss": 0.7198, "step": 29865 }, { "epoch": 0.33, "learning_rate": 4.4487628074526825e-05, "loss": 0.8057, "step": 29870 }, { "epoch": 0.33, "learning_rate": 4.448670534738831e-05, "loss": 0.7718, "step": 29875 }, { "epoch": 0.33, "learning_rate": 4.448578262024981e-05, "loss": 0.7442, "step": 29880 }, { "epoch": 0.33, "learning_rate": 4.448485989311129e-05, "loss": 0.8117, "step": 29885 }, { "epoch": 0.33, "learning_rate": 4.4483937165972776e-05, "loss": 0.8052, "step": 29890 }, { "epoch": 0.33, "learning_rate": 4.4483014438834264e-05, "loss": 0.6923, "step": 29895 }, { "epoch": 0.33, "learning_rate": 4.448209171169576e-05, "loss": 0.7647, "step": 29900 }, { "epoch": 0.33, "learning_rate": 4.448116898455724e-05, "loss": 0.7806, "step": 29905 }, { "epoch": 0.33, "learning_rate": 4.448024625741873e-05, "loss": 0.8033, "step": 29910 }, { "epoch": 0.33, "learning_rate": 4.4479323530280215e-05, "loss": 0.7513, "step": 29915 }, { "epoch": 0.33, "learning_rate": 4.44784008031417e-05, "loss": 0.7577, "step": 29920 }, { "epoch": 0.33, "learning_rate": 4.447747807600319e-05, "loss": 0.7627, "step": 29925 }, { "epoch": 0.33, "learning_rate": 4.447655534886468e-05, "loss": 0.6734, "step": 29930 }, { "epoch": 0.33, "learning_rate": 4.447563262172617e-05, "loss": 0.7196, "step": 29935 }, { "epoch": 0.33, "learning_rate": 4.4474709894587655e-05, "loss": 0.7968, "step": 29940 }, { "epoch": 0.33, "learning_rate": 4.447378716744914e-05, "loss": 0.7703, "step": 29945 }, { "epoch": 0.33, "learning_rate": 4.447286444031063e-05, "loss": 0.7832, "step": 29950 }, { "epoch": 0.33, "learning_rate": 4.447194171317212e-05, "loss": 0.8281, "step": 29955 }, { "epoch": 0.33, "learning_rate": 4.44710189860336e-05, "loss": 0.773, "step": 29960 }, { "epoch": 0.33, "learning_rate": 4.4470096258895094e-05, "loss": 0.83, "step": 29965 }, { "epoch": 0.33, "learning_rate": 4.446917353175658e-05, "loss": 0.7658, "step": 29970 }, { "epoch": 0.33, "learning_rate": 4.446825080461807e-05, "loss": 0.7575, "step": 29975 }, { "epoch": 0.33, "learning_rate": 4.446732807747955e-05, "loss": 0.8105, "step": 29980 }, { "epoch": 0.33, "learning_rate": 4.446640535034104e-05, "loss": 0.7866, "step": 29985 }, { "epoch": 0.33, "learning_rate": 4.446548262320253e-05, "loss": 0.7766, "step": 29990 }, { "epoch": 0.33, "learning_rate": 4.4464559896064014e-05, "loss": 0.7908, "step": 29995 }, { "epoch": 0.33, "learning_rate": 4.44636371689255e-05, "loss": 0.8048, "step": 30000 }, { "epoch": 0.33, "eval_loss": 0.7409001588821411, "eval_runtime": 69.2416, "eval_samples_per_second": 28.884, "eval_steps_per_second": 14.442, "step": 30000 }, { "epoch": 0.33, "learning_rate": 4.446271444178699e-05, "loss": 0.7543, "step": 30005 }, { "epoch": 0.33, "learning_rate": 4.4461791714648485e-05, "loss": 0.8328, "step": 30010 }, { "epoch": 0.33, "learning_rate": 4.4460868987509966e-05, "loss": 0.7727, "step": 30015 }, { "epoch": 0.33, "learning_rate": 4.4459946260371453e-05, "loss": 0.829, "step": 30020 }, { "epoch": 0.33, "learning_rate": 4.445902353323294e-05, "loss": 0.7555, "step": 30025 }, { "epoch": 0.33, "learning_rate": 4.4458100806094436e-05, "loss": 0.8533, "step": 30030 }, { "epoch": 0.33, "learning_rate": 4.445717807895592e-05, "loss": 0.7865, "step": 30035 }, { "epoch": 0.33, "learning_rate": 4.4456255351817405e-05, "loss": 0.7875, "step": 30040 }, { "epoch": 0.33, "learning_rate": 4.445533262467889e-05, "loss": 0.7166, "step": 30045 }, { "epoch": 0.33, "learning_rate": 4.445440989754038e-05, "loss": 0.76, "step": 30050 }, { "epoch": 0.33, "learning_rate": 4.445348717040187e-05, "loss": 0.7845, "step": 30055 }, { "epoch": 0.33, "learning_rate": 4.4452564443263356e-05, "loss": 0.7342, "step": 30060 }, { "epoch": 0.33, "learning_rate": 4.4451641716124844e-05, "loss": 0.7389, "step": 30065 }, { "epoch": 0.33, "learning_rate": 4.4450718988986325e-05, "loss": 0.7662, "step": 30070 }, { "epoch": 0.33, "learning_rate": 4.444979626184782e-05, "loss": 0.7456, "step": 30075 }, { "epoch": 0.33, "learning_rate": 4.444887353470931e-05, "loss": 0.8015, "step": 30080 }, { "epoch": 0.33, "learning_rate": 4.4447950807570795e-05, "loss": 0.7117, "step": 30085 }, { "epoch": 0.33, "learning_rate": 4.4447028080432277e-05, "loss": 0.7861, "step": 30090 }, { "epoch": 0.33, "learning_rate": 4.444610535329377e-05, "loss": 0.7698, "step": 30095 }, { "epoch": 0.33, "learning_rate": 4.444518262615526e-05, "loss": 0.7654, "step": 30100 }, { "epoch": 0.33, "learning_rate": 4.444425989901675e-05, "loss": 0.7344, "step": 30105 }, { "epoch": 0.33, "learning_rate": 4.444333717187823e-05, "loss": 0.8331, "step": 30110 }, { "epoch": 0.33, "learning_rate": 4.444241444473972e-05, "loss": 0.7495, "step": 30115 }, { "epoch": 0.33, "learning_rate": 4.444149171760121e-05, "loss": 0.7423, "step": 30120 }, { "epoch": 0.33, "learning_rate": 4.444056899046269e-05, "loss": 0.8212, "step": 30125 }, { "epoch": 0.33, "learning_rate": 4.443964626332418e-05, "loss": 0.8031, "step": 30130 }, { "epoch": 0.33, "learning_rate": 4.443872353618567e-05, "loss": 0.7925, "step": 30135 }, { "epoch": 0.33, "learning_rate": 4.443780080904716e-05, "loss": 0.8024, "step": 30140 }, { "epoch": 0.33, "learning_rate": 4.443687808190864e-05, "loss": 0.8142, "step": 30145 }, { "epoch": 0.33, "learning_rate": 4.443595535477013e-05, "loss": 0.899, "step": 30150 }, { "epoch": 0.33, "learning_rate": 4.443503262763162e-05, "loss": 0.7415, "step": 30155 }, { "epoch": 0.33, "learning_rate": 4.4434109900493106e-05, "loss": 0.7655, "step": 30160 }, { "epoch": 0.33, "learning_rate": 4.4433187173354594e-05, "loss": 0.7778, "step": 30165 }, { "epoch": 0.33, "learning_rate": 4.443226444621608e-05, "loss": 0.7748, "step": 30170 }, { "epoch": 0.33, "learning_rate": 4.443134171907757e-05, "loss": 0.8532, "step": 30175 }, { "epoch": 0.33, "learning_rate": 4.443041899193906e-05, "loss": 0.8574, "step": 30180 }, { "epoch": 0.33, "learning_rate": 4.4429496264800546e-05, "loss": 0.7975, "step": 30185 }, { "epoch": 0.33, "learning_rate": 4.4428573537662034e-05, "loss": 0.739, "step": 30190 }, { "epoch": 0.33, "learning_rate": 4.442765081052352e-05, "loss": 0.805, "step": 30195 }, { "epoch": 0.33, "learning_rate": 4.442672808338501e-05, "loss": 0.7869, "step": 30200 }, { "epoch": 0.33, "learning_rate": 4.44258053562465e-05, "loss": 0.7815, "step": 30205 }, { "epoch": 0.33, "learning_rate": 4.4424882629107985e-05, "loss": 0.8033, "step": 30210 }, { "epoch": 0.33, "learning_rate": 4.442395990196947e-05, "loss": 0.7528, "step": 30215 }, { "epoch": 0.33, "learning_rate": 4.4423037174830954e-05, "loss": 0.7503, "step": 30220 }, { "epoch": 0.33, "learning_rate": 4.442211444769245e-05, "loss": 0.7501, "step": 30225 }, { "epoch": 0.33, "learning_rate": 4.4421191720553936e-05, "loss": 0.7873, "step": 30230 }, { "epoch": 0.33, "learning_rate": 4.442026899341542e-05, "loss": 0.8129, "step": 30235 }, { "epoch": 0.33, "learning_rate": 4.4419346266276905e-05, "loss": 0.7738, "step": 30240 }, { "epoch": 0.33, "learning_rate": 4.44184235391384e-05, "loss": 0.7421, "step": 30245 }, { "epoch": 0.33, "learning_rate": 4.441750081199989e-05, "loss": 0.7777, "step": 30250 }, { "epoch": 0.34, "learning_rate": 4.441657808486137e-05, "loss": 0.7889, "step": 30255 }, { "epoch": 0.34, "learning_rate": 4.4415655357722857e-05, "loss": 0.8183, "step": 30260 }, { "epoch": 0.34, "learning_rate": 4.441473263058435e-05, "loss": 0.8115, "step": 30265 }, { "epoch": 0.34, "learning_rate": 4.441380990344583e-05, "loss": 0.7742, "step": 30270 }, { "epoch": 0.34, "learning_rate": 4.441288717630732e-05, "loss": 0.8196, "step": 30275 }, { "epoch": 0.34, "learning_rate": 4.441196444916881e-05, "loss": 0.7636, "step": 30280 }, { "epoch": 0.34, "learning_rate": 4.4411041722030296e-05, "loss": 0.7704, "step": 30285 }, { "epoch": 0.34, "learning_rate": 4.4410118994891784e-05, "loss": 0.7776, "step": 30290 }, { "epoch": 0.34, "learning_rate": 4.440919626775327e-05, "loss": 0.7406, "step": 30295 }, { "epoch": 0.34, "learning_rate": 4.440827354061476e-05, "loss": 0.7802, "step": 30300 }, { "epoch": 0.34, "learning_rate": 4.440735081347625e-05, "loss": 0.7588, "step": 30305 }, { "epoch": 0.34, "learning_rate": 4.4406428086337735e-05, "loss": 0.7304, "step": 30310 }, { "epoch": 0.34, "learning_rate": 4.440550535919922e-05, "loss": 0.7714, "step": 30315 }, { "epoch": 0.34, "learning_rate": 4.440458263206071e-05, "loss": 0.776, "step": 30320 }, { "epoch": 0.34, "learning_rate": 4.44036599049222e-05, "loss": 0.785, "step": 30325 }, { "epoch": 0.34, "learning_rate": 4.4402737177783686e-05, "loss": 0.7583, "step": 30330 }, { "epoch": 0.34, "learning_rate": 4.4401814450645174e-05, "loss": 0.7943, "step": 30335 }, { "epoch": 0.34, "learning_rate": 4.440089172350666e-05, "loss": 0.7641, "step": 30340 }, { "epoch": 0.34, "learning_rate": 4.439996899636814e-05, "loss": 0.872, "step": 30345 }, { "epoch": 0.34, "learning_rate": 4.439904626922964e-05, "loss": 0.7701, "step": 30350 }, { "epoch": 0.34, "learning_rate": 4.4398123542091126e-05, "loss": 0.8501, "step": 30355 }, { "epoch": 0.34, "learning_rate": 4.4397200814952614e-05, "loss": 0.7841, "step": 30360 }, { "epoch": 0.34, "learning_rate": 4.4396278087814095e-05, "loss": 0.7691, "step": 30365 }, { "epoch": 0.34, "learning_rate": 4.439535536067558e-05, "loss": 0.7842, "step": 30370 }, { "epoch": 0.34, "learning_rate": 4.439443263353708e-05, "loss": 0.8065, "step": 30375 }, { "epoch": 0.34, "learning_rate": 4.439350990639856e-05, "loss": 0.8438, "step": 30380 }, { "epoch": 0.34, "learning_rate": 4.4392587179260046e-05, "loss": 0.808, "step": 30385 }, { "epoch": 0.34, "learning_rate": 4.4391664452121534e-05, "loss": 0.7507, "step": 30390 }, { "epoch": 0.34, "learning_rate": 4.439074172498303e-05, "loss": 0.7818, "step": 30395 }, { "epoch": 0.34, "learning_rate": 4.438981899784451e-05, "loss": 0.7962, "step": 30400 }, { "epoch": 0.34, "learning_rate": 4.4388896270706e-05, "loss": 0.7598, "step": 30405 }, { "epoch": 0.34, "learning_rate": 4.4387973543567485e-05, "loss": 0.7529, "step": 30410 }, { "epoch": 0.34, "learning_rate": 4.438705081642898e-05, "loss": 0.7877, "step": 30415 }, { "epoch": 0.34, "learning_rate": 4.438612808929046e-05, "loss": 0.8417, "step": 30420 }, { "epoch": 0.34, "learning_rate": 4.438520536215195e-05, "loss": 0.7848, "step": 30425 }, { "epoch": 0.34, "learning_rate": 4.438428263501344e-05, "loss": 0.7636, "step": 30430 }, { "epoch": 0.34, "learning_rate": 4.4383359907874925e-05, "loss": 0.8167, "step": 30435 }, { "epoch": 0.34, "learning_rate": 4.438243718073641e-05, "loss": 0.7699, "step": 30440 }, { "epoch": 0.34, "learning_rate": 4.43815144535979e-05, "loss": 0.7406, "step": 30445 }, { "epoch": 0.34, "learning_rate": 4.438059172645939e-05, "loss": 0.7527, "step": 30450 }, { "epoch": 0.34, "learning_rate": 4.437966899932087e-05, "loss": 0.8136, "step": 30455 }, { "epoch": 0.34, "learning_rate": 4.4378746272182364e-05, "loss": 0.7618, "step": 30460 }, { "epoch": 0.34, "learning_rate": 4.437782354504385e-05, "loss": 0.7578, "step": 30465 }, { "epoch": 0.34, "learning_rate": 4.437690081790534e-05, "loss": 0.7803, "step": 30470 }, { "epoch": 0.34, "learning_rate": 4.437597809076682e-05, "loss": 0.7562, "step": 30475 }, { "epoch": 0.34, "learning_rate": 4.4375055363628315e-05, "loss": 0.7613, "step": 30480 }, { "epoch": 0.34, "learning_rate": 4.43741326364898e-05, "loss": 0.7226, "step": 30485 }, { "epoch": 0.34, "learning_rate": 4.437320990935129e-05, "loss": 0.7767, "step": 30490 }, { "epoch": 0.34, "learning_rate": 4.437228718221277e-05, "loss": 0.8812, "step": 30495 }, { "epoch": 0.34, "learning_rate": 4.4371364455074267e-05, "loss": 0.7979, "step": 30500 }, { "epoch": 0.34, "learning_rate": 4.4370441727935754e-05, "loss": 0.6825, "step": 30505 }, { "epoch": 0.34, "learning_rate": 4.4369519000797235e-05, "loss": 0.7978, "step": 30510 }, { "epoch": 0.34, "learning_rate": 4.436859627365872e-05, "loss": 0.8703, "step": 30515 }, { "epoch": 0.34, "learning_rate": 4.436767354652021e-05, "loss": 0.7831, "step": 30520 }, { "epoch": 0.34, "learning_rate": 4.4366750819381706e-05, "loss": 0.7661, "step": 30525 }, { "epoch": 0.34, "learning_rate": 4.436582809224319e-05, "loss": 0.8052, "step": 30530 }, { "epoch": 0.34, "learning_rate": 4.4364905365104675e-05, "loss": 0.7425, "step": 30535 }, { "epoch": 0.34, "learning_rate": 4.436398263796616e-05, "loss": 0.7212, "step": 30540 }, { "epoch": 0.34, "learning_rate": 4.436305991082765e-05, "loss": 0.7718, "step": 30545 }, { "epoch": 0.34, "learning_rate": 4.436213718368914e-05, "loss": 0.7567, "step": 30550 }, { "epoch": 0.34, "learning_rate": 4.4361214456550626e-05, "loss": 0.802, "step": 30555 }, { "epoch": 0.34, "learning_rate": 4.4360291729412114e-05, "loss": 0.8192, "step": 30560 }, { "epoch": 0.34, "learning_rate": 4.43593690022736e-05, "loss": 0.8008, "step": 30565 }, { "epoch": 0.34, "learning_rate": 4.435844627513509e-05, "loss": 0.7917, "step": 30570 }, { "epoch": 0.34, "learning_rate": 4.435752354799658e-05, "loss": 0.6957, "step": 30575 }, { "epoch": 0.34, "learning_rate": 4.4356600820858065e-05, "loss": 0.8159, "step": 30580 }, { "epoch": 0.34, "learning_rate": 4.435567809371955e-05, "loss": 0.7609, "step": 30585 }, { "epoch": 0.34, "learning_rate": 4.435475536658104e-05, "loss": 0.7859, "step": 30590 }, { "epoch": 0.34, "learning_rate": 4.435383263944253e-05, "loss": 0.7925, "step": 30595 }, { "epoch": 0.34, "learning_rate": 4.435290991230402e-05, "loss": 0.7931, "step": 30600 }, { "epoch": 0.34, "learning_rate": 4.43519871851655e-05, "loss": 0.7754, "step": 30605 }, { "epoch": 0.34, "learning_rate": 4.435106445802699e-05, "loss": 0.7758, "step": 30610 }, { "epoch": 0.34, "learning_rate": 4.435014173088848e-05, "loss": 0.7593, "step": 30615 }, { "epoch": 0.34, "learning_rate": 4.434921900374996e-05, "loss": 0.7275, "step": 30620 }, { "epoch": 0.34, "learning_rate": 4.434829627661145e-05, "loss": 0.7817, "step": 30625 }, { "epoch": 0.34, "learning_rate": 4.4347373549472944e-05, "loss": 0.7366, "step": 30630 }, { "epoch": 0.34, "learning_rate": 4.434645082233443e-05, "loss": 0.7573, "step": 30635 }, { "epoch": 0.34, "learning_rate": 4.434552809519591e-05, "loss": 0.7274, "step": 30640 }, { "epoch": 0.34, "learning_rate": 4.43446053680574e-05, "loss": 0.8568, "step": 30645 }, { "epoch": 0.34, "learning_rate": 4.4343682640918895e-05, "loss": 0.7357, "step": 30650 }, { "epoch": 0.34, "learning_rate": 4.4342759913780376e-05, "loss": 0.769, "step": 30655 }, { "epoch": 0.34, "learning_rate": 4.4341837186641864e-05, "loss": 0.7788, "step": 30660 }, { "epoch": 0.34, "learning_rate": 4.434091445950335e-05, "loss": 0.8092, "step": 30665 }, { "epoch": 0.34, "learning_rate": 4.433999173236484e-05, "loss": 0.7663, "step": 30670 }, { "epoch": 0.34, "learning_rate": 4.433906900522633e-05, "loss": 0.8047, "step": 30675 }, { "epoch": 0.34, "learning_rate": 4.4338146278087816e-05, "loss": 0.794, "step": 30680 }, { "epoch": 0.34, "learning_rate": 4.4337223550949303e-05, "loss": 0.7429, "step": 30685 }, { "epoch": 0.34, "learning_rate": 4.433630082381079e-05, "loss": 0.7305, "step": 30690 }, { "epoch": 0.34, "learning_rate": 4.433537809667228e-05, "loss": 0.7879, "step": 30695 }, { "epoch": 0.34, "learning_rate": 4.433445536953377e-05, "loss": 0.7377, "step": 30700 }, { "epoch": 0.34, "learning_rate": 4.4333532642395255e-05, "loss": 0.8032, "step": 30705 }, { "epoch": 0.34, "learning_rate": 4.433260991525674e-05, "loss": 0.7461, "step": 30710 }, { "epoch": 0.34, "learning_rate": 4.433168718811823e-05, "loss": 0.7628, "step": 30715 }, { "epoch": 0.34, "learning_rate": 4.433076446097972e-05, "loss": 0.8436, "step": 30720 }, { "epoch": 0.34, "learning_rate": 4.4329841733841206e-05, "loss": 0.7835, "step": 30725 }, { "epoch": 0.34, "learning_rate": 4.432891900670269e-05, "loss": 0.7979, "step": 30730 }, { "epoch": 0.34, "learning_rate": 4.432799627956418e-05, "loss": 0.7864, "step": 30735 }, { "epoch": 0.34, "learning_rate": 4.432707355242567e-05, "loss": 0.7712, "step": 30740 }, { "epoch": 0.34, "learning_rate": 4.432615082528716e-05, "loss": 0.7627, "step": 30745 }, { "epoch": 0.34, "learning_rate": 4.432522809814864e-05, "loss": 0.7527, "step": 30750 }, { "epoch": 0.34, "learning_rate": 4.4324305371010127e-05, "loss": 0.7803, "step": 30755 }, { "epoch": 0.34, "learning_rate": 4.432338264387162e-05, "loss": 0.7097, "step": 30760 }, { "epoch": 0.34, "learning_rate": 4.43224599167331e-05, "loss": 0.7977, "step": 30765 }, { "epoch": 0.34, "learning_rate": 4.432153718959459e-05, "loss": 0.767, "step": 30770 }, { "epoch": 0.34, "learning_rate": 4.432061446245608e-05, "loss": 0.7973, "step": 30775 }, { "epoch": 0.34, "learning_rate": 4.431969173531757e-05, "loss": 0.8108, "step": 30780 }, { "epoch": 0.34, "learning_rate": 4.4318769008179054e-05, "loss": 0.7303, "step": 30785 }, { "epoch": 0.34, "learning_rate": 4.431784628104054e-05, "loss": 0.786, "step": 30790 }, { "epoch": 0.34, "learning_rate": 4.431692355390203e-05, "loss": 0.7898, "step": 30795 }, { "epoch": 0.34, "learning_rate": 4.4316000826763524e-05, "loss": 0.8414, "step": 30800 }, { "epoch": 0.34, "learning_rate": 4.4315078099625005e-05, "loss": 0.7887, "step": 30805 }, { "epoch": 0.34, "learning_rate": 4.431415537248649e-05, "loss": 0.7903, "step": 30810 }, { "epoch": 0.34, "learning_rate": 4.431323264534798e-05, "loss": 0.7072, "step": 30815 }, { "epoch": 0.34, "learning_rate": 4.431230991820947e-05, "loss": 0.7967, "step": 30820 }, { "epoch": 0.34, "learning_rate": 4.4311387191070956e-05, "loss": 0.8255, "step": 30825 }, { "epoch": 0.34, "learning_rate": 4.4310464463932444e-05, "loss": 0.7252, "step": 30830 }, { "epoch": 0.34, "learning_rate": 4.430954173679393e-05, "loss": 0.7251, "step": 30835 }, { "epoch": 0.34, "learning_rate": 4.430861900965542e-05, "loss": 0.7832, "step": 30840 }, { "epoch": 0.34, "learning_rate": 4.430769628251691e-05, "loss": 0.7165, "step": 30845 }, { "epoch": 0.34, "learning_rate": 4.4306773555378396e-05, "loss": 0.7041, "step": 30850 }, { "epoch": 0.34, "learning_rate": 4.4305850828239883e-05, "loss": 0.8179, "step": 30855 }, { "epoch": 0.34, "learning_rate": 4.4304928101101365e-05, "loss": 0.7888, "step": 30860 }, { "epoch": 0.34, "learning_rate": 4.430400537396286e-05, "loss": 0.7873, "step": 30865 }, { "epoch": 0.34, "learning_rate": 4.430308264682435e-05, "loss": 0.843, "step": 30870 }, { "epoch": 0.34, "learning_rate": 4.4302159919685835e-05, "loss": 0.7389, "step": 30875 }, { "epoch": 0.34, "learning_rate": 4.4301237192547316e-05, "loss": 0.7843, "step": 30880 }, { "epoch": 0.34, "learning_rate": 4.430031446540881e-05, "loss": 0.7442, "step": 30885 }, { "epoch": 0.34, "learning_rate": 4.42993917382703e-05, "loss": 0.7178, "step": 30890 }, { "epoch": 0.34, "learning_rate": 4.429846901113178e-05, "loss": 0.8035, "step": 30895 }, { "epoch": 0.34, "learning_rate": 4.429754628399327e-05, "loss": 0.7577, "step": 30900 }, { "epoch": 0.34, "learning_rate": 4.4296623556854755e-05, "loss": 0.7628, "step": 30905 }, { "epoch": 0.34, "learning_rate": 4.429570082971625e-05, "loss": 0.7593, "step": 30910 }, { "epoch": 0.34, "learning_rate": 4.429477810257773e-05, "loss": 0.732, "step": 30915 }, { "epoch": 0.34, "learning_rate": 4.429385537543922e-05, "loss": 0.8049, "step": 30920 }, { "epoch": 0.34, "learning_rate": 4.4292932648300707e-05, "loss": 0.7695, "step": 30925 }, { "epoch": 0.34, "learning_rate": 4.4292009921162194e-05, "loss": 0.7554, "step": 30930 }, { "epoch": 0.34, "learning_rate": 4.429108719402368e-05, "loss": 0.8048, "step": 30935 }, { "epoch": 0.34, "learning_rate": 4.429016446688517e-05, "loss": 0.7543, "step": 30940 }, { "epoch": 0.34, "learning_rate": 4.428924173974666e-05, "loss": 0.7504, "step": 30945 }, { "epoch": 0.34, "learning_rate": 4.4288319012608146e-05, "loss": 0.7877, "step": 30950 }, { "epoch": 0.34, "learning_rate": 4.4287396285469634e-05, "loss": 0.7539, "step": 30955 }, { "epoch": 0.34, "learning_rate": 4.428647355833112e-05, "loss": 0.7784, "step": 30960 }, { "epoch": 0.34, "learning_rate": 4.428555083119261e-05, "loss": 0.7777, "step": 30965 }, { "epoch": 0.34, "learning_rate": 4.428462810405409e-05, "loss": 0.7432, "step": 30970 }, { "epoch": 0.34, "learning_rate": 4.4283705376915585e-05, "loss": 0.7925, "step": 30975 }, { "epoch": 0.34, "learning_rate": 4.428278264977707e-05, "loss": 0.7084, "step": 30980 }, { "epoch": 0.34, "learning_rate": 4.428185992263856e-05, "loss": 0.8079, "step": 30985 }, { "epoch": 0.34, "learning_rate": 4.428093719550004e-05, "loss": 0.7636, "step": 30990 }, { "epoch": 0.34, "learning_rate": 4.4280014468361536e-05, "loss": 0.7497, "step": 30995 }, { "epoch": 0.34, "learning_rate": 4.4279091741223024e-05, "loss": 0.8095, "step": 31000 }, { "epoch": 0.34, "eval_loss": 0.7433749437332153, "eval_runtime": 69.3, "eval_samples_per_second": 28.86, "eval_steps_per_second": 14.43, "step": 31000 }, { "epoch": 0.34, "learning_rate": 4.4278169014084505e-05, "loss": 0.7892, "step": 31005 }, { "epoch": 0.34, "learning_rate": 4.427724628694599e-05, "loss": 0.7672, "step": 31010 }, { "epoch": 0.34, "learning_rate": 4.427632355980749e-05, "loss": 0.8029, "step": 31015 }, { "epoch": 0.34, "learning_rate": 4.4275400832668976e-05, "loss": 0.7985, "step": 31020 }, { "epoch": 0.34, "learning_rate": 4.427447810553046e-05, "loss": 0.7593, "step": 31025 }, { "epoch": 0.34, "learning_rate": 4.4273555378391945e-05, "loss": 0.7093, "step": 31030 }, { "epoch": 0.34, "learning_rate": 4.427263265125344e-05, "loss": 0.7565, "step": 31035 }, { "epoch": 0.34, "learning_rate": 4.427170992411492e-05, "loss": 0.6901, "step": 31040 }, { "epoch": 0.34, "learning_rate": 4.427078719697641e-05, "loss": 0.7577, "step": 31045 }, { "epoch": 0.34, "learning_rate": 4.4269864469837896e-05, "loss": 0.8322, "step": 31050 }, { "epoch": 0.34, "learning_rate": 4.4268941742699384e-05, "loss": 0.7483, "step": 31055 }, { "epoch": 0.34, "learning_rate": 4.426801901556087e-05, "loss": 0.774, "step": 31060 }, { "epoch": 0.34, "learning_rate": 4.426709628842236e-05, "loss": 0.7817, "step": 31065 }, { "epoch": 0.34, "learning_rate": 4.426617356128385e-05, "loss": 0.8571, "step": 31070 }, { "epoch": 0.34, "learning_rate": 4.4265250834145335e-05, "loss": 0.7252, "step": 31075 }, { "epoch": 0.34, "learning_rate": 4.426432810700682e-05, "loss": 0.8188, "step": 31080 }, { "epoch": 0.34, "learning_rate": 4.426340537986831e-05, "loss": 0.7803, "step": 31085 }, { "epoch": 0.34, "learning_rate": 4.42624826527298e-05, "loss": 0.7262, "step": 31090 }, { "epoch": 0.34, "learning_rate": 4.426155992559129e-05, "loss": 0.8076, "step": 31095 }, { "epoch": 0.34, "learning_rate": 4.4260637198452775e-05, "loss": 0.7787, "step": 31100 }, { "epoch": 0.34, "learning_rate": 4.425971447131426e-05, "loss": 0.7694, "step": 31105 }, { "epoch": 0.34, "learning_rate": 4.425879174417575e-05, "loss": 0.7804, "step": 31110 }, { "epoch": 0.34, "learning_rate": 4.425786901703723e-05, "loss": 0.7559, "step": 31115 }, { "epoch": 0.34, "learning_rate": 4.425694628989872e-05, "loss": 0.7373, "step": 31120 }, { "epoch": 0.34, "learning_rate": 4.4256023562760214e-05, "loss": 0.7594, "step": 31125 }, { "epoch": 0.34, "learning_rate": 4.42551008356217e-05, "loss": 0.749, "step": 31130 }, { "epoch": 0.34, "learning_rate": 4.425417810848318e-05, "loss": 0.8101, "step": 31135 }, { "epoch": 0.34, "learning_rate": 4.425325538134467e-05, "loss": 0.7774, "step": 31140 }, { "epoch": 0.34, "learning_rate": 4.4252332654206165e-05, "loss": 0.8138, "step": 31145 }, { "epoch": 0.34, "learning_rate": 4.425140992706765e-05, "loss": 0.7943, "step": 31150 }, { "epoch": 0.34, "learning_rate": 4.4250487199929134e-05, "loss": 0.8, "step": 31155 }, { "epoch": 0.35, "learning_rate": 4.424956447279062e-05, "loss": 0.712, "step": 31160 }, { "epoch": 0.35, "learning_rate": 4.4248641745652117e-05, "loss": 0.7508, "step": 31165 }, { "epoch": 0.35, "learning_rate": 4.42477190185136e-05, "loss": 0.8127, "step": 31170 }, { "epoch": 0.35, "learning_rate": 4.4246796291375085e-05, "loss": 0.7676, "step": 31175 }, { "epoch": 0.35, "learning_rate": 4.424587356423657e-05, "loss": 0.7923, "step": 31180 }, { "epoch": 0.35, "learning_rate": 4.424495083709807e-05, "loss": 0.7564, "step": 31185 }, { "epoch": 0.35, "learning_rate": 4.424402810995955e-05, "loss": 0.748, "step": 31190 }, { "epoch": 0.35, "learning_rate": 4.424310538282104e-05, "loss": 0.7255, "step": 31195 }, { "epoch": 0.35, "learning_rate": 4.4242182655682525e-05, "loss": 0.7614, "step": 31200 }, { "epoch": 0.35, "learning_rate": 4.424125992854401e-05, "loss": 0.7942, "step": 31205 }, { "epoch": 0.35, "learning_rate": 4.42403372014055e-05, "loss": 0.8504, "step": 31210 }, { "epoch": 0.35, "learning_rate": 4.423941447426699e-05, "loss": 0.8032, "step": 31215 }, { "epoch": 0.35, "learning_rate": 4.4238491747128476e-05, "loss": 0.7317, "step": 31220 }, { "epoch": 0.35, "learning_rate": 4.4237569019989964e-05, "loss": 0.7976, "step": 31225 }, { "epoch": 0.35, "learning_rate": 4.423664629285145e-05, "loss": 0.7136, "step": 31230 }, { "epoch": 0.35, "learning_rate": 4.423572356571294e-05, "loss": 0.7762, "step": 31235 }, { "epoch": 0.35, "learning_rate": 4.423480083857443e-05, "loss": 0.7639, "step": 31240 }, { "epoch": 0.35, "learning_rate": 4.423387811143591e-05, "loss": 0.7264, "step": 31245 }, { "epoch": 0.35, "learning_rate": 4.42329553842974e-05, "loss": 0.749, "step": 31250 }, { "epoch": 0.35, "learning_rate": 4.423203265715889e-05, "loss": 0.76, "step": 31255 }, { "epoch": 0.35, "learning_rate": 4.423110993002038e-05, "loss": 0.7943, "step": 31260 }, { "epoch": 0.35, "learning_rate": 4.423018720288186e-05, "loss": 0.7495, "step": 31265 }, { "epoch": 0.35, "learning_rate": 4.4229264475743355e-05, "loss": 0.76, "step": 31270 }, { "epoch": 0.35, "learning_rate": 4.422834174860484e-05, "loss": 0.8034, "step": 31275 }, { "epoch": 0.35, "learning_rate": 4.4227419021466324e-05, "loss": 0.7687, "step": 31280 }, { "epoch": 0.35, "learning_rate": 4.422649629432781e-05, "loss": 0.7873, "step": 31285 }, { "epoch": 0.35, "learning_rate": 4.42255735671893e-05, "loss": 0.7684, "step": 31290 }, { "epoch": 0.35, "learning_rate": 4.4224650840050794e-05, "loss": 0.7474, "step": 31295 }, { "epoch": 0.35, "learning_rate": 4.4223728112912275e-05, "loss": 0.7897, "step": 31300 }, { "epoch": 0.35, "learning_rate": 4.422280538577376e-05, "loss": 0.7852, "step": 31305 }, { "epoch": 0.35, "learning_rate": 4.422188265863525e-05, "loss": 0.7355, "step": 31310 }, { "epoch": 0.35, "learning_rate": 4.422095993149674e-05, "loss": 0.7911, "step": 31315 }, { "epoch": 0.35, "learning_rate": 4.4220037204358226e-05, "loss": 0.7395, "step": 31320 }, { "epoch": 0.35, "learning_rate": 4.4219114477219714e-05, "loss": 0.6889, "step": 31325 }, { "epoch": 0.35, "learning_rate": 4.42181917500812e-05, "loss": 0.7297, "step": 31330 }, { "epoch": 0.35, "learning_rate": 4.421726902294269e-05, "loss": 0.7015, "step": 31335 }, { "epoch": 0.35, "learning_rate": 4.421634629580418e-05, "loss": 0.7377, "step": 31340 }, { "epoch": 0.35, "learning_rate": 4.4215423568665666e-05, "loss": 0.7606, "step": 31345 }, { "epoch": 0.35, "learning_rate": 4.4214500841527153e-05, "loss": 0.7597, "step": 31350 }, { "epoch": 0.35, "learning_rate": 4.4213578114388634e-05, "loss": 0.8015, "step": 31355 }, { "epoch": 0.35, "learning_rate": 4.421265538725013e-05, "loss": 0.7781, "step": 31360 }, { "epoch": 0.35, "learning_rate": 4.421173266011162e-05, "loss": 0.8172, "step": 31365 }, { "epoch": 0.35, "learning_rate": 4.4210809932973105e-05, "loss": 0.7854, "step": 31370 }, { "epoch": 0.35, "learning_rate": 4.4209887205834586e-05, "loss": 0.7654, "step": 31375 }, { "epoch": 0.35, "learning_rate": 4.420896447869608e-05, "loss": 0.7577, "step": 31380 }, { "epoch": 0.35, "learning_rate": 4.420804175155757e-05, "loss": 0.7342, "step": 31385 }, { "epoch": 0.35, "learning_rate": 4.420711902441905e-05, "loss": 0.8153, "step": 31390 }, { "epoch": 0.35, "learning_rate": 4.420619629728054e-05, "loss": 0.7738, "step": 31395 }, { "epoch": 0.35, "learning_rate": 4.420527357014203e-05, "loss": 0.787, "step": 31400 }, { "epoch": 0.35, "learning_rate": 4.420435084300352e-05, "loss": 0.7857, "step": 31405 }, { "epoch": 0.35, "learning_rate": 4.4203428115865e-05, "loss": 0.7762, "step": 31410 }, { "epoch": 0.35, "learning_rate": 4.420250538872649e-05, "loss": 0.7357, "step": 31415 }, { "epoch": 0.35, "learning_rate": 4.420158266158798e-05, "loss": 0.8446, "step": 31420 }, { "epoch": 0.35, "learning_rate": 4.4200659934449464e-05, "loss": 0.7545, "step": 31425 }, { "epoch": 0.35, "learning_rate": 4.419973720731095e-05, "loss": 0.7141, "step": 31430 }, { "epoch": 0.35, "learning_rate": 4.419881448017244e-05, "loss": 0.7995, "step": 31435 }, { "epoch": 0.35, "learning_rate": 4.419789175303393e-05, "loss": 0.7168, "step": 31440 }, { "epoch": 0.35, "learning_rate": 4.4196969025895416e-05, "loss": 0.7677, "step": 31445 }, { "epoch": 0.35, "learning_rate": 4.4196046298756904e-05, "loss": 0.8026, "step": 31450 }, { "epoch": 0.35, "learning_rate": 4.419512357161839e-05, "loss": 0.7417, "step": 31455 }, { "epoch": 0.35, "learning_rate": 4.419420084447988e-05, "loss": 0.8066, "step": 31460 }, { "epoch": 0.35, "learning_rate": 4.419327811734137e-05, "loss": 0.7237, "step": 31465 }, { "epoch": 0.35, "learning_rate": 4.4192355390202855e-05, "loss": 0.7199, "step": 31470 }, { "epoch": 0.35, "learning_rate": 4.419143266306434e-05, "loss": 0.7423, "step": 31475 }, { "epoch": 0.35, "learning_rate": 4.419050993592583e-05, "loss": 0.7379, "step": 31480 }, { "epoch": 0.35, "learning_rate": 4.418958720878732e-05, "loss": 0.737, "step": 31485 }, { "epoch": 0.35, "learning_rate": 4.4188664481648806e-05, "loss": 0.7833, "step": 31490 }, { "epoch": 0.35, "learning_rate": 4.4187741754510294e-05, "loss": 0.8017, "step": 31495 }, { "epoch": 0.35, "learning_rate": 4.4186819027371775e-05, "loss": 0.803, "step": 31500 }, { "epoch": 0.35, "learning_rate": 4.418589630023326e-05, "loss": 0.7296, "step": 31505 }, { "epoch": 0.35, "learning_rate": 4.418497357309476e-05, "loss": 0.7515, "step": 31510 }, { "epoch": 0.35, "learning_rate": 4.4184050845956246e-05, "loss": 0.7241, "step": 31515 }, { "epoch": 0.35, "learning_rate": 4.418312811881773e-05, "loss": 0.7802, "step": 31520 }, { "epoch": 0.35, "learning_rate": 4.4182205391679215e-05, "loss": 0.774, "step": 31525 }, { "epoch": 0.35, "learning_rate": 4.418128266454071e-05, "loss": 0.8119, "step": 31530 }, { "epoch": 0.35, "learning_rate": 4.41803599374022e-05, "loss": 0.7091, "step": 31535 }, { "epoch": 0.35, "learning_rate": 4.417943721026368e-05, "loss": 0.7772, "step": 31540 }, { "epoch": 0.35, "learning_rate": 4.4178514483125166e-05, "loss": 0.8133, "step": 31545 }, { "epoch": 0.35, "learning_rate": 4.417759175598666e-05, "loss": 0.8219, "step": 31550 }, { "epoch": 0.35, "learning_rate": 4.417666902884814e-05, "loss": 0.7443, "step": 31555 }, { "epoch": 0.35, "learning_rate": 4.417574630170963e-05, "loss": 0.79, "step": 31560 }, { "epoch": 0.35, "learning_rate": 4.417482357457112e-05, "loss": 0.7236, "step": 31565 }, { "epoch": 0.35, "learning_rate": 4.417390084743261e-05, "loss": 0.7967, "step": 31570 }, { "epoch": 0.35, "learning_rate": 4.417297812029409e-05, "loss": 0.7729, "step": 31575 }, { "epoch": 0.35, "learning_rate": 4.417205539315558e-05, "loss": 0.7972, "step": 31580 }, { "epoch": 0.35, "learning_rate": 4.417113266601707e-05, "loss": 0.7539, "step": 31585 }, { "epoch": 0.35, "learning_rate": 4.4170209938878557e-05, "loss": 0.7066, "step": 31590 }, { "epoch": 0.35, "learning_rate": 4.4169287211740044e-05, "loss": 0.7617, "step": 31595 }, { "epoch": 0.35, "learning_rate": 4.416836448460153e-05, "loss": 0.7496, "step": 31600 }, { "epoch": 0.35, "learning_rate": 4.416744175746302e-05, "loss": 0.8116, "step": 31605 }, { "epoch": 0.35, "learning_rate": 4.416651903032451e-05, "loss": 0.7552, "step": 31610 }, { "epoch": 0.35, "learning_rate": 4.4165596303185996e-05, "loss": 0.7478, "step": 31615 }, { "epoch": 0.35, "learning_rate": 4.4164673576047484e-05, "loss": 0.7518, "step": 31620 }, { "epoch": 0.35, "learning_rate": 4.416375084890897e-05, "loss": 0.7386, "step": 31625 }, { "epoch": 0.35, "learning_rate": 4.416282812177045e-05, "loss": 0.7716, "step": 31630 }, { "epoch": 0.35, "learning_rate": 4.416190539463195e-05, "loss": 0.7673, "step": 31635 }, { "epoch": 0.35, "learning_rate": 4.4160982667493435e-05, "loss": 0.8233, "step": 31640 }, { "epoch": 0.35, "learning_rate": 4.416005994035492e-05, "loss": 0.8021, "step": 31645 }, { "epoch": 0.35, "learning_rate": 4.4159137213216404e-05, "loss": 0.8199, "step": 31650 }, { "epoch": 0.35, "learning_rate": 4.415821448607789e-05, "loss": 0.7214, "step": 31655 }, { "epoch": 0.35, "learning_rate": 4.4157291758939386e-05, "loss": 0.777, "step": 31660 }, { "epoch": 0.35, "learning_rate": 4.415636903180087e-05, "loss": 0.7489, "step": 31665 }, { "epoch": 0.35, "learning_rate": 4.4155446304662355e-05, "loss": 0.69, "step": 31670 }, { "epoch": 0.35, "learning_rate": 4.415452357752384e-05, "loss": 0.7218, "step": 31675 }, { "epoch": 0.35, "learning_rate": 4.415360085038534e-05, "loss": 0.7885, "step": 31680 }, { "epoch": 0.35, "learning_rate": 4.415267812324682e-05, "loss": 0.79, "step": 31685 }, { "epoch": 0.35, "learning_rate": 4.415175539610831e-05, "loss": 0.721, "step": 31690 }, { "epoch": 0.35, "learning_rate": 4.4150832668969795e-05, "loss": 0.7603, "step": 31695 }, { "epoch": 0.35, "learning_rate": 4.414990994183128e-05, "loss": 0.8355, "step": 31700 }, { "epoch": 0.35, "learning_rate": 4.414898721469277e-05, "loss": 0.7973, "step": 31705 }, { "epoch": 0.35, "learning_rate": 4.414806448755426e-05, "loss": 0.7657, "step": 31710 }, { "epoch": 0.35, "learning_rate": 4.4147141760415746e-05, "loss": 0.7782, "step": 31715 }, { "epoch": 0.35, "learning_rate": 4.4146219033277234e-05, "loss": 0.7324, "step": 31720 }, { "epoch": 0.35, "learning_rate": 4.414529630613872e-05, "loss": 0.7829, "step": 31725 }, { "epoch": 0.35, "learning_rate": 4.414437357900021e-05, "loss": 0.7587, "step": 31730 }, { "epoch": 0.35, "learning_rate": 4.41434508518617e-05, "loss": 0.7721, "step": 31735 }, { "epoch": 0.35, "learning_rate": 4.414252812472318e-05, "loss": 0.7919, "step": 31740 }, { "epoch": 0.35, "learning_rate": 4.414160539758467e-05, "loss": 0.7975, "step": 31745 }, { "epoch": 0.35, "learning_rate": 4.414068267044616e-05, "loss": 0.7406, "step": 31750 }, { "epoch": 0.35, "learning_rate": 4.413975994330765e-05, "loss": 0.7835, "step": 31755 }, { "epoch": 0.35, "learning_rate": 4.413883721616913e-05, "loss": 0.7442, "step": 31760 }, { "epoch": 0.35, "learning_rate": 4.4137914489030625e-05, "loss": 0.7759, "step": 31765 }, { "epoch": 0.35, "learning_rate": 4.413699176189211e-05, "loss": 0.7804, "step": 31770 }, { "epoch": 0.35, "learning_rate": 4.4136069034753593e-05, "loss": 0.8088, "step": 31775 }, { "epoch": 0.35, "learning_rate": 4.413514630761508e-05, "loss": 0.7135, "step": 31780 }, { "epoch": 0.35, "learning_rate": 4.4134223580476576e-05, "loss": 0.7799, "step": 31785 }, { "epoch": 0.35, "learning_rate": 4.4133300853338064e-05, "loss": 0.8263, "step": 31790 }, { "epoch": 0.35, "learning_rate": 4.4132378126199545e-05, "loss": 0.7754, "step": 31795 }, { "epoch": 0.35, "learning_rate": 4.413145539906103e-05, "loss": 0.8135, "step": 31800 }, { "epoch": 0.35, "learning_rate": 4.413053267192252e-05, "loss": 0.784, "step": 31805 }, { "epoch": 0.35, "learning_rate": 4.412960994478401e-05, "loss": 0.7262, "step": 31810 }, { "epoch": 0.35, "learning_rate": 4.4128687217645496e-05, "loss": 0.7464, "step": 31815 }, { "epoch": 0.35, "learning_rate": 4.4127764490506984e-05, "loss": 0.7533, "step": 31820 }, { "epoch": 0.35, "learning_rate": 4.412684176336847e-05, "loss": 0.7853, "step": 31825 }, { "epoch": 0.35, "learning_rate": 4.412591903622996e-05, "loss": 0.7733, "step": 31830 }, { "epoch": 0.35, "learning_rate": 4.412499630909145e-05, "loss": 0.7676, "step": 31835 }, { "epoch": 0.35, "learning_rate": 4.4124073581952935e-05, "loss": 0.7911, "step": 31840 }, { "epoch": 0.35, "learning_rate": 4.412315085481442e-05, "loss": 0.8126, "step": 31845 }, { "epoch": 0.35, "learning_rate": 4.412222812767591e-05, "loss": 0.7409, "step": 31850 }, { "epoch": 0.35, "learning_rate": 4.41213054005374e-05, "loss": 0.8009, "step": 31855 }, { "epoch": 0.35, "learning_rate": 4.412038267339889e-05, "loss": 0.7835, "step": 31860 }, { "epoch": 0.35, "learning_rate": 4.4119459946260375e-05, "loss": 0.7808, "step": 31865 }, { "epoch": 0.35, "learning_rate": 4.411853721912186e-05, "loss": 0.7708, "step": 31870 }, { "epoch": 0.35, "learning_rate": 4.411761449198335e-05, "loss": 0.7493, "step": 31875 }, { "epoch": 0.35, "learning_rate": 4.411669176484484e-05, "loss": 0.6751, "step": 31880 }, { "epoch": 0.35, "learning_rate": 4.411576903770632e-05, "loss": 0.858, "step": 31885 }, { "epoch": 0.35, "learning_rate": 4.411484631056781e-05, "loss": 0.7821, "step": 31890 }, { "epoch": 0.35, "learning_rate": 4.41139235834293e-05, "loss": 0.79, "step": 31895 }, { "epoch": 0.35, "learning_rate": 4.411300085629079e-05, "loss": 0.7542, "step": 31900 }, { "epoch": 0.35, "learning_rate": 4.411207812915227e-05, "loss": 0.7803, "step": 31905 }, { "epoch": 0.35, "learning_rate": 4.411115540201376e-05, "loss": 0.763, "step": 31910 }, { "epoch": 0.35, "learning_rate": 4.411023267487525e-05, "loss": 0.7543, "step": 31915 }, { "epoch": 0.35, "learning_rate": 4.410930994773674e-05, "loss": 0.6923, "step": 31920 }, { "epoch": 0.35, "learning_rate": 4.410838722059822e-05, "loss": 0.7735, "step": 31925 }, { "epoch": 0.35, "learning_rate": 4.410746449345971e-05, "loss": 0.7665, "step": 31930 }, { "epoch": 0.35, "learning_rate": 4.4106541766321205e-05, "loss": 0.7578, "step": 31935 }, { "epoch": 0.35, "learning_rate": 4.4105619039182686e-05, "loss": 0.7606, "step": 31940 }, { "epoch": 0.35, "learning_rate": 4.4104696312044174e-05, "loss": 0.8179, "step": 31945 }, { "epoch": 0.35, "learning_rate": 4.410377358490566e-05, "loss": 0.7738, "step": 31950 }, { "epoch": 0.35, "learning_rate": 4.410285085776715e-05, "loss": 0.7763, "step": 31955 }, { "epoch": 0.35, "learning_rate": 4.410192813062864e-05, "loss": 0.7336, "step": 31960 }, { "epoch": 0.35, "learning_rate": 4.4101005403490125e-05, "loss": 0.7708, "step": 31965 }, { "epoch": 0.35, "learning_rate": 4.410008267635161e-05, "loss": 0.767, "step": 31970 }, { "epoch": 0.35, "learning_rate": 4.40991599492131e-05, "loss": 0.8332, "step": 31975 }, { "epoch": 0.35, "learning_rate": 4.409823722207459e-05, "loss": 0.7307, "step": 31980 }, { "epoch": 0.35, "learning_rate": 4.4097314494936076e-05, "loss": 0.7702, "step": 31985 }, { "epoch": 0.35, "learning_rate": 4.4096391767797564e-05, "loss": 0.752, "step": 31990 }, { "epoch": 0.35, "learning_rate": 4.409546904065905e-05, "loss": 0.7943, "step": 31995 }, { "epoch": 0.35, "learning_rate": 4.409454631352054e-05, "loss": 0.7451, "step": 32000 }, { "epoch": 0.35, "eval_loss": 0.7534207105636597, "eval_runtime": 69.24, "eval_samples_per_second": 28.885, "eval_steps_per_second": 14.443, "step": 32000 }, { "epoch": 0.35, "learning_rate": 4.409362358638203e-05, "loss": 0.76, "step": 32005 }, { "epoch": 0.35, "learning_rate": 4.4092700859243516e-05, "loss": 0.7851, "step": 32010 }, { "epoch": 0.35, "learning_rate": 4.4091778132105e-05, "loss": 0.7537, "step": 32015 }, { "epoch": 0.35, "learning_rate": 4.409085540496649e-05, "loss": 0.7154, "step": 32020 }, { "epoch": 0.35, "learning_rate": 4.408993267782798e-05, "loss": 0.7848, "step": 32025 }, { "epoch": 0.35, "learning_rate": 4.408900995068947e-05, "loss": 0.7615, "step": 32030 }, { "epoch": 0.35, "learning_rate": 4.408808722355095e-05, "loss": 0.7082, "step": 32035 }, { "epoch": 0.35, "learning_rate": 4.4087164496412436e-05, "loss": 0.741, "step": 32040 }, { "epoch": 0.35, "learning_rate": 4.408624176927393e-05, "loss": 0.7692, "step": 32045 }, { "epoch": 0.35, "learning_rate": 4.408531904213541e-05, "loss": 0.8084, "step": 32050 }, { "epoch": 0.35, "learning_rate": 4.40843963149969e-05, "loss": 0.7708, "step": 32055 }, { "epoch": 0.35, "learning_rate": 4.408347358785839e-05, "loss": 0.7868, "step": 32060 }, { "epoch": 0.36, "learning_rate": 4.408255086071988e-05, "loss": 0.7382, "step": 32065 }, { "epoch": 0.36, "learning_rate": 4.408162813358136e-05, "loss": 0.7275, "step": 32070 }, { "epoch": 0.36, "learning_rate": 4.408070540644285e-05, "loss": 0.7704, "step": 32075 }, { "epoch": 0.36, "learning_rate": 4.407978267930434e-05, "loss": 0.723, "step": 32080 }, { "epoch": 0.36, "learning_rate": 4.4078859952165827e-05, "loss": 0.7664, "step": 32085 }, { "epoch": 0.36, "learning_rate": 4.4077937225027314e-05, "loss": 0.7933, "step": 32090 }, { "epoch": 0.36, "learning_rate": 4.40770144978888e-05, "loss": 0.7586, "step": 32095 }, { "epoch": 0.36, "learning_rate": 4.407609177075029e-05, "loss": 0.6992, "step": 32100 }, { "epoch": 0.36, "learning_rate": 4.407516904361178e-05, "loss": 0.8246, "step": 32105 }, { "epoch": 0.36, "learning_rate": 4.4074246316473266e-05, "loss": 0.7222, "step": 32110 }, { "epoch": 0.36, "learning_rate": 4.4073323589334754e-05, "loss": 0.7961, "step": 32115 }, { "epoch": 0.36, "learning_rate": 4.407240086219624e-05, "loss": 0.8136, "step": 32120 }, { "epoch": 0.36, "learning_rate": 4.407147813505772e-05, "loss": 0.7463, "step": 32125 }, { "epoch": 0.36, "learning_rate": 4.407055540791922e-05, "loss": 0.7927, "step": 32130 }, { "epoch": 0.36, "learning_rate": 4.4069632680780705e-05, "loss": 0.789, "step": 32135 }, { "epoch": 0.36, "learning_rate": 4.406870995364219e-05, "loss": 0.7249, "step": 32140 }, { "epoch": 0.36, "learning_rate": 4.4067787226503674e-05, "loss": 0.776, "step": 32145 }, { "epoch": 0.36, "learning_rate": 4.406686449936517e-05, "loss": 0.7005, "step": 32150 }, { "epoch": 0.36, "learning_rate": 4.4065941772226656e-05, "loss": 0.7999, "step": 32155 }, { "epoch": 0.36, "learning_rate": 4.406501904508814e-05, "loss": 0.7293, "step": 32160 }, { "epoch": 0.36, "learning_rate": 4.4064096317949625e-05, "loss": 0.8054, "step": 32165 }, { "epoch": 0.36, "learning_rate": 4.406317359081112e-05, "loss": 0.7381, "step": 32170 }, { "epoch": 0.36, "learning_rate": 4.406225086367261e-05, "loss": 0.8169, "step": 32175 }, { "epoch": 0.36, "learning_rate": 4.406132813653409e-05, "loss": 0.785, "step": 32180 }, { "epoch": 0.36, "learning_rate": 4.406040540939558e-05, "loss": 0.7572, "step": 32185 }, { "epoch": 0.36, "learning_rate": 4.4059482682257065e-05, "loss": 0.7716, "step": 32190 }, { "epoch": 0.36, "learning_rate": 4.405855995511855e-05, "loss": 0.806, "step": 32195 }, { "epoch": 0.36, "learning_rate": 4.405763722798004e-05, "loss": 0.7973, "step": 32200 }, { "epoch": 0.36, "learning_rate": 4.405671450084153e-05, "loss": 0.7887, "step": 32205 }, { "epoch": 0.36, "learning_rate": 4.4055791773703016e-05, "loss": 0.7772, "step": 32210 }, { "epoch": 0.36, "learning_rate": 4.4054869046564504e-05, "loss": 0.7639, "step": 32215 }, { "epoch": 0.36, "learning_rate": 4.405394631942599e-05, "loss": 0.7938, "step": 32220 }, { "epoch": 0.36, "learning_rate": 4.405302359228748e-05, "loss": 0.7287, "step": 32225 }, { "epoch": 0.36, "learning_rate": 4.405210086514897e-05, "loss": 0.8385, "step": 32230 }, { "epoch": 0.36, "learning_rate": 4.4051178138010455e-05, "loss": 0.783, "step": 32235 }, { "epoch": 0.36, "learning_rate": 4.405025541087194e-05, "loss": 0.7363, "step": 32240 }, { "epoch": 0.36, "learning_rate": 4.404933268373343e-05, "loss": 0.7209, "step": 32245 }, { "epoch": 0.36, "learning_rate": 4.404840995659492e-05, "loss": 0.7782, "step": 32250 }, { "epoch": 0.36, "learning_rate": 4.4047487229456407e-05, "loss": 0.799, "step": 32255 }, { "epoch": 0.36, "learning_rate": 4.4046564502317894e-05, "loss": 0.7807, "step": 32260 }, { "epoch": 0.36, "learning_rate": 4.404564177517938e-05, "loss": 0.7867, "step": 32265 }, { "epoch": 0.36, "learning_rate": 4.404471904804086e-05, "loss": 0.7536, "step": 32270 }, { "epoch": 0.36, "learning_rate": 4.404379632090235e-05, "loss": 0.7673, "step": 32275 }, { "epoch": 0.36, "learning_rate": 4.4042873593763846e-05, "loss": 0.7672, "step": 32280 }, { "epoch": 0.36, "learning_rate": 4.4041950866625334e-05, "loss": 0.7537, "step": 32285 }, { "epoch": 0.36, "learning_rate": 4.4041028139486815e-05, "loss": 0.8027, "step": 32290 }, { "epoch": 0.36, "learning_rate": 4.40401054123483e-05, "loss": 0.7468, "step": 32295 }, { "epoch": 0.36, "learning_rate": 4.40391826852098e-05, "loss": 0.7338, "step": 32300 }, { "epoch": 0.36, "learning_rate": 4.4038259958071285e-05, "loss": 0.8099, "step": 32305 }, { "epoch": 0.36, "learning_rate": 4.4037337230932766e-05, "loss": 0.8489, "step": 32310 }, { "epoch": 0.36, "learning_rate": 4.4036414503794254e-05, "loss": 0.7628, "step": 32315 }, { "epoch": 0.36, "learning_rate": 4.403549177665575e-05, "loss": 0.7832, "step": 32320 }, { "epoch": 0.36, "learning_rate": 4.403456904951723e-05, "loss": 0.7897, "step": 32325 }, { "epoch": 0.36, "learning_rate": 4.403364632237872e-05, "loss": 0.8087, "step": 32330 }, { "epoch": 0.36, "learning_rate": 4.4032723595240205e-05, "loss": 0.7695, "step": 32335 }, { "epoch": 0.36, "learning_rate": 4.403180086810169e-05, "loss": 0.7001, "step": 32340 }, { "epoch": 0.36, "learning_rate": 4.403087814096318e-05, "loss": 0.7896, "step": 32345 }, { "epoch": 0.36, "learning_rate": 4.402995541382467e-05, "loss": 0.725, "step": 32350 }, { "epoch": 0.36, "learning_rate": 4.402903268668616e-05, "loss": 0.7478, "step": 32355 }, { "epoch": 0.36, "learning_rate": 4.4028109959547645e-05, "loss": 0.7507, "step": 32360 }, { "epoch": 0.36, "learning_rate": 4.402718723240913e-05, "loss": 0.7378, "step": 32365 }, { "epoch": 0.36, "learning_rate": 4.402626450527062e-05, "loss": 0.7716, "step": 32370 }, { "epoch": 0.36, "learning_rate": 4.402534177813211e-05, "loss": 0.774, "step": 32375 }, { "epoch": 0.36, "learning_rate": 4.4024419050993596e-05, "loss": 0.7327, "step": 32380 }, { "epoch": 0.36, "learning_rate": 4.4023496323855084e-05, "loss": 0.8149, "step": 32385 }, { "epoch": 0.36, "learning_rate": 4.402257359671657e-05, "loss": 0.7142, "step": 32390 }, { "epoch": 0.36, "learning_rate": 4.402165086957806e-05, "loss": 0.7558, "step": 32395 }, { "epoch": 0.36, "learning_rate": 4.402072814243954e-05, "loss": 0.7936, "step": 32400 }, { "epoch": 0.36, "learning_rate": 4.4019805415301035e-05, "loss": 0.7932, "step": 32405 }, { "epoch": 0.36, "learning_rate": 4.401888268816252e-05, "loss": 0.7309, "step": 32410 }, { "epoch": 0.36, "learning_rate": 4.401795996102401e-05, "loss": 0.8121, "step": 32415 }, { "epoch": 0.36, "learning_rate": 4.401703723388549e-05, "loss": 0.7608, "step": 32420 }, { "epoch": 0.36, "learning_rate": 4.401611450674698e-05, "loss": 0.808, "step": 32425 }, { "epoch": 0.36, "learning_rate": 4.4015191779608475e-05, "loss": 0.7347, "step": 32430 }, { "epoch": 0.36, "learning_rate": 4.4014269052469956e-05, "loss": 0.7913, "step": 32435 }, { "epoch": 0.36, "learning_rate": 4.4013346325331443e-05, "loss": 0.7682, "step": 32440 }, { "epoch": 0.36, "learning_rate": 4.401242359819293e-05, "loss": 0.7757, "step": 32445 }, { "epoch": 0.36, "learning_rate": 4.4011500871054426e-05, "loss": 0.7178, "step": 32450 }, { "epoch": 0.36, "learning_rate": 4.401057814391591e-05, "loss": 0.7194, "step": 32455 }, { "epoch": 0.36, "learning_rate": 4.4009655416777395e-05, "loss": 0.7761, "step": 32460 }, { "epoch": 0.36, "learning_rate": 4.400873268963888e-05, "loss": 0.819, "step": 32465 }, { "epoch": 0.36, "learning_rate": 4.400780996250037e-05, "loss": 0.7885, "step": 32470 }, { "epoch": 0.36, "learning_rate": 4.400688723536186e-05, "loss": 0.7428, "step": 32475 }, { "epoch": 0.36, "learning_rate": 4.4005964508223346e-05, "loss": 0.7151, "step": 32480 }, { "epoch": 0.36, "learning_rate": 4.4005041781084834e-05, "loss": 0.7861, "step": 32485 }, { "epoch": 0.36, "learning_rate": 4.400411905394632e-05, "loss": 0.7231, "step": 32490 }, { "epoch": 0.36, "learning_rate": 4.400319632680781e-05, "loss": 0.8015, "step": 32495 }, { "epoch": 0.36, "learning_rate": 4.40022735996693e-05, "loss": 0.7754, "step": 32500 }, { "epoch": 0.36, "learning_rate": 4.4001350872530785e-05, "loss": 0.7657, "step": 32505 }, { "epoch": 0.36, "learning_rate": 4.4000428145392267e-05, "loss": 0.7844, "step": 32510 }, { "epoch": 0.36, "learning_rate": 4.399950541825376e-05, "loss": 0.781, "step": 32515 }, { "epoch": 0.36, "learning_rate": 4.399858269111525e-05, "loss": 0.7929, "step": 32520 }, { "epoch": 0.36, "learning_rate": 4.399765996397674e-05, "loss": 0.8006, "step": 32525 }, { "epoch": 0.36, "learning_rate": 4.399673723683822e-05, "loss": 0.8113, "step": 32530 }, { "epoch": 0.36, "learning_rate": 4.399581450969971e-05, "loss": 0.7834, "step": 32535 }, { "epoch": 0.36, "learning_rate": 4.39948917825612e-05, "loss": 0.7754, "step": 32540 }, { "epoch": 0.36, "learning_rate": 4.399396905542268e-05, "loss": 0.7203, "step": 32545 }, { "epoch": 0.36, "learning_rate": 4.399304632828417e-05, "loss": 0.7297, "step": 32550 }, { "epoch": 0.36, "learning_rate": 4.3992123601145664e-05, "loss": 0.7342, "step": 32555 }, { "epoch": 0.36, "learning_rate": 4.399120087400715e-05, "loss": 0.7603, "step": 32560 }, { "epoch": 0.36, "learning_rate": 4.399027814686863e-05, "loss": 0.8012, "step": 32565 }, { "epoch": 0.36, "learning_rate": 4.398935541973012e-05, "loss": 0.7303, "step": 32570 }, { "epoch": 0.36, "learning_rate": 4.398843269259161e-05, "loss": 0.7541, "step": 32575 }, { "epoch": 0.36, "learning_rate": 4.3987509965453096e-05, "loss": 0.7416, "step": 32580 }, { "epoch": 0.36, "learning_rate": 4.3986587238314584e-05, "loss": 0.7338, "step": 32585 }, { "epoch": 0.36, "learning_rate": 4.398566451117607e-05, "loss": 0.7627, "step": 32590 }, { "epoch": 0.36, "learning_rate": 4.398474178403756e-05, "loss": 0.8177, "step": 32595 }, { "epoch": 0.36, "learning_rate": 4.398381905689905e-05, "loss": 0.7868, "step": 32600 }, { "epoch": 0.36, "learning_rate": 4.3982896329760536e-05, "loss": 0.7664, "step": 32605 }, { "epoch": 0.36, "learning_rate": 4.3981973602622024e-05, "loss": 0.7443, "step": 32610 }, { "epoch": 0.36, "learning_rate": 4.398105087548351e-05, "loss": 0.7987, "step": 32615 }, { "epoch": 0.36, "learning_rate": 4.3980128148345e-05, "loss": 0.7678, "step": 32620 }, { "epoch": 0.36, "learning_rate": 4.397920542120649e-05, "loss": 0.7084, "step": 32625 }, { "epoch": 0.36, "learning_rate": 4.3978282694067975e-05, "loss": 0.773, "step": 32630 }, { "epoch": 0.36, "learning_rate": 4.397735996692946e-05, "loss": 0.7948, "step": 32635 }, { "epoch": 0.36, "learning_rate": 4.3976437239790944e-05, "loss": 0.7525, "step": 32640 }, { "epoch": 0.36, "learning_rate": 4.397551451265244e-05, "loss": 0.7375, "step": 32645 }, { "epoch": 0.36, "learning_rate": 4.3974591785513926e-05, "loss": 0.8037, "step": 32650 }, { "epoch": 0.36, "learning_rate": 4.397366905837541e-05, "loss": 0.805, "step": 32655 }, { "epoch": 0.36, "learning_rate": 4.3972746331236895e-05, "loss": 0.8025, "step": 32660 }, { "epoch": 0.36, "learning_rate": 4.397182360409839e-05, "loss": 0.6874, "step": 32665 }, { "epoch": 0.36, "learning_rate": 4.397090087695988e-05, "loss": 0.7877, "step": 32670 }, { "epoch": 0.36, "learning_rate": 4.396997814982136e-05, "loss": 0.7802, "step": 32675 }, { "epoch": 0.36, "learning_rate": 4.396905542268285e-05, "loss": 0.7551, "step": 32680 }, { "epoch": 0.36, "learning_rate": 4.396813269554434e-05, "loss": 0.8182, "step": 32685 }, { "epoch": 0.36, "learning_rate": 4.396720996840583e-05, "loss": 0.7954, "step": 32690 }, { "epoch": 0.36, "learning_rate": 4.396628724126731e-05, "loss": 0.8303, "step": 32695 }, { "epoch": 0.36, "learning_rate": 4.39653645141288e-05, "loss": 0.8042, "step": 32700 }, { "epoch": 0.36, "learning_rate": 4.396444178699029e-05, "loss": 0.7425, "step": 32705 }, { "epoch": 0.36, "learning_rate": 4.3963519059851774e-05, "loss": 0.8537, "step": 32710 }, { "epoch": 0.36, "learning_rate": 4.396259633271326e-05, "loss": 0.7572, "step": 32715 }, { "epoch": 0.36, "learning_rate": 4.396167360557475e-05, "loss": 0.7646, "step": 32720 }, { "epoch": 0.36, "learning_rate": 4.396075087843624e-05, "loss": 0.7719, "step": 32725 }, { "epoch": 0.36, "learning_rate": 4.3959828151297725e-05, "loss": 0.7487, "step": 32730 }, { "epoch": 0.36, "learning_rate": 4.395890542415921e-05, "loss": 0.7432, "step": 32735 }, { "epoch": 0.36, "learning_rate": 4.39579826970207e-05, "loss": 0.763, "step": 32740 }, { "epoch": 0.36, "learning_rate": 4.395705996988219e-05, "loss": 0.7231, "step": 32745 }, { "epoch": 0.36, "learning_rate": 4.3956137242743677e-05, "loss": 0.8258, "step": 32750 }, { "epoch": 0.36, "learning_rate": 4.3955214515605164e-05, "loss": 0.7587, "step": 32755 }, { "epoch": 0.36, "learning_rate": 4.395429178846665e-05, "loss": 0.8279, "step": 32760 }, { "epoch": 0.36, "learning_rate": 4.395336906132814e-05, "loss": 0.7969, "step": 32765 }, { "epoch": 0.36, "learning_rate": 4.395244633418963e-05, "loss": 0.7875, "step": 32770 }, { "epoch": 0.36, "learning_rate": 4.3951523607051116e-05, "loss": 0.7765, "step": 32775 }, { "epoch": 0.36, "learning_rate": 4.3950600879912604e-05, "loss": 0.7468, "step": 32780 }, { "epoch": 0.36, "learning_rate": 4.3949678152774085e-05, "loss": 0.7146, "step": 32785 }, { "epoch": 0.36, "learning_rate": 4.394875542563557e-05, "loss": 0.6835, "step": 32790 }, { "epoch": 0.36, "learning_rate": 4.394783269849707e-05, "loss": 0.7938, "step": 32795 }, { "epoch": 0.36, "learning_rate": 4.3946909971358555e-05, "loss": 0.7206, "step": 32800 }, { "epoch": 0.36, "learning_rate": 4.3945987244220036e-05, "loss": 0.7811, "step": 32805 }, { "epoch": 0.36, "learning_rate": 4.3945064517081524e-05, "loss": 0.791, "step": 32810 }, { "epoch": 0.36, "learning_rate": 4.394414178994302e-05, "loss": 0.7837, "step": 32815 }, { "epoch": 0.36, "learning_rate": 4.39432190628045e-05, "loss": 0.7869, "step": 32820 }, { "epoch": 0.36, "learning_rate": 4.394229633566599e-05, "loss": 0.7203, "step": 32825 }, { "epoch": 0.36, "learning_rate": 4.3941373608527475e-05, "loss": 0.7527, "step": 32830 }, { "epoch": 0.36, "learning_rate": 4.394045088138897e-05, "loss": 0.8158, "step": 32835 }, { "epoch": 0.36, "learning_rate": 4.393952815425045e-05, "loss": 0.7532, "step": 32840 }, { "epoch": 0.36, "learning_rate": 4.393860542711194e-05, "loss": 0.779, "step": 32845 }, { "epoch": 0.36, "learning_rate": 4.393768269997343e-05, "loss": 0.8002, "step": 32850 }, { "epoch": 0.36, "learning_rate": 4.3936759972834915e-05, "loss": 0.7842, "step": 32855 }, { "epoch": 0.36, "learning_rate": 4.39358372456964e-05, "loss": 0.7639, "step": 32860 }, { "epoch": 0.36, "learning_rate": 4.393491451855789e-05, "loss": 0.739, "step": 32865 }, { "epoch": 0.36, "learning_rate": 4.393399179141938e-05, "loss": 0.7999, "step": 32870 }, { "epoch": 0.36, "learning_rate": 4.3933069064280866e-05, "loss": 0.7644, "step": 32875 }, { "epoch": 0.36, "learning_rate": 4.3932146337142354e-05, "loss": 0.7539, "step": 32880 }, { "epoch": 0.36, "learning_rate": 4.393122361000384e-05, "loss": 0.7757, "step": 32885 }, { "epoch": 0.36, "learning_rate": 4.393030088286533e-05, "loss": 0.781, "step": 32890 }, { "epoch": 0.36, "learning_rate": 4.392937815572681e-05, "loss": 0.7121, "step": 32895 }, { "epoch": 0.36, "learning_rate": 4.3928455428588305e-05, "loss": 0.7955, "step": 32900 }, { "epoch": 0.36, "learning_rate": 4.392753270144979e-05, "loss": 0.808, "step": 32905 }, { "epoch": 0.36, "learning_rate": 4.392660997431128e-05, "loss": 0.7974, "step": 32910 }, { "epoch": 0.36, "learning_rate": 4.392568724717276e-05, "loss": 0.8259, "step": 32915 }, { "epoch": 0.36, "learning_rate": 4.3924764520034257e-05, "loss": 0.8335, "step": 32920 }, { "epoch": 0.36, "learning_rate": 4.3923841792895744e-05, "loss": 0.7366, "step": 32925 }, { "epoch": 0.36, "learning_rate": 4.3922919065757226e-05, "loss": 0.7811, "step": 32930 }, { "epoch": 0.36, "learning_rate": 4.392199633861871e-05, "loss": 0.7946, "step": 32935 }, { "epoch": 0.36, "learning_rate": 4.392107361148021e-05, "loss": 0.7957, "step": 32940 }, { "epoch": 0.36, "learning_rate": 4.3920150884341696e-05, "loss": 0.7544, "step": 32945 }, { "epoch": 0.36, "learning_rate": 4.391922815720318e-05, "loss": 0.6727, "step": 32950 }, { "epoch": 0.36, "learning_rate": 4.3918305430064665e-05, "loss": 0.7487, "step": 32955 }, { "epoch": 0.36, "learning_rate": 4.391738270292615e-05, "loss": 0.7588, "step": 32960 }, { "epoch": 0.37, "learning_rate": 4.391645997578764e-05, "loss": 0.8126, "step": 32965 }, { "epoch": 0.37, "learning_rate": 4.391553724864913e-05, "loss": 0.7315, "step": 32970 }, { "epoch": 0.37, "learning_rate": 4.3914614521510616e-05, "loss": 0.795, "step": 32975 }, { "epoch": 0.37, "learning_rate": 4.3913691794372104e-05, "loss": 0.7411, "step": 32980 }, { "epoch": 0.37, "learning_rate": 4.391276906723359e-05, "loss": 0.7736, "step": 32985 }, { "epoch": 0.37, "learning_rate": 4.391184634009508e-05, "loss": 0.7755, "step": 32990 }, { "epoch": 0.37, "learning_rate": 4.391092361295657e-05, "loss": 0.7142, "step": 32995 }, { "epoch": 0.37, "learning_rate": 4.3910000885818055e-05, "loss": 0.6997, "step": 33000 }, { "epoch": 0.37, "eval_loss": 0.7602348923683167, "eval_runtime": 69.1927, "eval_samples_per_second": 28.905, "eval_steps_per_second": 14.452, "step": 33000 }, { "epoch": 0.37, "learning_rate": 4.390907815867954e-05, "loss": 0.8092, "step": 33005 }, { "epoch": 0.37, "learning_rate": 4.390815543154103e-05, "loss": 0.7833, "step": 33010 }, { "epoch": 0.37, "learning_rate": 4.390723270440252e-05, "loss": 0.7435, "step": 33015 }, { "epoch": 0.37, "learning_rate": 4.390630997726401e-05, "loss": 0.7867, "step": 33020 }, { "epoch": 0.37, "learning_rate": 4.390538725012549e-05, "loss": 0.7827, "step": 33025 }, { "epoch": 0.37, "learning_rate": 4.390446452298698e-05, "loss": 0.8089, "step": 33030 }, { "epoch": 0.37, "learning_rate": 4.390354179584847e-05, "loss": 0.7683, "step": 33035 }, { "epoch": 0.37, "learning_rate": 4.390261906870995e-05, "loss": 0.7965, "step": 33040 }, { "epoch": 0.37, "learning_rate": 4.390169634157144e-05, "loss": 0.7916, "step": 33045 }, { "epoch": 0.37, "learning_rate": 4.3900773614432934e-05, "loss": 0.7475, "step": 33050 }, { "epoch": 0.37, "learning_rate": 4.389985088729442e-05, "loss": 0.7192, "step": 33055 }, { "epoch": 0.37, "learning_rate": 4.38989281601559e-05, "loss": 0.7128, "step": 33060 }, { "epoch": 0.37, "learning_rate": 4.389800543301739e-05, "loss": 0.7691, "step": 33065 }, { "epoch": 0.37, "learning_rate": 4.3897082705878885e-05, "loss": 0.7069, "step": 33070 }, { "epoch": 0.37, "learning_rate": 4.389615997874037e-05, "loss": 0.7894, "step": 33075 }, { "epoch": 0.37, "learning_rate": 4.3895237251601854e-05, "loss": 0.7389, "step": 33080 }, { "epoch": 0.37, "learning_rate": 4.389431452446334e-05, "loss": 0.7057, "step": 33085 }, { "epoch": 0.37, "learning_rate": 4.389339179732484e-05, "loss": 0.7672, "step": 33090 }, { "epoch": 0.37, "learning_rate": 4.389246907018632e-05, "loss": 0.7586, "step": 33095 }, { "epoch": 0.37, "learning_rate": 4.3891546343047806e-05, "loss": 0.6993, "step": 33100 }, { "epoch": 0.37, "learning_rate": 4.3890623615909293e-05, "loss": 0.7108, "step": 33105 }, { "epoch": 0.37, "learning_rate": 4.388970088877078e-05, "loss": 0.789, "step": 33110 }, { "epoch": 0.37, "learning_rate": 4.388877816163227e-05, "loss": 0.7607, "step": 33115 }, { "epoch": 0.37, "learning_rate": 4.388785543449376e-05, "loss": 0.8105, "step": 33120 }, { "epoch": 0.37, "learning_rate": 4.3886932707355245e-05, "loss": 0.7396, "step": 33125 }, { "epoch": 0.37, "learning_rate": 4.388600998021673e-05, "loss": 0.7697, "step": 33130 }, { "epoch": 0.37, "learning_rate": 4.388508725307822e-05, "loss": 0.7251, "step": 33135 }, { "epoch": 0.37, "learning_rate": 4.388416452593971e-05, "loss": 0.802, "step": 33140 }, { "epoch": 0.37, "learning_rate": 4.3883241798801196e-05, "loss": 0.7623, "step": 33145 }, { "epoch": 0.37, "learning_rate": 4.3882319071662684e-05, "loss": 0.8177, "step": 33150 }, { "epoch": 0.37, "learning_rate": 4.388139634452417e-05, "loss": 0.7421, "step": 33155 }, { "epoch": 0.37, "learning_rate": 4.388047361738566e-05, "loss": 0.7941, "step": 33160 }, { "epoch": 0.37, "learning_rate": 4.387955089024715e-05, "loss": 0.7837, "step": 33165 }, { "epoch": 0.37, "learning_rate": 4.387862816310863e-05, "loss": 0.8001, "step": 33170 }, { "epoch": 0.37, "learning_rate": 4.3877705435970117e-05, "loss": 0.7759, "step": 33175 }, { "epoch": 0.37, "learning_rate": 4.387678270883161e-05, "loss": 0.7603, "step": 33180 }, { "epoch": 0.37, "learning_rate": 4.38758599816931e-05, "loss": 0.7365, "step": 33185 }, { "epoch": 0.37, "learning_rate": 4.387493725455458e-05, "loss": 0.8141, "step": 33190 }, { "epoch": 0.37, "learning_rate": 4.387401452741607e-05, "loss": 0.7853, "step": 33195 }, { "epoch": 0.37, "learning_rate": 4.387309180027756e-05, "loss": 0.7903, "step": 33200 }, { "epoch": 0.37, "learning_rate": 4.3872169073139044e-05, "loss": 0.7327, "step": 33205 }, { "epoch": 0.37, "learning_rate": 4.387124634600053e-05, "loss": 0.7549, "step": 33210 }, { "epoch": 0.37, "learning_rate": 4.387032361886202e-05, "loss": 0.7657, "step": 33215 }, { "epoch": 0.37, "learning_rate": 4.3869400891723514e-05, "loss": 0.8057, "step": 33220 }, { "epoch": 0.37, "learning_rate": 4.3868478164584995e-05, "loss": 0.6974, "step": 33225 }, { "epoch": 0.37, "learning_rate": 4.386755543744648e-05, "loss": 0.7885, "step": 33230 }, { "epoch": 0.37, "learning_rate": 4.386663271030797e-05, "loss": 0.7849, "step": 33235 }, { "epoch": 0.37, "learning_rate": 4.386570998316946e-05, "loss": 0.8383, "step": 33240 }, { "epoch": 0.37, "learning_rate": 4.3864787256030946e-05, "loss": 0.8156, "step": 33245 }, { "epoch": 0.37, "learning_rate": 4.3863864528892434e-05, "loss": 0.8268, "step": 33250 }, { "epoch": 0.37, "learning_rate": 4.386294180175392e-05, "loss": 0.7382, "step": 33255 }, { "epoch": 0.37, "learning_rate": 4.386201907461541e-05, "loss": 0.7663, "step": 33260 }, { "epoch": 0.37, "learning_rate": 4.38610963474769e-05, "loss": 0.7569, "step": 33265 }, { "epoch": 0.37, "learning_rate": 4.3860173620338386e-05, "loss": 0.7689, "step": 33270 }, { "epoch": 0.37, "learning_rate": 4.3859250893199874e-05, "loss": 0.7569, "step": 33275 }, { "epoch": 0.37, "learning_rate": 4.3858328166061355e-05, "loss": 0.7712, "step": 33280 }, { "epoch": 0.37, "learning_rate": 4.385740543892285e-05, "loss": 0.7562, "step": 33285 }, { "epoch": 0.37, "learning_rate": 4.385648271178434e-05, "loss": 0.7632, "step": 33290 }, { "epoch": 0.37, "learning_rate": 4.3855559984645825e-05, "loss": 0.7233, "step": 33295 }, { "epoch": 0.37, "learning_rate": 4.3854637257507306e-05, "loss": 0.7543, "step": 33300 }, { "epoch": 0.37, "learning_rate": 4.38537145303688e-05, "loss": 0.768, "step": 33305 }, { "epoch": 0.37, "learning_rate": 4.385279180323029e-05, "loss": 0.8322, "step": 33310 }, { "epoch": 0.37, "learning_rate": 4.385186907609177e-05, "loss": 0.81, "step": 33315 }, { "epoch": 0.37, "learning_rate": 4.385094634895326e-05, "loss": 0.7383, "step": 33320 }, { "epoch": 0.37, "learning_rate": 4.3850023621814745e-05, "loss": 0.7416, "step": 33325 }, { "epoch": 0.37, "learning_rate": 4.384910089467624e-05, "loss": 0.7515, "step": 33330 }, { "epoch": 0.37, "learning_rate": 4.384817816753772e-05, "loss": 0.7418, "step": 33335 }, { "epoch": 0.37, "learning_rate": 4.384725544039921e-05, "loss": 0.7903, "step": 33340 }, { "epoch": 0.37, "learning_rate": 4.38463327132607e-05, "loss": 0.8361, "step": 33345 }, { "epoch": 0.37, "learning_rate": 4.384540998612219e-05, "loss": 0.7461, "step": 33350 }, { "epoch": 0.37, "learning_rate": 4.384448725898367e-05, "loss": 0.7404, "step": 33355 }, { "epoch": 0.37, "learning_rate": 4.384356453184516e-05, "loss": 0.7762, "step": 33360 }, { "epoch": 0.37, "learning_rate": 4.384264180470665e-05, "loss": 0.7733, "step": 33365 }, { "epoch": 0.37, "learning_rate": 4.3841719077568136e-05, "loss": 0.7727, "step": 33370 }, { "epoch": 0.37, "learning_rate": 4.3840796350429624e-05, "loss": 0.8245, "step": 33375 }, { "epoch": 0.37, "learning_rate": 4.383987362329111e-05, "loss": 0.7183, "step": 33380 }, { "epoch": 0.37, "learning_rate": 4.38389508961526e-05, "loss": 0.79, "step": 33385 }, { "epoch": 0.37, "learning_rate": 4.383802816901409e-05, "loss": 0.7904, "step": 33390 }, { "epoch": 0.37, "learning_rate": 4.3837105441875575e-05, "loss": 0.7629, "step": 33395 }, { "epoch": 0.37, "learning_rate": 4.383618271473706e-05, "loss": 0.7741, "step": 33400 }, { "epoch": 0.37, "learning_rate": 4.383525998759855e-05, "loss": 0.7372, "step": 33405 }, { "epoch": 0.37, "learning_rate": 4.383433726046003e-05, "loss": 0.7802, "step": 33410 }, { "epoch": 0.37, "learning_rate": 4.3833414533321526e-05, "loss": 0.7784, "step": 33415 }, { "epoch": 0.37, "learning_rate": 4.3832491806183014e-05, "loss": 0.7049, "step": 33420 }, { "epoch": 0.37, "learning_rate": 4.38315690790445e-05, "loss": 0.8157, "step": 33425 }, { "epoch": 0.37, "learning_rate": 4.383064635190598e-05, "loss": 0.8413, "step": 33430 }, { "epoch": 0.37, "learning_rate": 4.382972362476748e-05, "loss": 0.7869, "step": 33435 }, { "epoch": 0.37, "learning_rate": 4.3828800897628966e-05, "loss": 0.8499, "step": 33440 }, { "epoch": 0.37, "learning_rate": 4.382787817049045e-05, "loss": 0.7361, "step": 33445 }, { "epoch": 0.37, "learning_rate": 4.3826955443351935e-05, "loss": 0.7867, "step": 33450 }, { "epoch": 0.37, "learning_rate": 4.382603271621343e-05, "loss": 0.7743, "step": 33455 }, { "epoch": 0.37, "learning_rate": 4.382510998907492e-05, "loss": 0.7687, "step": 33460 }, { "epoch": 0.37, "learning_rate": 4.38241872619364e-05, "loss": 0.7325, "step": 33465 }, { "epoch": 0.37, "learning_rate": 4.3823264534797886e-05, "loss": 0.7925, "step": 33470 }, { "epoch": 0.37, "learning_rate": 4.3822341807659374e-05, "loss": 0.7691, "step": 33475 }, { "epoch": 0.37, "learning_rate": 4.382141908052086e-05, "loss": 0.7698, "step": 33480 }, { "epoch": 0.37, "learning_rate": 4.382049635338235e-05, "loss": 0.7603, "step": 33485 }, { "epoch": 0.37, "learning_rate": 4.381957362624384e-05, "loss": 0.756, "step": 33490 }, { "epoch": 0.37, "learning_rate": 4.3818650899105325e-05, "loss": 0.726, "step": 33495 }, { "epoch": 0.37, "learning_rate": 4.381772817196681e-05, "loss": 0.7482, "step": 33500 }, { "epoch": 0.37, "learning_rate": 4.38168054448283e-05, "loss": 0.7485, "step": 33505 }, { "epoch": 0.37, "learning_rate": 4.381588271768979e-05, "loss": 0.7224, "step": 33510 }, { "epoch": 0.37, "learning_rate": 4.381495999055128e-05, "loss": 0.7446, "step": 33515 }, { "epoch": 0.37, "learning_rate": 4.3814037263412765e-05, "loss": 0.6947, "step": 33520 }, { "epoch": 0.37, "learning_rate": 4.381311453627425e-05, "loss": 0.7617, "step": 33525 }, { "epoch": 0.37, "learning_rate": 4.381219180913574e-05, "loss": 0.7269, "step": 33530 }, { "epoch": 0.37, "learning_rate": 4.381126908199723e-05, "loss": 0.7989, "step": 33535 }, { "epoch": 0.37, "learning_rate": 4.3810346354858716e-05, "loss": 0.762, "step": 33540 }, { "epoch": 0.37, "learning_rate": 4.3809423627720204e-05, "loss": 0.8198, "step": 33545 }, { "epoch": 0.37, "learning_rate": 4.380850090058169e-05, "loss": 0.7832, "step": 33550 }, { "epoch": 0.37, "learning_rate": 4.380757817344317e-05, "loss": 0.7431, "step": 33555 }, { "epoch": 0.37, "learning_rate": 4.380665544630466e-05, "loss": 0.7825, "step": 33560 }, { "epoch": 0.37, "learning_rate": 4.3805732719166155e-05, "loss": 0.7237, "step": 33565 }, { "epoch": 0.37, "learning_rate": 4.380480999202764e-05, "loss": 0.8048, "step": 33570 }, { "epoch": 0.37, "learning_rate": 4.3803887264889124e-05, "loss": 0.7913, "step": 33575 }, { "epoch": 0.37, "learning_rate": 4.380296453775061e-05, "loss": 0.7538, "step": 33580 }, { "epoch": 0.37, "learning_rate": 4.3802041810612107e-05, "loss": 0.782, "step": 33585 }, { "epoch": 0.37, "learning_rate": 4.380111908347359e-05, "loss": 0.8012, "step": 33590 }, { "epoch": 0.37, "learning_rate": 4.3800196356335075e-05, "loss": 0.726, "step": 33595 }, { "epoch": 0.37, "learning_rate": 4.379927362919656e-05, "loss": 0.7554, "step": 33600 }, { "epoch": 0.37, "learning_rate": 4.379835090205806e-05, "loss": 0.8226, "step": 33605 }, { "epoch": 0.37, "learning_rate": 4.379742817491954e-05, "loss": 0.7462, "step": 33610 }, { "epoch": 0.37, "learning_rate": 4.379650544778103e-05, "loss": 0.7953, "step": 33615 }, { "epoch": 0.37, "learning_rate": 4.3795582720642515e-05, "loss": 0.6981, "step": 33620 }, { "epoch": 0.37, "learning_rate": 4.3794659993504e-05, "loss": 0.7453, "step": 33625 }, { "epoch": 0.37, "learning_rate": 4.379373726636549e-05, "loss": 0.768, "step": 33630 }, { "epoch": 0.37, "learning_rate": 4.379281453922698e-05, "loss": 0.724, "step": 33635 }, { "epoch": 0.37, "learning_rate": 4.3791891812088466e-05, "loss": 0.8075, "step": 33640 }, { "epoch": 0.37, "learning_rate": 4.3790969084949954e-05, "loss": 0.7564, "step": 33645 }, { "epoch": 0.37, "learning_rate": 4.379004635781144e-05, "loss": 0.8117, "step": 33650 }, { "epoch": 0.37, "learning_rate": 4.378912363067293e-05, "loss": 0.7266, "step": 33655 }, { "epoch": 0.37, "learning_rate": 4.378820090353442e-05, "loss": 0.7733, "step": 33660 }, { "epoch": 0.37, "learning_rate": 4.37872781763959e-05, "loss": 0.7994, "step": 33665 }, { "epoch": 0.37, "learning_rate": 4.378635544925739e-05, "loss": 0.8043, "step": 33670 }, { "epoch": 0.37, "learning_rate": 4.378543272211888e-05, "loss": 0.7123, "step": 33675 }, { "epoch": 0.37, "learning_rate": 4.378450999498037e-05, "loss": 0.7328, "step": 33680 }, { "epoch": 0.37, "learning_rate": 4.378358726784185e-05, "loss": 0.7942, "step": 33685 }, { "epoch": 0.37, "learning_rate": 4.3782664540703345e-05, "loss": 0.7443, "step": 33690 }, { "epoch": 0.37, "learning_rate": 4.378174181356483e-05, "loss": 0.7483, "step": 33695 }, { "epoch": 0.37, "learning_rate": 4.3780819086426314e-05, "loss": 0.8054, "step": 33700 }, { "epoch": 0.37, "learning_rate": 4.37798963592878e-05, "loss": 0.7665, "step": 33705 }, { "epoch": 0.37, "learning_rate": 4.377897363214929e-05, "loss": 0.7984, "step": 33710 }, { "epoch": 0.37, "learning_rate": 4.3778050905010784e-05, "loss": 0.786, "step": 33715 }, { "epoch": 0.37, "learning_rate": 4.3777128177872265e-05, "loss": 0.7965, "step": 33720 }, { "epoch": 0.37, "learning_rate": 4.377620545073375e-05, "loss": 0.8048, "step": 33725 }, { "epoch": 0.37, "learning_rate": 4.377528272359524e-05, "loss": 0.8028, "step": 33730 }, { "epoch": 0.37, "learning_rate": 4.3774359996456735e-05, "loss": 0.7739, "step": 33735 }, { "epoch": 0.37, "learning_rate": 4.3773437269318216e-05, "loss": 0.774, "step": 33740 }, { "epoch": 0.37, "learning_rate": 4.3772514542179704e-05, "loss": 0.7351, "step": 33745 }, { "epoch": 0.37, "learning_rate": 4.377159181504119e-05, "loss": 0.7846, "step": 33750 }, { "epoch": 0.37, "learning_rate": 4.377066908790268e-05, "loss": 0.757, "step": 33755 }, { "epoch": 0.37, "learning_rate": 4.376974636076417e-05, "loss": 0.7768, "step": 33760 }, { "epoch": 0.37, "learning_rate": 4.3768823633625656e-05, "loss": 0.7542, "step": 33765 }, { "epoch": 0.37, "learning_rate": 4.3767900906487143e-05, "loss": 0.7636, "step": 33770 }, { "epoch": 0.37, "learning_rate": 4.376697817934863e-05, "loss": 0.7714, "step": 33775 }, { "epoch": 0.37, "learning_rate": 4.376605545221012e-05, "loss": 0.7747, "step": 33780 }, { "epoch": 0.37, "learning_rate": 4.376513272507161e-05, "loss": 0.807, "step": 33785 }, { "epoch": 0.37, "learning_rate": 4.3764209997933095e-05, "loss": 0.7311, "step": 33790 }, { "epoch": 0.37, "learning_rate": 4.3763287270794576e-05, "loss": 0.7169, "step": 33795 }, { "epoch": 0.37, "learning_rate": 4.376236454365607e-05, "loss": 0.7391, "step": 33800 }, { "epoch": 0.37, "learning_rate": 4.376144181651756e-05, "loss": 0.8057, "step": 33805 }, { "epoch": 0.37, "learning_rate": 4.3760519089379046e-05, "loss": 0.7402, "step": 33810 }, { "epoch": 0.37, "learning_rate": 4.375959636224053e-05, "loss": 0.7382, "step": 33815 }, { "epoch": 0.37, "learning_rate": 4.375867363510202e-05, "loss": 0.7665, "step": 33820 }, { "epoch": 0.37, "learning_rate": 4.375775090796351e-05, "loss": 0.724, "step": 33825 }, { "epoch": 0.37, "learning_rate": 4.375682818082499e-05, "loss": 0.8017, "step": 33830 }, { "epoch": 0.37, "learning_rate": 4.375590545368648e-05, "loss": 0.8074, "step": 33835 }, { "epoch": 0.37, "learning_rate": 4.375498272654797e-05, "loss": 0.8101, "step": 33840 }, { "epoch": 0.37, "learning_rate": 4.375405999940946e-05, "loss": 0.7863, "step": 33845 }, { "epoch": 0.37, "learning_rate": 4.375313727227094e-05, "loss": 0.7746, "step": 33850 }, { "epoch": 0.37, "learning_rate": 4.375221454513243e-05, "loss": 0.7388, "step": 33855 }, { "epoch": 0.37, "learning_rate": 4.375129181799392e-05, "loss": 0.7838, "step": 33860 }, { "epoch": 0.37, "learning_rate": 4.3750369090855406e-05, "loss": 0.8142, "step": 33865 }, { "epoch": 0.38, "learning_rate": 4.3749446363716894e-05, "loss": 0.8212, "step": 33870 }, { "epoch": 0.38, "learning_rate": 4.374852363657838e-05, "loss": 0.7896, "step": 33875 }, { "epoch": 0.38, "learning_rate": 4.374760090943987e-05, "loss": 0.698, "step": 33880 }, { "epoch": 0.38, "learning_rate": 4.374667818230136e-05, "loss": 0.8099, "step": 33885 }, { "epoch": 0.38, "learning_rate": 4.3745755455162845e-05, "loss": 0.7786, "step": 33890 }, { "epoch": 0.38, "learning_rate": 4.374483272802433e-05, "loss": 0.7686, "step": 33895 }, { "epoch": 0.38, "learning_rate": 4.374391000088582e-05, "loss": 0.7797, "step": 33900 }, { "epoch": 0.38, "learning_rate": 4.374298727374731e-05, "loss": 0.792, "step": 33905 }, { "epoch": 0.38, "learning_rate": 4.3742064546608796e-05, "loss": 0.7665, "step": 33910 }, { "epoch": 0.38, "learning_rate": 4.3741141819470284e-05, "loss": 0.7088, "step": 33915 }, { "epoch": 0.38, "learning_rate": 4.374021909233177e-05, "loss": 0.762, "step": 33920 }, { "epoch": 0.38, "learning_rate": 4.373929636519326e-05, "loss": 0.6889, "step": 33925 }, { "epoch": 0.38, "learning_rate": 4.373837363805475e-05, "loss": 0.7804, "step": 33930 }, { "epoch": 0.38, "learning_rate": 4.3737450910916236e-05, "loss": 0.8552, "step": 33935 }, { "epoch": 0.38, "learning_rate": 4.373652818377772e-05, "loss": 0.768, "step": 33940 }, { "epoch": 0.38, "learning_rate": 4.3735605456639205e-05, "loss": 0.8192, "step": 33945 }, { "epoch": 0.38, "learning_rate": 4.37346827295007e-05, "loss": 0.8228, "step": 33950 }, { "epoch": 0.38, "learning_rate": 4.373376000236219e-05, "loss": 0.7594, "step": 33955 }, { "epoch": 0.38, "learning_rate": 4.373283727522367e-05, "loss": 0.7197, "step": 33960 }, { "epoch": 0.38, "learning_rate": 4.3731914548085156e-05, "loss": 0.7754, "step": 33965 }, { "epoch": 0.38, "learning_rate": 4.373099182094665e-05, "loss": 0.7629, "step": 33970 }, { "epoch": 0.38, "learning_rate": 4.373006909380813e-05, "loss": 0.7666, "step": 33975 }, { "epoch": 0.38, "learning_rate": 4.372914636666962e-05, "loss": 0.8417, "step": 33980 }, { "epoch": 0.38, "learning_rate": 4.372822363953111e-05, "loss": 0.808, "step": 33985 }, { "epoch": 0.38, "learning_rate": 4.37273009123926e-05, "loss": 0.7157, "step": 33990 }, { "epoch": 0.38, "learning_rate": 4.372637818525408e-05, "loss": 0.7138, "step": 33995 }, { "epoch": 0.38, "learning_rate": 4.372545545811557e-05, "loss": 0.8116, "step": 34000 }, { "epoch": 0.38, "eval_loss": 0.7566010355949402, "eval_runtime": 69.1842, "eval_samples_per_second": 28.908, "eval_steps_per_second": 14.454, "step": 34000 }, { "epoch": 0.38, "learning_rate": 4.372453273097706e-05, "loss": 0.7458, "step": 34005 }, { "epoch": 0.38, "learning_rate": 4.3723610003838547e-05, "loss": 0.7886, "step": 34010 }, { "epoch": 0.38, "learning_rate": 4.3722687276700034e-05, "loss": 0.7132, "step": 34015 }, { "epoch": 0.38, "learning_rate": 4.372176454956152e-05, "loss": 0.7933, "step": 34020 }, { "epoch": 0.38, "learning_rate": 4.372084182242301e-05, "loss": 0.8368, "step": 34025 }, { "epoch": 0.38, "learning_rate": 4.37199190952845e-05, "loss": 0.7589, "step": 34030 }, { "epoch": 0.38, "learning_rate": 4.3718996368145986e-05, "loss": 0.7838, "step": 34035 }, { "epoch": 0.38, "learning_rate": 4.3718073641007474e-05, "loss": 0.7732, "step": 34040 }, { "epoch": 0.38, "learning_rate": 4.371715091386896e-05, "loss": 0.7991, "step": 34045 }, { "epoch": 0.38, "learning_rate": 4.371622818673044e-05, "loss": 0.7862, "step": 34050 }, { "epoch": 0.38, "learning_rate": 4.371530545959194e-05, "loss": 0.7546, "step": 34055 }, { "epoch": 0.38, "learning_rate": 4.3714382732453425e-05, "loss": 0.807, "step": 34060 }, { "epoch": 0.38, "learning_rate": 4.371346000531491e-05, "loss": 0.7544, "step": 34065 }, { "epoch": 0.38, "learning_rate": 4.3712537278176394e-05, "loss": 0.7265, "step": 34070 }, { "epoch": 0.38, "learning_rate": 4.371161455103789e-05, "loss": 0.758, "step": 34075 }, { "epoch": 0.38, "learning_rate": 4.3710691823899376e-05, "loss": 0.7864, "step": 34080 }, { "epoch": 0.38, "learning_rate": 4.370976909676086e-05, "loss": 0.7805, "step": 34085 }, { "epoch": 0.38, "learning_rate": 4.3708846369622345e-05, "loss": 0.7439, "step": 34090 }, { "epoch": 0.38, "learning_rate": 4.370792364248383e-05, "loss": 0.7588, "step": 34095 }, { "epoch": 0.38, "learning_rate": 4.370700091534533e-05, "loss": 0.7453, "step": 34100 }, { "epoch": 0.38, "learning_rate": 4.370607818820681e-05, "loss": 0.7959, "step": 34105 }, { "epoch": 0.38, "learning_rate": 4.37051554610683e-05, "loss": 0.7256, "step": 34110 }, { "epoch": 0.38, "learning_rate": 4.3704232733929785e-05, "loss": 0.758, "step": 34115 }, { "epoch": 0.38, "learning_rate": 4.370331000679128e-05, "loss": 0.7801, "step": 34120 }, { "epoch": 0.38, "learning_rate": 4.370238727965276e-05, "loss": 0.8352, "step": 34125 }, { "epoch": 0.38, "learning_rate": 4.370146455251425e-05, "loss": 0.7878, "step": 34130 }, { "epoch": 0.38, "learning_rate": 4.3700541825375736e-05, "loss": 0.7739, "step": 34135 }, { "epoch": 0.38, "learning_rate": 4.3699619098237224e-05, "loss": 0.8254, "step": 34140 }, { "epoch": 0.38, "learning_rate": 4.369869637109871e-05, "loss": 0.8195, "step": 34145 }, { "epoch": 0.38, "learning_rate": 4.36977736439602e-05, "loss": 0.7452, "step": 34150 }, { "epoch": 0.38, "learning_rate": 4.369685091682169e-05, "loss": 0.8099, "step": 34155 }, { "epoch": 0.38, "learning_rate": 4.369592818968317e-05, "loss": 0.7802, "step": 34160 }, { "epoch": 0.38, "learning_rate": 4.369500546254466e-05, "loss": 0.7823, "step": 34165 }, { "epoch": 0.38, "learning_rate": 4.369408273540615e-05, "loss": 0.7514, "step": 34170 }, { "epoch": 0.38, "learning_rate": 4.369316000826764e-05, "loss": 0.7491, "step": 34175 }, { "epoch": 0.38, "learning_rate": 4.369223728112912e-05, "loss": 0.7476, "step": 34180 }, { "epoch": 0.38, "learning_rate": 4.3691314553990615e-05, "loss": 0.8067, "step": 34185 }, { "epoch": 0.38, "learning_rate": 4.36903918268521e-05, "loss": 0.8052, "step": 34190 }, { "epoch": 0.38, "learning_rate": 4.368946909971359e-05, "loss": 0.7857, "step": 34195 }, { "epoch": 0.38, "learning_rate": 4.368854637257507e-05, "loss": 0.7391, "step": 34200 }, { "epoch": 0.38, "learning_rate": 4.3687623645436566e-05, "loss": 0.7585, "step": 34205 }, { "epoch": 0.38, "learning_rate": 4.3686700918298054e-05, "loss": 0.6878, "step": 34210 }, { "epoch": 0.38, "learning_rate": 4.3685778191159535e-05, "loss": 0.8075, "step": 34215 }, { "epoch": 0.38, "learning_rate": 4.368485546402102e-05, "loss": 0.7526, "step": 34220 }, { "epoch": 0.38, "learning_rate": 4.368393273688252e-05, "loss": 0.7469, "step": 34225 }, { "epoch": 0.38, "learning_rate": 4.3683010009744005e-05, "loss": 0.7501, "step": 34230 }, { "epoch": 0.38, "learning_rate": 4.3682087282605486e-05, "loss": 0.734, "step": 34235 }, { "epoch": 0.38, "learning_rate": 4.3681164555466974e-05, "loss": 0.7575, "step": 34240 }, { "epoch": 0.38, "learning_rate": 4.368024182832846e-05, "loss": 0.7677, "step": 34245 }, { "epoch": 0.38, "learning_rate": 4.367931910118995e-05, "loss": 0.7723, "step": 34250 }, { "epoch": 0.38, "learning_rate": 4.367839637405144e-05, "loss": 0.7608, "step": 34255 }, { "epoch": 0.38, "learning_rate": 4.3677473646912925e-05, "loss": 0.7403, "step": 34260 }, { "epoch": 0.38, "learning_rate": 4.367655091977441e-05, "loss": 0.79, "step": 34265 }, { "epoch": 0.38, "learning_rate": 4.36756281926359e-05, "loss": 0.7466, "step": 34270 }, { "epoch": 0.38, "learning_rate": 4.367470546549739e-05, "loss": 0.7336, "step": 34275 }, { "epoch": 0.38, "learning_rate": 4.367378273835888e-05, "loss": 0.7492, "step": 34280 }, { "epoch": 0.38, "learning_rate": 4.3672860011220365e-05, "loss": 0.7751, "step": 34285 }, { "epoch": 0.38, "learning_rate": 4.367193728408185e-05, "loss": 0.7402, "step": 34290 }, { "epoch": 0.38, "learning_rate": 4.367101455694334e-05, "loss": 0.8176, "step": 34295 }, { "epoch": 0.38, "learning_rate": 4.367009182980483e-05, "loss": 0.7512, "step": 34300 }, { "epoch": 0.38, "learning_rate": 4.3669169102666316e-05, "loss": 0.6853, "step": 34305 }, { "epoch": 0.38, "learning_rate": 4.36682463755278e-05, "loss": 0.701, "step": 34310 }, { "epoch": 0.38, "learning_rate": 4.366732364838929e-05, "loss": 0.8345, "step": 34315 }, { "epoch": 0.38, "learning_rate": 4.366640092125078e-05, "loss": 0.7585, "step": 34320 }, { "epoch": 0.38, "learning_rate": 4.366547819411226e-05, "loss": 0.8247, "step": 34325 }, { "epoch": 0.38, "learning_rate": 4.366455546697375e-05, "loss": 0.7488, "step": 34330 }, { "epoch": 0.38, "learning_rate": 4.366363273983524e-05, "loss": 0.7578, "step": 34335 }, { "epoch": 0.38, "learning_rate": 4.366271001269673e-05, "loss": 0.7729, "step": 34340 }, { "epoch": 0.38, "learning_rate": 4.366178728555821e-05, "loss": 0.7573, "step": 34345 }, { "epoch": 0.38, "learning_rate": 4.36608645584197e-05, "loss": 0.7413, "step": 34350 }, { "epoch": 0.38, "learning_rate": 4.3659941831281195e-05, "loss": 0.7546, "step": 34355 }, { "epoch": 0.38, "learning_rate": 4.3659019104142676e-05, "loss": 0.8045, "step": 34360 }, { "epoch": 0.38, "learning_rate": 4.3658096377004164e-05, "loss": 0.7758, "step": 34365 }, { "epoch": 0.38, "learning_rate": 4.365717364986565e-05, "loss": 0.8362, "step": 34370 }, { "epoch": 0.38, "learning_rate": 4.3656250922727146e-05, "loss": 0.7725, "step": 34375 }, { "epoch": 0.38, "learning_rate": 4.365532819558863e-05, "loss": 0.8244, "step": 34380 }, { "epoch": 0.38, "learning_rate": 4.3654405468450115e-05, "loss": 0.7688, "step": 34385 }, { "epoch": 0.38, "learning_rate": 4.36534827413116e-05, "loss": 0.7787, "step": 34390 }, { "epoch": 0.38, "learning_rate": 4.365256001417309e-05, "loss": 0.742, "step": 34395 }, { "epoch": 0.38, "learning_rate": 4.365163728703458e-05, "loss": 0.7709, "step": 34400 }, { "epoch": 0.38, "learning_rate": 4.3650714559896066e-05, "loss": 0.7831, "step": 34405 }, { "epoch": 0.38, "learning_rate": 4.3649791832757554e-05, "loss": 0.7568, "step": 34410 }, { "epoch": 0.38, "learning_rate": 4.364886910561904e-05, "loss": 0.7633, "step": 34415 }, { "epoch": 0.38, "learning_rate": 4.364794637848053e-05, "loss": 0.7543, "step": 34420 }, { "epoch": 0.38, "learning_rate": 4.364702365134202e-05, "loss": 0.7516, "step": 34425 }, { "epoch": 0.38, "learning_rate": 4.3646100924203506e-05, "loss": 0.7484, "step": 34430 }, { "epoch": 0.38, "learning_rate": 4.364517819706499e-05, "loss": 0.7612, "step": 34435 }, { "epoch": 0.38, "learning_rate": 4.364425546992648e-05, "loss": 0.7944, "step": 34440 }, { "epoch": 0.38, "learning_rate": 4.364333274278797e-05, "loss": 0.7566, "step": 34445 }, { "epoch": 0.38, "learning_rate": 4.364241001564946e-05, "loss": 0.7643, "step": 34450 }, { "epoch": 0.38, "learning_rate": 4.364148728851094e-05, "loss": 0.8433, "step": 34455 }, { "epoch": 0.38, "learning_rate": 4.3640564561372426e-05, "loss": 0.7748, "step": 34460 }, { "epoch": 0.38, "learning_rate": 4.363964183423392e-05, "loss": 0.8353, "step": 34465 }, { "epoch": 0.38, "learning_rate": 4.36387191070954e-05, "loss": 0.7574, "step": 34470 }, { "epoch": 0.38, "learning_rate": 4.363779637995689e-05, "loss": 0.7923, "step": 34475 }, { "epoch": 0.38, "learning_rate": 4.363687365281838e-05, "loss": 0.7322, "step": 34480 }, { "epoch": 0.38, "learning_rate": 4.363595092567987e-05, "loss": 0.7977, "step": 34485 }, { "epoch": 0.38, "learning_rate": 4.363502819854135e-05, "loss": 0.8041, "step": 34490 }, { "epoch": 0.38, "learning_rate": 4.363410547140284e-05, "loss": 0.7544, "step": 34495 }, { "epoch": 0.38, "learning_rate": 4.363318274426433e-05, "loss": 0.8, "step": 34500 }, { "epoch": 0.38, "learning_rate": 4.363226001712582e-05, "loss": 0.7446, "step": 34505 }, { "epoch": 0.38, "learning_rate": 4.3631337289987304e-05, "loss": 0.7974, "step": 34510 }, { "epoch": 0.38, "learning_rate": 4.363041456284879e-05, "loss": 0.7176, "step": 34515 }, { "epoch": 0.38, "learning_rate": 4.362949183571028e-05, "loss": 0.7828, "step": 34520 }, { "epoch": 0.38, "learning_rate": 4.362856910857177e-05, "loss": 0.7304, "step": 34525 }, { "epoch": 0.38, "learning_rate": 4.3627646381433256e-05, "loss": 0.7287, "step": 34530 }, { "epoch": 0.38, "learning_rate": 4.3626723654294744e-05, "loss": 0.7984, "step": 34535 }, { "epoch": 0.38, "learning_rate": 4.362580092715623e-05, "loss": 0.7727, "step": 34540 }, { "epoch": 0.38, "learning_rate": 4.362487820001771e-05, "loss": 0.7501, "step": 34545 }, { "epoch": 0.38, "learning_rate": 4.362395547287921e-05, "loss": 0.7782, "step": 34550 }, { "epoch": 0.38, "learning_rate": 4.3623032745740695e-05, "loss": 0.7768, "step": 34555 }, { "epoch": 0.38, "learning_rate": 4.362211001860218e-05, "loss": 0.7299, "step": 34560 }, { "epoch": 0.38, "learning_rate": 4.3621187291463664e-05, "loss": 0.7767, "step": 34565 }, { "epoch": 0.38, "learning_rate": 4.362026456432516e-05, "loss": 0.7496, "step": 34570 }, { "epoch": 0.38, "learning_rate": 4.3619341837186646e-05, "loss": 0.7861, "step": 34575 }, { "epoch": 0.38, "learning_rate": 4.3618419110048134e-05, "loss": 0.7382, "step": 34580 }, { "epoch": 0.38, "learning_rate": 4.3617496382909615e-05, "loss": 0.8093, "step": 34585 }, { "epoch": 0.38, "learning_rate": 4.361657365577111e-05, "loss": 0.7339, "step": 34590 }, { "epoch": 0.38, "learning_rate": 4.36156509286326e-05, "loss": 0.7212, "step": 34595 }, { "epoch": 0.38, "learning_rate": 4.361472820149408e-05, "loss": 0.7778, "step": 34600 }, { "epoch": 0.38, "learning_rate": 4.361380547435557e-05, "loss": 0.7417, "step": 34605 }, { "epoch": 0.38, "learning_rate": 4.361288274721706e-05, "loss": 0.7304, "step": 34610 }, { "epoch": 0.38, "learning_rate": 4.361196002007855e-05, "loss": 0.6911, "step": 34615 }, { "epoch": 0.38, "learning_rate": 4.361103729294003e-05, "loss": 0.7564, "step": 34620 }, { "epoch": 0.38, "learning_rate": 4.361011456580152e-05, "loss": 0.7357, "step": 34625 }, { "epoch": 0.38, "learning_rate": 4.3609191838663006e-05, "loss": 0.7355, "step": 34630 }, { "epoch": 0.38, "learning_rate": 4.3608269111524494e-05, "loss": 0.7711, "step": 34635 }, { "epoch": 0.38, "learning_rate": 4.360734638438598e-05, "loss": 0.7218, "step": 34640 }, { "epoch": 0.38, "learning_rate": 4.360642365724747e-05, "loss": 0.716, "step": 34645 }, { "epoch": 0.38, "learning_rate": 4.360550093010896e-05, "loss": 0.7654, "step": 34650 }, { "epoch": 0.38, "learning_rate": 4.3604578202970445e-05, "loss": 0.7694, "step": 34655 }, { "epoch": 0.38, "learning_rate": 4.360365547583193e-05, "loss": 0.7815, "step": 34660 }, { "epoch": 0.38, "learning_rate": 4.360273274869342e-05, "loss": 0.7336, "step": 34665 }, { "epoch": 0.38, "learning_rate": 4.360181002155491e-05, "loss": 0.7357, "step": 34670 }, { "epoch": 0.38, "learning_rate": 4.3600887294416397e-05, "loss": 0.7573, "step": 34675 }, { "epoch": 0.38, "learning_rate": 4.3599964567277884e-05, "loss": 0.7625, "step": 34680 }, { "epoch": 0.38, "learning_rate": 4.359904184013937e-05, "loss": 0.7566, "step": 34685 }, { "epoch": 0.38, "learning_rate": 4.359811911300086e-05, "loss": 0.7254, "step": 34690 }, { "epoch": 0.38, "learning_rate": 4.359719638586234e-05, "loss": 0.7118, "step": 34695 }, { "epoch": 0.38, "learning_rate": 4.3596273658723836e-05, "loss": 0.741, "step": 34700 }, { "epoch": 0.38, "learning_rate": 4.3595350931585324e-05, "loss": 0.7324, "step": 34705 }, { "epoch": 0.38, "learning_rate": 4.3594428204446805e-05, "loss": 0.7799, "step": 34710 }, { "epoch": 0.38, "learning_rate": 4.359350547730829e-05, "loss": 0.7686, "step": 34715 }, { "epoch": 0.38, "learning_rate": 4.359258275016979e-05, "loss": 0.8303, "step": 34720 }, { "epoch": 0.38, "learning_rate": 4.3591660023031275e-05, "loss": 0.7772, "step": 34725 }, { "epoch": 0.38, "learning_rate": 4.3590737295892756e-05, "loss": 0.7982, "step": 34730 }, { "epoch": 0.38, "learning_rate": 4.3589814568754244e-05, "loss": 0.7704, "step": 34735 }, { "epoch": 0.38, "learning_rate": 4.358889184161574e-05, "loss": 0.7889, "step": 34740 }, { "epoch": 0.38, "learning_rate": 4.358796911447722e-05, "loss": 0.8063, "step": 34745 }, { "epoch": 0.38, "learning_rate": 4.358704638733871e-05, "loss": 0.7853, "step": 34750 }, { "epoch": 0.38, "learning_rate": 4.3586123660200195e-05, "loss": 0.7245, "step": 34755 }, { "epoch": 0.38, "learning_rate": 4.358520093306169e-05, "loss": 0.6749, "step": 34760 }, { "epoch": 0.38, "learning_rate": 4.358427820592317e-05, "loss": 0.756, "step": 34765 }, { "epoch": 0.38, "learning_rate": 4.358335547878466e-05, "loss": 0.7805, "step": 34770 }, { "epoch": 0.39, "learning_rate": 4.358243275164615e-05, "loss": 0.735, "step": 34775 }, { "epoch": 0.39, "learning_rate": 4.3581510024507635e-05, "loss": 0.8361, "step": 34780 }, { "epoch": 0.39, "learning_rate": 4.358058729736912e-05, "loss": 0.7588, "step": 34785 }, { "epoch": 0.39, "learning_rate": 4.357966457023061e-05, "loss": 0.7812, "step": 34790 }, { "epoch": 0.39, "learning_rate": 4.35787418430921e-05, "loss": 0.7557, "step": 34795 }, { "epoch": 0.39, "learning_rate": 4.3577819115953586e-05, "loss": 0.7709, "step": 34800 }, { "epoch": 0.39, "learning_rate": 4.3576896388815074e-05, "loss": 0.7343, "step": 34805 }, { "epoch": 0.39, "learning_rate": 4.357597366167656e-05, "loss": 0.7691, "step": 34810 }, { "epoch": 0.39, "learning_rate": 4.357505093453805e-05, "loss": 0.7189, "step": 34815 }, { "epoch": 0.39, "learning_rate": 4.357412820739953e-05, "loss": 0.7855, "step": 34820 }, { "epoch": 0.39, "learning_rate": 4.3573205480261025e-05, "loss": 0.8121, "step": 34825 }, { "epoch": 0.39, "learning_rate": 4.357228275312251e-05, "loss": 0.8246, "step": 34830 }, { "epoch": 0.39, "learning_rate": 4.3571360025984e-05, "loss": 0.733, "step": 34835 }, { "epoch": 0.39, "learning_rate": 4.357043729884548e-05, "loss": 0.7632, "step": 34840 }, { "epoch": 0.39, "learning_rate": 4.356951457170697e-05, "loss": 0.75, "step": 34845 }, { "epoch": 0.39, "learning_rate": 4.3568591844568465e-05, "loss": 0.7318, "step": 34850 }, { "epoch": 0.39, "learning_rate": 4.3567669117429946e-05, "loss": 0.76, "step": 34855 }, { "epoch": 0.39, "learning_rate": 4.3566746390291433e-05, "loss": 0.7429, "step": 34860 }, { "epoch": 0.39, "learning_rate": 4.356582366315292e-05, "loss": 0.8023, "step": 34865 }, { "epoch": 0.39, "learning_rate": 4.3564900936014416e-05, "loss": 0.7534, "step": 34870 }, { "epoch": 0.39, "learning_rate": 4.35639782088759e-05, "loss": 0.7759, "step": 34875 }, { "epoch": 0.39, "learning_rate": 4.3563055481737385e-05, "loss": 0.7973, "step": 34880 }, { "epoch": 0.39, "learning_rate": 4.356213275459887e-05, "loss": 0.7635, "step": 34885 }, { "epoch": 0.39, "learning_rate": 4.356121002746037e-05, "loss": 0.8318, "step": 34890 }, { "epoch": 0.39, "learning_rate": 4.356028730032185e-05, "loss": 0.6561, "step": 34895 }, { "epoch": 0.39, "learning_rate": 4.3559364573183336e-05, "loss": 0.7836, "step": 34900 }, { "epoch": 0.39, "learning_rate": 4.3558441846044824e-05, "loss": 0.7475, "step": 34905 }, { "epoch": 0.39, "learning_rate": 4.355751911890631e-05, "loss": 0.7729, "step": 34910 }, { "epoch": 0.39, "learning_rate": 4.35565963917678e-05, "loss": 0.7924, "step": 34915 }, { "epoch": 0.39, "learning_rate": 4.355567366462929e-05, "loss": 0.7741, "step": 34920 }, { "epoch": 0.39, "learning_rate": 4.3554750937490775e-05, "loss": 0.7674, "step": 34925 }, { "epoch": 0.39, "learning_rate": 4.3553828210352257e-05, "loss": 0.8423, "step": 34930 }, { "epoch": 0.39, "learning_rate": 4.355290548321375e-05, "loss": 0.7911, "step": 34935 }, { "epoch": 0.39, "learning_rate": 4.355198275607524e-05, "loss": 0.7948, "step": 34940 }, { "epoch": 0.39, "learning_rate": 4.355106002893673e-05, "loss": 0.8154, "step": 34945 }, { "epoch": 0.39, "learning_rate": 4.355013730179821e-05, "loss": 0.7951, "step": 34950 }, { "epoch": 0.39, "learning_rate": 4.35492145746597e-05, "loss": 0.7633, "step": 34955 }, { "epoch": 0.39, "learning_rate": 4.354829184752119e-05, "loss": 0.7527, "step": 34960 }, { "epoch": 0.39, "learning_rate": 4.354736912038268e-05, "loss": 0.7288, "step": 34965 }, { "epoch": 0.39, "learning_rate": 4.354644639324416e-05, "loss": 0.7242, "step": 34970 }, { "epoch": 0.39, "learning_rate": 4.3545523666105654e-05, "loss": 0.719, "step": 34975 }, { "epoch": 0.39, "learning_rate": 4.354460093896714e-05, "loss": 0.7976, "step": 34980 }, { "epoch": 0.39, "learning_rate": 4.354367821182862e-05, "loss": 0.7288, "step": 34985 }, { "epoch": 0.39, "learning_rate": 4.354275548469011e-05, "loss": 0.7356, "step": 34990 }, { "epoch": 0.39, "learning_rate": 4.35418327575516e-05, "loss": 0.7377, "step": 34995 }, { "epoch": 0.39, "learning_rate": 4.354091003041309e-05, "loss": 0.7963, "step": 35000 }, { "epoch": 0.39, "eval_loss": 0.7244875431060791, "eval_runtime": 69.257, "eval_samples_per_second": 28.878, "eval_steps_per_second": 14.439, "step": 35000 }, { "epoch": 0.39, "learning_rate": 4.3539987303274574e-05, "loss": 0.7909, "step": 35005 }, { "epoch": 0.39, "learning_rate": 4.353906457613606e-05, "loss": 0.778, "step": 35010 }, { "epoch": 0.39, "learning_rate": 4.353814184899755e-05, "loss": 0.7234, "step": 35015 }, { "epoch": 0.39, "learning_rate": 4.353721912185904e-05, "loss": 0.7844, "step": 35020 }, { "epoch": 0.39, "learning_rate": 4.3536296394720526e-05, "loss": 0.7743, "step": 35025 }, { "epoch": 0.39, "learning_rate": 4.3535373667582014e-05, "loss": 0.7372, "step": 35030 }, { "epoch": 0.39, "learning_rate": 4.35344509404435e-05, "loss": 0.7316, "step": 35035 }, { "epoch": 0.39, "learning_rate": 4.353352821330499e-05, "loss": 0.7955, "step": 35040 }, { "epoch": 0.39, "learning_rate": 4.353260548616648e-05, "loss": 0.6992, "step": 35045 }, { "epoch": 0.39, "learning_rate": 4.3531682759027965e-05, "loss": 0.7674, "step": 35050 }, { "epoch": 0.39, "learning_rate": 4.353076003188945e-05, "loss": 0.7768, "step": 35055 }, { "epoch": 0.39, "learning_rate": 4.352983730475094e-05, "loss": 0.8242, "step": 35060 }, { "epoch": 0.39, "learning_rate": 4.352891457761243e-05, "loss": 0.6814, "step": 35065 }, { "epoch": 0.39, "learning_rate": 4.3527991850473916e-05, "loss": 0.7174, "step": 35070 }, { "epoch": 0.39, "learning_rate": 4.3527069123335404e-05, "loss": 0.7709, "step": 35075 }, { "epoch": 0.39, "learning_rate": 4.3526146396196885e-05, "loss": 0.7984, "step": 35080 }, { "epoch": 0.39, "learning_rate": 4.352522366905838e-05, "loss": 0.7513, "step": 35085 }, { "epoch": 0.39, "learning_rate": 4.352430094191987e-05, "loss": 0.8156, "step": 35090 }, { "epoch": 0.39, "learning_rate": 4.352337821478135e-05, "loss": 0.7716, "step": 35095 }, { "epoch": 0.39, "learning_rate": 4.352245548764284e-05, "loss": 0.7248, "step": 35100 }, { "epoch": 0.39, "learning_rate": 4.352153276050433e-05, "loss": 0.759, "step": 35105 }, { "epoch": 0.39, "learning_rate": 4.352061003336582e-05, "loss": 0.7676, "step": 35110 }, { "epoch": 0.39, "learning_rate": 4.35196873062273e-05, "loss": 0.747, "step": 35115 }, { "epoch": 0.39, "learning_rate": 4.351876457908879e-05, "loss": 0.7491, "step": 35120 }, { "epoch": 0.39, "learning_rate": 4.351784185195028e-05, "loss": 0.7845, "step": 35125 }, { "epoch": 0.39, "learning_rate": 4.3516919124811764e-05, "loss": 0.7424, "step": 35130 }, { "epoch": 0.39, "learning_rate": 4.351599639767325e-05, "loss": 0.7908, "step": 35135 }, { "epoch": 0.39, "learning_rate": 4.351507367053474e-05, "loss": 0.703, "step": 35140 }, { "epoch": 0.39, "learning_rate": 4.351415094339623e-05, "loss": 0.7863, "step": 35145 }, { "epoch": 0.39, "learning_rate": 4.3513228216257715e-05, "loss": 0.7409, "step": 35150 }, { "epoch": 0.39, "learning_rate": 4.35123054891192e-05, "loss": 0.7613, "step": 35155 }, { "epoch": 0.39, "learning_rate": 4.351138276198069e-05, "loss": 0.79, "step": 35160 }, { "epoch": 0.39, "learning_rate": 4.351046003484218e-05, "loss": 0.7733, "step": 35165 }, { "epoch": 0.39, "learning_rate": 4.3509537307703667e-05, "loss": 0.7644, "step": 35170 }, { "epoch": 0.39, "learning_rate": 4.3508614580565154e-05, "loss": 0.7018, "step": 35175 }, { "epoch": 0.39, "learning_rate": 4.350769185342664e-05, "loss": 0.7048, "step": 35180 }, { "epoch": 0.39, "learning_rate": 4.350676912628813e-05, "loss": 0.7851, "step": 35185 }, { "epoch": 0.39, "learning_rate": 4.350584639914962e-05, "loss": 0.7446, "step": 35190 }, { "epoch": 0.39, "learning_rate": 4.3504923672011106e-05, "loss": 0.7689, "step": 35195 }, { "epoch": 0.39, "learning_rate": 4.3504000944872594e-05, "loss": 0.7128, "step": 35200 }, { "epoch": 0.39, "learning_rate": 4.3503078217734075e-05, "loss": 0.7366, "step": 35205 }, { "epoch": 0.39, "learning_rate": 4.350215549059557e-05, "loss": 0.7426, "step": 35210 }, { "epoch": 0.39, "learning_rate": 4.350123276345706e-05, "loss": 0.8028, "step": 35215 }, { "epoch": 0.39, "learning_rate": 4.3500310036318545e-05, "loss": 0.8201, "step": 35220 }, { "epoch": 0.39, "learning_rate": 4.3499387309180026e-05, "loss": 0.7255, "step": 35225 }, { "epoch": 0.39, "learning_rate": 4.3498464582041514e-05, "loss": 0.8093, "step": 35230 }, { "epoch": 0.39, "learning_rate": 4.349754185490301e-05, "loss": 0.7865, "step": 35235 }, { "epoch": 0.39, "learning_rate": 4.349661912776449e-05, "loss": 0.7372, "step": 35240 }, { "epoch": 0.39, "learning_rate": 4.349569640062598e-05, "loss": 0.7519, "step": 35245 }, { "epoch": 0.39, "learning_rate": 4.3494773673487465e-05, "loss": 0.7342, "step": 35250 }, { "epoch": 0.39, "learning_rate": 4.349385094634896e-05, "loss": 0.7228, "step": 35255 }, { "epoch": 0.39, "learning_rate": 4.349292821921044e-05, "loss": 0.7224, "step": 35260 }, { "epoch": 0.39, "learning_rate": 4.349200549207193e-05, "loss": 0.7569, "step": 35265 }, { "epoch": 0.39, "learning_rate": 4.349108276493342e-05, "loss": 0.7248, "step": 35270 }, { "epoch": 0.39, "learning_rate": 4.349016003779491e-05, "loss": 0.6995, "step": 35275 }, { "epoch": 0.39, "learning_rate": 4.348923731065639e-05, "loss": 0.7427, "step": 35280 }, { "epoch": 0.39, "learning_rate": 4.348831458351788e-05, "loss": 0.7315, "step": 35285 }, { "epoch": 0.39, "learning_rate": 4.348739185637937e-05, "loss": 0.7258, "step": 35290 }, { "epoch": 0.39, "learning_rate": 4.3486469129240856e-05, "loss": 0.7352, "step": 35295 }, { "epoch": 0.39, "learning_rate": 4.3485546402102344e-05, "loss": 0.7917, "step": 35300 }, { "epoch": 0.39, "learning_rate": 4.348462367496383e-05, "loss": 0.709, "step": 35305 }, { "epoch": 0.39, "learning_rate": 4.348370094782532e-05, "loss": 0.7586, "step": 35310 }, { "epoch": 0.39, "learning_rate": 4.34827782206868e-05, "loss": 0.8153, "step": 35315 }, { "epoch": 0.39, "learning_rate": 4.3481855493548295e-05, "loss": 0.7782, "step": 35320 }, { "epoch": 0.39, "learning_rate": 4.348093276640978e-05, "loss": 0.7316, "step": 35325 }, { "epoch": 0.39, "learning_rate": 4.348001003927127e-05, "loss": 0.7822, "step": 35330 }, { "epoch": 0.39, "learning_rate": 4.347908731213275e-05, "loss": 0.7946, "step": 35335 }, { "epoch": 0.39, "learning_rate": 4.3478164584994247e-05, "loss": 0.8095, "step": 35340 }, { "epoch": 0.39, "learning_rate": 4.3477241857855734e-05, "loss": 0.7536, "step": 35345 }, { "epoch": 0.39, "learning_rate": 4.347631913071722e-05, "loss": 0.7119, "step": 35350 }, { "epoch": 0.39, "learning_rate": 4.34753964035787e-05, "loss": 0.8307, "step": 35355 }, { "epoch": 0.39, "learning_rate": 4.34744736764402e-05, "loss": 0.7695, "step": 35360 }, { "epoch": 0.39, "learning_rate": 4.3473550949301686e-05, "loss": 0.7666, "step": 35365 }, { "epoch": 0.39, "learning_rate": 4.347262822216317e-05, "loss": 0.7846, "step": 35370 }, { "epoch": 0.39, "learning_rate": 4.3471705495024655e-05, "loss": 0.7031, "step": 35375 }, { "epoch": 0.39, "learning_rate": 4.347078276788614e-05, "loss": 0.7252, "step": 35380 }, { "epoch": 0.39, "learning_rate": 4.346986004074764e-05, "loss": 0.788, "step": 35385 }, { "epoch": 0.39, "learning_rate": 4.346893731360912e-05, "loss": 0.7486, "step": 35390 }, { "epoch": 0.39, "learning_rate": 4.3468014586470606e-05, "loss": 0.6989, "step": 35395 }, { "epoch": 0.39, "learning_rate": 4.3467091859332094e-05, "loss": 0.8001, "step": 35400 }, { "epoch": 0.39, "learning_rate": 4.346616913219358e-05, "loss": 0.7874, "step": 35405 }, { "epoch": 0.39, "learning_rate": 4.346524640505507e-05, "loss": 0.7879, "step": 35410 }, { "epoch": 0.39, "learning_rate": 4.346432367791656e-05, "loss": 0.7205, "step": 35415 }, { "epoch": 0.39, "learning_rate": 4.3463400950778045e-05, "loss": 0.7533, "step": 35420 }, { "epoch": 0.39, "learning_rate": 4.346247822363953e-05, "loss": 0.7104, "step": 35425 }, { "epoch": 0.39, "learning_rate": 4.346155549650102e-05, "loss": 0.7533, "step": 35430 }, { "epoch": 0.39, "learning_rate": 4.346063276936251e-05, "loss": 0.7471, "step": 35435 }, { "epoch": 0.39, "learning_rate": 4.3459710042224e-05, "loss": 0.8217, "step": 35440 }, { "epoch": 0.39, "learning_rate": 4.3458787315085485e-05, "loss": 0.7557, "step": 35445 }, { "epoch": 0.39, "learning_rate": 4.345786458794697e-05, "loss": 0.7434, "step": 35450 }, { "epoch": 0.39, "learning_rate": 4.345694186080846e-05, "loss": 0.8095, "step": 35455 }, { "epoch": 0.39, "learning_rate": 4.345601913366995e-05, "loss": 0.7569, "step": 35460 }, { "epoch": 0.39, "learning_rate": 4.345509640653143e-05, "loss": 0.7694, "step": 35465 }, { "epoch": 0.39, "learning_rate": 4.3454173679392924e-05, "loss": 0.7323, "step": 35470 }, { "epoch": 0.39, "learning_rate": 4.345325095225441e-05, "loss": 0.7926, "step": 35475 }, { "epoch": 0.39, "learning_rate": 4.345232822511589e-05, "loss": 0.7877, "step": 35480 }, { "epoch": 0.39, "learning_rate": 4.345140549797738e-05, "loss": 0.735, "step": 35485 }, { "epoch": 0.39, "learning_rate": 4.3450482770838875e-05, "loss": 0.7258, "step": 35490 }, { "epoch": 0.39, "learning_rate": 4.344956004370036e-05, "loss": 0.7451, "step": 35495 }, { "epoch": 0.39, "learning_rate": 4.3448637316561844e-05, "loss": 0.7357, "step": 35500 }, { "epoch": 0.39, "learning_rate": 4.344771458942333e-05, "loss": 0.7691, "step": 35505 }, { "epoch": 0.39, "learning_rate": 4.344679186228483e-05, "loss": 0.8125, "step": 35510 }, { "epoch": 0.39, "learning_rate": 4.344586913514631e-05, "loss": 0.7855, "step": 35515 }, { "epoch": 0.39, "learning_rate": 4.3444946408007796e-05, "loss": 0.7867, "step": 35520 }, { "epoch": 0.39, "learning_rate": 4.3444023680869283e-05, "loss": 0.7464, "step": 35525 }, { "epoch": 0.39, "learning_rate": 4.344310095373077e-05, "loss": 0.7939, "step": 35530 }, { "epoch": 0.39, "learning_rate": 4.344217822659226e-05, "loss": 0.7636, "step": 35535 }, { "epoch": 0.39, "learning_rate": 4.344125549945375e-05, "loss": 0.7297, "step": 35540 }, { "epoch": 0.39, "learning_rate": 4.3440332772315235e-05, "loss": 0.806, "step": 35545 }, { "epoch": 0.39, "learning_rate": 4.343941004517672e-05, "loss": 0.7962, "step": 35550 }, { "epoch": 0.39, "learning_rate": 4.343848731803821e-05, "loss": 0.7674, "step": 35555 }, { "epoch": 0.39, "learning_rate": 4.34375645908997e-05, "loss": 0.7835, "step": 35560 }, { "epoch": 0.39, "learning_rate": 4.3436641863761186e-05, "loss": 0.7383, "step": 35565 }, { "epoch": 0.39, "learning_rate": 4.3435719136622674e-05, "loss": 0.7861, "step": 35570 }, { "epoch": 0.39, "learning_rate": 4.343479640948416e-05, "loss": 0.7376, "step": 35575 }, { "epoch": 0.39, "learning_rate": 4.343387368234565e-05, "loss": 0.7619, "step": 35580 }, { "epoch": 0.39, "learning_rate": 4.343295095520714e-05, "loss": 0.8141, "step": 35585 }, { "epoch": 0.39, "learning_rate": 4.343202822806862e-05, "loss": 0.7973, "step": 35590 }, { "epoch": 0.39, "learning_rate": 4.343110550093011e-05, "loss": 0.7246, "step": 35595 }, { "epoch": 0.39, "learning_rate": 4.34301827737916e-05, "loss": 0.7454, "step": 35600 }, { "epoch": 0.39, "learning_rate": 4.342926004665309e-05, "loss": 0.7686, "step": 35605 }, { "epoch": 0.39, "learning_rate": 4.342833731951457e-05, "loss": 0.7651, "step": 35610 }, { "epoch": 0.39, "learning_rate": 4.342741459237606e-05, "loss": 0.7738, "step": 35615 }, { "epoch": 0.39, "learning_rate": 4.342649186523755e-05, "loss": 0.7497, "step": 35620 }, { "epoch": 0.39, "learning_rate": 4.342556913809904e-05, "loss": 0.7714, "step": 35625 }, { "epoch": 0.39, "learning_rate": 4.342464641096052e-05, "loss": 0.8046, "step": 35630 }, { "epoch": 0.39, "learning_rate": 4.342372368382201e-05, "loss": 0.8103, "step": 35635 }, { "epoch": 0.39, "learning_rate": 4.3422800956683504e-05, "loss": 0.8428, "step": 35640 }, { "epoch": 0.39, "learning_rate": 4.3421878229544985e-05, "loss": 0.7654, "step": 35645 }, { "epoch": 0.39, "learning_rate": 4.342095550240647e-05, "loss": 0.7522, "step": 35650 }, { "epoch": 0.39, "learning_rate": 4.342003277526796e-05, "loss": 0.7148, "step": 35655 }, { "epoch": 0.39, "learning_rate": 4.3419110048129455e-05, "loss": 0.7012, "step": 35660 }, { "epoch": 0.39, "learning_rate": 4.3418187320990936e-05, "loss": 0.7657, "step": 35665 }, { "epoch": 0.39, "learning_rate": 4.3417264593852424e-05, "loss": 0.7437, "step": 35670 }, { "epoch": 0.4, "learning_rate": 4.341634186671391e-05, "loss": 0.8158, "step": 35675 }, { "epoch": 0.4, "learning_rate": 4.34154191395754e-05, "loss": 0.6881, "step": 35680 }, { "epoch": 0.4, "learning_rate": 4.341449641243689e-05, "loss": 0.7648, "step": 35685 }, { "epoch": 0.4, "learning_rate": 4.3413573685298376e-05, "loss": 0.7901, "step": 35690 }, { "epoch": 0.4, "learning_rate": 4.3412650958159864e-05, "loss": 0.7479, "step": 35695 }, { "epoch": 0.4, "learning_rate": 4.341172823102135e-05, "loss": 0.7573, "step": 35700 }, { "epoch": 0.4, "learning_rate": 4.341080550388284e-05, "loss": 0.7562, "step": 35705 }, { "epoch": 0.4, "learning_rate": 4.340988277674433e-05, "loss": 0.7234, "step": 35710 }, { "epoch": 0.4, "learning_rate": 4.3408960049605815e-05, "loss": 0.7077, "step": 35715 }, { "epoch": 0.4, "learning_rate": 4.3408037322467296e-05, "loss": 0.7768, "step": 35720 }, { "epoch": 0.4, "learning_rate": 4.340711459532879e-05, "loss": 0.7042, "step": 35725 }, { "epoch": 0.4, "learning_rate": 4.340619186819028e-05, "loss": 0.7464, "step": 35730 }, { "epoch": 0.4, "learning_rate": 4.3405269141051766e-05, "loss": 0.6683, "step": 35735 }, { "epoch": 0.4, "learning_rate": 4.340434641391325e-05, "loss": 0.7536, "step": 35740 }, { "epoch": 0.4, "learning_rate": 4.340342368677474e-05, "loss": 0.8325, "step": 35745 }, { "epoch": 0.4, "learning_rate": 4.340250095963623e-05, "loss": 0.754, "step": 35750 }, { "epoch": 0.4, "learning_rate": 4.340157823249771e-05, "loss": 0.7374, "step": 35755 }, { "epoch": 0.4, "learning_rate": 4.34006555053592e-05, "loss": 0.7983, "step": 35760 }, { "epoch": 0.4, "learning_rate": 4.339973277822069e-05, "loss": 0.7808, "step": 35765 }, { "epoch": 0.4, "learning_rate": 4.339881005108218e-05, "loss": 0.7888, "step": 35770 }, { "epoch": 0.4, "learning_rate": 4.339788732394366e-05, "loss": 0.8054, "step": 35775 }, { "epoch": 0.4, "learning_rate": 4.339696459680515e-05, "loss": 0.7971, "step": 35780 }, { "epoch": 0.4, "learning_rate": 4.339604186966664e-05, "loss": 0.8397, "step": 35785 }, { "epoch": 0.4, "learning_rate": 4.3395119142528126e-05, "loss": 0.7376, "step": 35790 }, { "epoch": 0.4, "learning_rate": 4.3394196415389614e-05, "loss": 0.8017, "step": 35795 }, { "epoch": 0.4, "learning_rate": 4.33932736882511e-05, "loss": 0.7419, "step": 35800 }, { "epoch": 0.4, "learning_rate": 4.339235096111259e-05, "loss": 0.7718, "step": 35805 }, { "epoch": 0.4, "learning_rate": 4.339142823397408e-05, "loss": 0.7591, "step": 35810 }, { "epoch": 0.4, "learning_rate": 4.3390505506835565e-05, "loss": 0.7203, "step": 35815 }, { "epoch": 0.4, "learning_rate": 4.338958277969705e-05, "loss": 0.7561, "step": 35820 }, { "epoch": 0.4, "learning_rate": 4.338866005255854e-05, "loss": 0.8119, "step": 35825 }, { "epoch": 0.4, "learning_rate": 4.338773732542002e-05, "loss": 0.7664, "step": 35830 }, { "epoch": 0.4, "learning_rate": 4.3386814598281517e-05, "loss": 0.7818, "step": 35835 }, { "epoch": 0.4, "learning_rate": 4.3385891871143004e-05, "loss": 0.7245, "step": 35840 }, { "epoch": 0.4, "learning_rate": 4.338496914400449e-05, "loss": 0.7551, "step": 35845 }, { "epoch": 0.4, "learning_rate": 4.338404641686597e-05, "loss": 0.8045, "step": 35850 }, { "epoch": 0.4, "learning_rate": 4.338312368972747e-05, "loss": 0.7877, "step": 35855 }, { "epoch": 0.4, "learning_rate": 4.3382200962588956e-05, "loss": 0.7865, "step": 35860 }, { "epoch": 0.4, "learning_rate": 4.338127823545044e-05, "loss": 0.7662, "step": 35865 }, { "epoch": 0.4, "learning_rate": 4.3380355508311925e-05, "loss": 0.7675, "step": 35870 }, { "epoch": 0.4, "learning_rate": 4.337943278117342e-05, "loss": 0.787, "step": 35875 }, { "epoch": 0.4, "learning_rate": 4.337851005403491e-05, "loss": 0.7071, "step": 35880 }, { "epoch": 0.4, "learning_rate": 4.337758732689639e-05, "loss": 0.7873, "step": 35885 }, { "epoch": 0.4, "learning_rate": 4.3376664599757876e-05, "loss": 0.8257, "step": 35890 }, { "epoch": 0.4, "learning_rate": 4.337574187261937e-05, "loss": 0.7483, "step": 35895 }, { "epoch": 0.4, "learning_rate": 4.337481914548085e-05, "loss": 0.783, "step": 35900 }, { "epoch": 0.4, "learning_rate": 4.337389641834234e-05, "loss": 0.822, "step": 35905 }, { "epoch": 0.4, "learning_rate": 4.337297369120383e-05, "loss": 0.7531, "step": 35910 }, { "epoch": 0.4, "learning_rate": 4.3372050964065315e-05, "loss": 0.7697, "step": 35915 }, { "epoch": 0.4, "learning_rate": 4.33711282369268e-05, "loss": 0.7406, "step": 35920 }, { "epoch": 0.4, "learning_rate": 4.337020550978829e-05, "loss": 0.7878, "step": 35925 }, { "epoch": 0.4, "learning_rate": 4.336928278264978e-05, "loss": 0.7825, "step": 35930 }, { "epoch": 0.4, "learning_rate": 4.336836005551127e-05, "loss": 0.744, "step": 35935 }, { "epoch": 0.4, "learning_rate": 4.3367437328372755e-05, "loss": 0.7382, "step": 35940 }, { "epoch": 0.4, "learning_rate": 4.336651460123424e-05, "loss": 0.7709, "step": 35945 }, { "epoch": 0.4, "learning_rate": 4.336559187409573e-05, "loss": 0.7652, "step": 35950 }, { "epoch": 0.4, "learning_rate": 4.336466914695722e-05, "loss": 0.7647, "step": 35955 }, { "epoch": 0.4, "learning_rate": 4.3363746419818706e-05, "loss": 0.7679, "step": 35960 }, { "epoch": 0.4, "learning_rate": 4.3362823692680194e-05, "loss": 0.7933, "step": 35965 }, { "epoch": 0.4, "learning_rate": 4.336190096554168e-05, "loss": 0.7993, "step": 35970 }, { "epoch": 0.4, "learning_rate": 4.336097823840316e-05, "loss": 0.7597, "step": 35975 }, { "epoch": 0.4, "learning_rate": 4.336005551126465e-05, "loss": 0.8002, "step": 35980 }, { "epoch": 0.4, "learning_rate": 4.3359132784126145e-05, "loss": 0.7874, "step": 35985 }, { "epoch": 0.4, "learning_rate": 4.335821005698763e-05, "loss": 0.7368, "step": 35990 }, { "epoch": 0.4, "learning_rate": 4.3357287329849114e-05, "loss": 0.8076, "step": 35995 }, { "epoch": 0.4, "learning_rate": 4.33563646027106e-05, "loss": 0.786, "step": 36000 }, { "epoch": 0.4, "eval_loss": 0.7310543060302734, "eval_runtime": 69.2269, "eval_samples_per_second": 28.891, "eval_steps_per_second": 14.445, "step": 36000 }, { "epoch": 0.4, "learning_rate": 4.3355441875572097e-05, "loss": 0.8078, "step": 36005 }, { "epoch": 0.4, "learning_rate": 4.3354519148433584e-05, "loss": 0.727, "step": 36010 }, { "epoch": 0.4, "learning_rate": 4.3353596421295066e-05, "loss": 0.7992, "step": 36015 }, { "epoch": 0.4, "learning_rate": 4.335267369415655e-05, "loss": 0.7883, "step": 36020 }, { "epoch": 0.4, "learning_rate": 4.335175096701805e-05, "loss": 0.753, "step": 36025 }, { "epoch": 0.4, "learning_rate": 4.335082823987953e-05, "loss": 0.7963, "step": 36030 }, { "epoch": 0.4, "learning_rate": 4.334990551274102e-05, "loss": 0.7808, "step": 36035 }, { "epoch": 0.4, "learning_rate": 4.3348982785602505e-05, "loss": 0.8092, "step": 36040 }, { "epoch": 0.4, "learning_rate": 4.3348060058464e-05, "loss": 0.7564, "step": 36045 }, { "epoch": 0.4, "learning_rate": 4.334713733132548e-05, "loss": 0.7593, "step": 36050 }, { "epoch": 0.4, "learning_rate": 4.334621460418697e-05, "loss": 0.7831, "step": 36055 }, { "epoch": 0.4, "learning_rate": 4.3345291877048456e-05, "loss": 0.7978, "step": 36060 }, { "epoch": 0.4, "learning_rate": 4.3344369149909944e-05, "loss": 0.6974, "step": 36065 }, { "epoch": 0.4, "learning_rate": 4.334344642277143e-05, "loss": 0.7187, "step": 36070 }, { "epoch": 0.4, "learning_rate": 4.334252369563292e-05, "loss": 0.7714, "step": 36075 }, { "epoch": 0.4, "learning_rate": 4.334160096849441e-05, "loss": 0.7806, "step": 36080 }, { "epoch": 0.4, "learning_rate": 4.3340678241355895e-05, "loss": 0.7226, "step": 36085 }, { "epoch": 0.4, "learning_rate": 4.333975551421738e-05, "loss": 0.7897, "step": 36090 }, { "epoch": 0.4, "learning_rate": 4.333883278707887e-05, "loss": 0.7464, "step": 36095 }, { "epoch": 0.4, "learning_rate": 4.333791005994036e-05, "loss": 0.7726, "step": 36100 }, { "epoch": 0.4, "learning_rate": 4.333698733280184e-05, "loss": 0.7603, "step": 36105 }, { "epoch": 0.4, "learning_rate": 4.3336064605663335e-05, "loss": 0.7363, "step": 36110 }, { "epoch": 0.4, "learning_rate": 4.333514187852482e-05, "loss": 0.7814, "step": 36115 }, { "epoch": 0.4, "learning_rate": 4.333421915138631e-05, "loss": 0.778, "step": 36120 }, { "epoch": 0.4, "learning_rate": 4.333329642424779e-05, "loss": 0.7284, "step": 36125 }, { "epoch": 0.4, "learning_rate": 4.333237369710928e-05, "loss": 0.7971, "step": 36130 }, { "epoch": 0.4, "learning_rate": 4.3331450969970774e-05, "loss": 0.7391, "step": 36135 }, { "epoch": 0.4, "learning_rate": 4.3330528242832255e-05, "loss": 0.7742, "step": 36140 }, { "epoch": 0.4, "learning_rate": 4.332960551569374e-05, "loss": 0.8048, "step": 36145 }, { "epoch": 0.4, "learning_rate": 4.332868278855523e-05, "loss": 0.7161, "step": 36150 }, { "epoch": 0.4, "learning_rate": 4.3327760061416725e-05, "loss": 0.7152, "step": 36155 }, { "epoch": 0.4, "learning_rate": 4.3326837334278206e-05, "loss": 0.7297, "step": 36160 }, { "epoch": 0.4, "learning_rate": 4.3325914607139694e-05, "loss": 0.6853, "step": 36165 }, { "epoch": 0.4, "learning_rate": 4.332499188000118e-05, "loss": 0.8003, "step": 36170 }, { "epoch": 0.4, "learning_rate": 4.332406915286267e-05, "loss": 0.7402, "step": 36175 }, { "epoch": 0.4, "learning_rate": 4.332314642572416e-05, "loss": 0.7811, "step": 36180 }, { "epoch": 0.4, "learning_rate": 4.3322223698585646e-05, "loss": 0.7444, "step": 36185 }, { "epoch": 0.4, "learning_rate": 4.3321300971447133e-05, "loss": 0.7567, "step": 36190 }, { "epoch": 0.4, "learning_rate": 4.332037824430862e-05, "loss": 0.704, "step": 36195 }, { "epoch": 0.4, "learning_rate": 4.331945551717011e-05, "loss": 0.7166, "step": 36200 }, { "epoch": 0.4, "learning_rate": 4.33185327900316e-05, "loss": 0.7864, "step": 36205 }, { "epoch": 0.4, "learning_rate": 4.3317610062893085e-05, "loss": 0.8382, "step": 36210 }, { "epoch": 0.4, "learning_rate": 4.3316687335754566e-05, "loss": 0.7919, "step": 36215 }, { "epoch": 0.4, "learning_rate": 4.331576460861606e-05, "loss": 0.7495, "step": 36220 }, { "epoch": 0.4, "learning_rate": 4.331484188147755e-05, "loss": 0.8004, "step": 36225 }, { "epoch": 0.4, "learning_rate": 4.3313919154339036e-05, "loss": 0.8088, "step": 36230 }, { "epoch": 0.4, "learning_rate": 4.331299642720052e-05, "loss": 0.7638, "step": 36235 }, { "epoch": 0.4, "learning_rate": 4.331207370006201e-05, "loss": 0.7235, "step": 36240 }, { "epoch": 0.4, "learning_rate": 4.33111509729235e-05, "loss": 0.7512, "step": 36245 }, { "epoch": 0.4, "learning_rate": 4.331022824578498e-05, "loss": 0.7808, "step": 36250 }, { "epoch": 0.4, "learning_rate": 4.330930551864647e-05, "loss": 0.791, "step": 36255 }, { "epoch": 0.4, "learning_rate": 4.330838279150796e-05, "loss": 0.7554, "step": 36260 }, { "epoch": 0.4, "learning_rate": 4.330746006436945e-05, "loss": 0.7924, "step": 36265 }, { "epoch": 0.4, "learning_rate": 4.330653733723093e-05, "loss": 0.7545, "step": 36270 }, { "epoch": 0.4, "learning_rate": 4.330561461009242e-05, "loss": 0.7858, "step": 36275 }, { "epoch": 0.4, "learning_rate": 4.3304691882953915e-05, "loss": 0.6937, "step": 36280 }, { "epoch": 0.4, "learning_rate": 4.3303769155815396e-05, "loss": 0.768, "step": 36285 }, { "epoch": 0.4, "learning_rate": 4.3302846428676884e-05, "loss": 0.8215, "step": 36290 }, { "epoch": 0.4, "learning_rate": 4.330192370153837e-05, "loss": 0.7254, "step": 36295 }, { "epoch": 0.4, "learning_rate": 4.330100097439986e-05, "loss": 0.7927, "step": 36300 }, { "epoch": 0.4, "learning_rate": 4.330007824726135e-05, "loss": 0.7713, "step": 36305 }, { "epoch": 0.4, "learning_rate": 4.3299155520122835e-05, "loss": 0.7505, "step": 36310 }, { "epoch": 0.4, "learning_rate": 4.329823279298432e-05, "loss": 0.7361, "step": 36315 }, { "epoch": 0.4, "learning_rate": 4.329731006584581e-05, "loss": 0.708, "step": 36320 }, { "epoch": 0.4, "learning_rate": 4.32963873387073e-05, "loss": 0.8529, "step": 36325 }, { "epoch": 0.4, "learning_rate": 4.3295464611568786e-05, "loss": 0.7778, "step": 36330 }, { "epoch": 0.4, "learning_rate": 4.3294541884430274e-05, "loss": 0.787, "step": 36335 }, { "epoch": 0.4, "learning_rate": 4.329361915729176e-05, "loss": 0.8492, "step": 36340 }, { "epoch": 0.4, "learning_rate": 4.329269643015325e-05, "loss": 0.7333, "step": 36345 }, { "epoch": 0.4, "learning_rate": 4.329177370301474e-05, "loss": 0.7625, "step": 36350 }, { "epoch": 0.4, "learning_rate": 4.3290850975876226e-05, "loss": 0.7598, "step": 36355 }, { "epoch": 0.4, "learning_rate": 4.328992824873771e-05, "loss": 0.7549, "step": 36360 }, { "epoch": 0.4, "learning_rate": 4.3289005521599195e-05, "loss": 0.8116, "step": 36365 }, { "epoch": 0.4, "learning_rate": 4.328808279446069e-05, "loss": 0.761, "step": 36370 }, { "epoch": 0.4, "learning_rate": 4.328716006732218e-05, "loss": 0.7933, "step": 36375 }, { "epoch": 0.4, "learning_rate": 4.328623734018366e-05, "loss": 0.7115, "step": 36380 }, { "epoch": 0.4, "learning_rate": 4.3285314613045146e-05, "loss": 0.7312, "step": 36385 }, { "epoch": 0.4, "learning_rate": 4.328439188590664e-05, "loss": 0.7625, "step": 36390 }, { "epoch": 0.4, "learning_rate": 4.328346915876813e-05, "loss": 0.8146, "step": 36395 }, { "epoch": 0.4, "learning_rate": 4.328254643162961e-05, "loss": 0.7579, "step": 36400 }, { "epoch": 0.4, "learning_rate": 4.32816237044911e-05, "loss": 0.7966, "step": 36405 }, { "epoch": 0.4, "learning_rate": 4.328070097735259e-05, "loss": 0.7047, "step": 36410 }, { "epoch": 0.4, "learning_rate": 4.327977825021407e-05, "loss": 0.7267, "step": 36415 }, { "epoch": 0.4, "learning_rate": 4.327885552307556e-05, "loss": 0.827, "step": 36420 }, { "epoch": 0.4, "learning_rate": 4.327793279593705e-05, "loss": 0.8311, "step": 36425 }, { "epoch": 0.4, "learning_rate": 4.3277010068798543e-05, "loss": 0.771, "step": 36430 }, { "epoch": 0.4, "learning_rate": 4.3276087341660024e-05, "loss": 0.7868, "step": 36435 }, { "epoch": 0.4, "learning_rate": 4.327516461452151e-05, "loss": 0.8181, "step": 36440 }, { "epoch": 0.4, "learning_rate": 4.3274241887383e-05, "loss": 0.7638, "step": 36445 }, { "epoch": 0.4, "learning_rate": 4.327331916024449e-05, "loss": 0.808, "step": 36450 }, { "epoch": 0.4, "learning_rate": 4.3272396433105976e-05, "loss": 0.7916, "step": 36455 }, { "epoch": 0.4, "learning_rate": 4.3271473705967464e-05, "loss": 0.735, "step": 36460 }, { "epoch": 0.4, "learning_rate": 4.327055097882895e-05, "loss": 0.7315, "step": 36465 }, { "epoch": 0.4, "learning_rate": 4.326962825169044e-05, "loss": 0.7596, "step": 36470 }, { "epoch": 0.4, "learning_rate": 4.326870552455193e-05, "loss": 0.7375, "step": 36475 }, { "epoch": 0.4, "learning_rate": 4.3267782797413415e-05, "loss": 0.765, "step": 36480 }, { "epoch": 0.4, "learning_rate": 4.32668600702749e-05, "loss": 0.7723, "step": 36485 }, { "epoch": 0.4, "learning_rate": 4.3265937343136384e-05, "loss": 0.7964, "step": 36490 }, { "epoch": 0.4, "learning_rate": 4.326501461599788e-05, "loss": 0.6827, "step": 36495 }, { "epoch": 0.4, "learning_rate": 4.3264091888859366e-05, "loss": 0.747, "step": 36500 }, { "epoch": 0.4, "learning_rate": 4.3263169161720854e-05, "loss": 0.7623, "step": 36505 }, { "epoch": 0.4, "learning_rate": 4.3262246434582335e-05, "loss": 0.7916, "step": 36510 }, { "epoch": 0.4, "learning_rate": 4.326132370744382e-05, "loss": 0.7813, "step": 36515 }, { "epoch": 0.4, "learning_rate": 4.326040098030532e-05, "loss": 0.7752, "step": 36520 }, { "epoch": 0.4, "learning_rate": 4.32594782531668e-05, "loss": 0.7886, "step": 36525 }, { "epoch": 0.4, "learning_rate": 4.325855552602829e-05, "loss": 0.7651, "step": 36530 }, { "epoch": 0.4, "learning_rate": 4.3257632798889775e-05, "loss": 0.7882, "step": 36535 }, { "epoch": 0.4, "learning_rate": 4.325671007175127e-05, "loss": 0.7738, "step": 36540 }, { "epoch": 0.4, "learning_rate": 4.325578734461275e-05, "loss": 0.8247, "step": 36545 }, { "epoch": 0.4, "learning_rate": 4.325486461747424e-05, "loss": 0.7362, "step": 36550 }, { "epoch": 0.4, "learning_rate": 4.3253941890335726e-05, "loss": 0.7208, "step": 36555 }, { "epoch": 0.4, "learning_rate": 4.3253019163197214e-05, "loss": 0.7708, "step": 36560 }, { "epoch": 0.4, "learning_rate": 4.32520964360587e-05, "loss": 0.7789, "step": 36565 }, { "epoch": 0.4, "learning_rate": 4.325117370892019e-05, "loss": 0.7675, "step": 36570 }, { "epoch": 0.4, "learning_rate": 4.325025098178168e-05, "loss": 0.7999, "step": 36575 }, { "epoch": 0.41, "learning_rate": 4.3249328254643165e-05, "loss": 0.7587, "step": 36580 }, { "epoch": 0.41, "learning_rate": 4.324840552750465e-05, "loss": 0.745, "step": 36585 }, { "epoch": 0.41, "learning_rate": 4.324748280036614e-05, "loss": 0.7069, "step": 36590 }, { "epoch": 0.41, "learning_rate": 4.324656007322763e-05, "loss": 0.7665, "step": 36595 }, { "epoch": 0.41, "learning_rate": 4.324563734608911e-05, "loss": 0.7619, "step": 36600 }, { "epoch": 0.41, "learning_rate": 4.3244714618950605e-05, "loss": 0.7757, "step": 36605 }, { "epoch": 0.41, "learning_rate": 4.324379189181209e-05, "loss": 0.7682, "step": 36610 }, { "epoch": 0.41, "learning_rate": 4.324286916467358e-05, "loss": 0.7795, "step": 36615 }, { "epoch": 0.41, "learning_rate": 4.324194643753506e-05, "loss": 0.76, "step": 36620 }, { "epoch": 0.41, "learning_rate": 4.3241023710396556e-05, "loss": 0.7543, "step": 36625 }, { "epoch": 0.41, "learning_rate": 4.3240100983258044e-05, "loss": 0.7633, "step": 36630 }, { "epoch": 0.41, "learning_rate": 4.3239178256119525e-05, "loss": 0.7884, "step": 36635 }, { "epoch": 0.41, "learning_rate": 4.323825552898101e-05, "loss": 0.7694, "step": 36640 }, { "epoch": 0.41, "learning_rate": 4.323733280184251e-05, "loss": 0.7745, "step": 36645 }, { "epoch": 0.41, "learning_rate": 4.3236410074703995e-05, "loss": 0.7592, "step": 36650 }, { "epoch": 0.41, "learning_rate": 4.3235487347565476e-05, "loss": 0.7755, "step": 36655 }, { "epoch": 0.41, "learning_rate": 4.3234564620426964e-05, "loss": 0.785, "step": 36660 }, { "epoch": 0.41, "learning_rate": 4.323364189328845e-05, "loss": 0.7443, "step": 36665 }, { "epoch": 0.41, "learning_rate": 4.323271916614994e-05, "loss": 0.7178, "step": 36670 }, { "epoch": 0.41, "learning_rate": 4.323179643901143e-05, "loss": 0.7522, "step": 36675 }, { "epoch": 0.41, "learning_rate": 4.3230873711872915e-05, "loss": 0.7963, "step": 36680 }, { "epoch": 0.41, "learning_rate": 4.32299509847344e-05, "loss": 0.7536, "step": 36685 }, { "epoch": 0.41, "learning_rate": 4.322902825759589e-05, "loss": 0.7482, "step": 36690 }, { "epoch": 0.41, "learning_rate": 4.322810553045738e-05, "loss": 0.7654, "step": 36695 }, { "epoch": 0.41, "learning_rate": 4.322718280331887e-05, "loss": 0.7488, "step": 36700 }, { "epoch": 0.41, "learning_rate": 4.3226260076180355e-05, "loss": 0.7877, "step": 36705 }, { "epoch": 0.41, "learning_rate": 4.322533734904184e-05, "loss": 0.7809, "step": 36710 }, { "epoch": 0.41, "learning_rate": 4.322441462190333e-05, "loss": 0.7686, "step": 36715 }, { "epoch": 0.41, "learning_rate": 4.322349189476482e-05, "loss": 0.7781, "step": 36720 }, { "epoch": 0.41, "learning_rate": 4.3222569167626306e-05, "loss": 0.737, "step": 36725 }, { "epoch": 0.41, "learning_rate": 4.3221646440487794e-05, "loss": 0.7849, "step": 36730 }, { "epoch": 0.41, "learning_rate": 4.322072371334928e-05, "loss": 0.8043, "step": 36735 }, { "epoch": 0.41, "learning_rate": 4.321980098621077e-05, "loss": 0.7974, "step": 36740 }, { "epoch": 0.41, "learning_rate": 4.321887825907225e-05, "loss": 0.799, "step": 36745 }, { "epoch": 0.41, "learning_rate": 4.321795553193374e-05, "loss": 0.7908, "step": 36750 }, { "epoch": 0.41, "learning_rate": 4.321703280479523e-05, "loss": 0.8005, "step": 36755 }, { "epoch": 0.41, "learning_rate": 4.321611007765672e-05, "loss": 0.7894, "step": 36760 }, { "epoch": 0.41, "learning_rate": 4.32151873505182e-05, "loss": 0.7608, "step": 36765 }, { "epoch": 0.41, "learning_rate": 4.321426462337969e-05, "loss": 0.7585, "step": 36770 }, { "epoch": 0.41, "learning_rate": 4.3213341896241185e-05, "loss": 0.8207, "step": 36775 }, { "epoch": 0.41, "learning_rate": 4.321241916910267e-05, "loss": 0.7774, "step": 36780 }, { "epoch": 0.41, "learning_rate": 4.3211496441964154e-05, "loss": 0.743, "step": 36785 }, { "epoch": 0.41, "learning_rate": 4.321057371482564e-05, "loss": 0.8662, "step": 36790 }, { "epoch": 0.41, "learning_rate": 4.3209650987687136e-05, "loss": 0.7273, "step": 36795 }, { "epoch": 0.41, "learning_rate": 4.320872826054862e-05, "loss": 0.7254, "step": 36800 }, { "epoch": 0.41, "learning_rate": 4.3207805533410105e-05, "loss": 0.8018, "step": 36805 }, { "epoch": 0.41, "learning_rate": 4.320688280627159e-05, "loss": 0.7949, "step": 36810 }, { "epoch": 0.41, "learning_rate": 4.320596007913308e-05, "loss": 0.7653, "step": 36815 }, { "epoch": 0.41, "learning_rate": 4.320503735199457e-05, "loss": 0.7382, "step": 36820 }, { "epoch": 0.41, "learning_rate": 4.3204114624856056e-05, "loss": 0.7812, "step": 36825 }, { "epoch": 0.41, "learning_rate": 4.3203191897717544e-05, "loss": 0.7792, "step": 36830 }, { "epoch": 0.41, "learning_rate": 4.320226917057903e-05, "loss": 0.7445, "step": 36835 }, { "epoch": 0.41, "learning_rate": 4.320134644344052e-05, "loss": 0.8064, "step": 36840 }, { "epoch": 0.41, "learning_rate": 4.320042371630201e-05, "loss": 0.7227, "step": 36845 }, { "epoch": 0.41, "learning_rate": 4.3199500989163496e-05, "loss": 0.7264, "step": 36850 }, { "epoch": 0.41, "learning_rate": 4.3198578262024983e-05, "loss": 0.7261, "step": 36855 }, { "epoch": 0.41, "learning_rate": 4.319765553488647e-05, "loss": 0.7216, "step": 36860 }, { "epoch": 0.41, "learning_rate": 4.319673280774796e-05, "loss": 0.7976, "step": 36865 }, { "epoch": 0.41, "learning_rate": 4.319581008060945e-05, "loss": 0.733, "step": 36870 }, { "epoch": 0.41, "learning_rate": 4.319488735347093e-05, "loss": 0.813, "step": 36875 }, { "epoch": 0.41, "learning_rate": 4.319396462633242e-05, "loss": 0.7969, "step": 36880 }, { "epoch": 0.41, "learning_rate": 4.319304189919391e-05, "loss": 0.7337, "step": 36885 }, { "epoch": 0.41, "learning_rate": 4.31921191720554e-05, "loss": 0.7543, "step": 36890 }, { "epoch": 0.41, "learning_rate": 4.319119644491688e-05, "loss": 0.7041, "step": 36895 }, { "epoch": 0.41, "learning_rate": 4.319027371777837e-05, "loss": 0.8367, "step": 36900 }, { "epoch": 0.41, "learning_rate": 4.318935099063986e-05, "loss": 0.7851, "step": 36905 }, { "epoch": 0.41, "learning_rate": 4.318842826350134e-05, "loss": 0.7739, "step": 36910 }, { "epoch": 0.41, "learning_rate": 4.318750553636283e-05, "loss": 0.7785, "step": 36915 }, { "epoch": 0.41, "learning_rate": 4.318658280922432e-05, "loss": 0.8081, "step": 36920 }, { "epoch": 0.41, "learning_rate": 4.318566008208581e-05, "loss": 0.7611, "step": 36925 }, { "epoch": 0.41, "learning_rate": 4.3184737354947294e-05, "loss": 0.7612, "step": 36930 }, { "epoch": 0.41, "learning_rate": 4.318381462780878e-05, "loss": 0.7011, "step": 36935 }, { "epoch": 0.41, "learning_rate": 4.318289190067027e-05, "loss": 0.7658, "step": 36940 }, { "epoch": 0.41, "learning_rate": 4.318196917353176e-05, "loss": 0.7712, "step": 36945 }, { "epoch": 0.41, "learning_rate": 4.3181046446393246e-05, "loss": 0.7542, "step": 36950 }, { "epoch": 0.41, "learning_rate": 4.3180123719254734e-05, "loss": 0.7673, "step": 36955 }, { "epoch": 0.41, "learning_rate": 4.317920099211622e-05, "loss": 0.7552, "step": 36960 }, { "epoch": 0.41, "learning_rate": 4.317827826497771e-05, "loss": 0.74, "step": 36965 }, { "epoch": 0.41, "learning_rate": 4.31773555378392e-05, "loss": 0.7894, "step": 36970 }, { "epoch": 0.41, "learning_rate": 4.3176432810700685e-05, "loss": 0.7176, "step": 36975 }, { "epoch": 0.41, "learning_rate": 4.317551008356217e-05, "loss": 0.816, "step": 36980 }, { "epoch": 0.41, "learning_rate": 4.3174587356423654e-05, "loss": 0.7512, "step": 36985 }, { "epoch": 0.41, "learning_rate": 4.317366462928515e-05, "loss": 0.754, "step": 36990 }, { "epoch": 0.41, "learning_rate": 4.3172741902146636e-05, "loss": 0.7668, "step": 36995 }, { "epoch": 0.41, "learning_rate": 4.3171819175008124e-05, "loss": 0.7991, "step": 37000 }, { "epoch": 0.41, "eval_loss": 0.7229691743850708, "eval_runtime": 69.2464, "eval_samples_per_second": 28.882, "eval_steps_per_second": 14.441, "step": 37000 }, { "epoch": 0.41, "learning_rate": 4.3170896447869605e-05, "loss": 0.7547, "step": 37005 }, { "epoch": 0.41, "learning_rate": 4.31699737207311e-05, "loss": 0.7743, "step": 37010 }, { "epoch": 0.41, "learning_rate": 4.316905099359259e-05, "loss": 0.6816, "step": 37015 }, { "epoch": 0.41, "learning_rate": 4.316812826645407e-05, "loss": 0.6907, "step": 37020 }, { "epoch": 0.41, "learning_rate": 4.316720553931556e-05, "loss": 0.7472, "step": 37025 }, { "epoch": 0.41, "learning_rate": 4.316628281217705e-05, "loss": 0.8014, "step": 37030 }, { "epoch": 0.41, "learning_rate": 4.316536008503854e-05, "loss": 0.8734, "step": 37035 }, { "epoch": 0.41, "learning_rate": 4.316443735790002e-05, "loss": 0.7332, "step": 37040 }, { "epoch": 0.41, "learning_rate": 4.316351463076151e-05, "loss": 0.7223, "step": 37045 }, { "epoch": 0.41, "learning_rate": 4.3162591903622996e-05, "loss": 0.7413, "step": 37050 }, { "epoch": 0.41, "learning_rate": 4.3161669176484484e-05, "loss": 0.8006, "step": 37055 }, { "epoch": 0.41, "learning_rate": 4.316074644934597e-05, "loss": 0.7121, "step": 37060 }, { "epoch": 0.41, "learning_rate": 4.315982372220746e-05, "loss": 0.7877, "step": 37065 }, { "epoch": 0.41, "learning_rate": 4.315890099506895e-05, "loss": 0.7764, "step": 37070 }, { "epoch": 0.41, "learning_rate": 4.3157978267930435e-05, "loss": 0.82, "step": 37075 }, { "epoch": 0.41, "learning_rate": 4.315705554079192e-05, "loss": 0.7914, "step": 37080 }, { "epoch": 0.41, "learning_rate": 4.315613281365341e-05, "loss": 0.8122, "step": 37085 }, { "epoch": 0.41, "learning_rate": 4.31552100865149e-05, "loss": 0.7737, "step": 37090 }, { "epoch": 0.41, "learning_rate": 4.3154287359376387e-05, "loss": 0.7038, "step": 37095 }, { "epoch": 0.41, "learning_rate": 4.3153364632237874e-05, "loss": 0.7749, "step": 37100 }, { "epoch": 0.41, "learning_rate": 4.315244190509936e-05, "loss": 0.8264, "step": 37105 }, { "epoch": 0.41, "learning_rate": 4.315151917796085e-05, "loss": 0.8347, "step": 37110 }, { "epoch": 0.41, "learning_rate": 4.315059645082234e-05, "loss": 0.8043, "step": 37115 }, { "epoch": 0.41, "learning_rate": 4.3149673723683826e-05, "loss": 0.703, "step": 37120 }, { "epoch": 0.41, "learning_rate": 4.3148750996545314e-05, "loss": 0.7533, "step": 37125 }, { "epoch": 0.41, "learning_rate": 4.3147828269406795e-05, "loss": 0.7924, "step": 37130 }, { "epoch": 0.41, "learning_rate": 4.314690554226828e-05, "loss": 0.7577, "step": 37135 }, { "epoch": 0.41, "learning_rate": 4.314598281512978e-05, "loss": 0.8451, "step": 37140 }, { "epoch": 0.41, "learning_rate": 4.3145060087991265e-05, "loss": 0.7923, "step": 37145 }, { "epoch": 0.41, "learning_rate": 4.3144137360852746e-05, "loss": 0.7598, "step": 37150 }, { "epoch": 0.41, "learning_rate": 4.3143214633714234e-05, "loss": 0.7575, "step": 37155 }, { "epoch": 0.41, "learning_rate": 4.314229190657573e-05, "loss": 0.7813, "step": 37160 }, { "epoch": 0.41, "learning_rate": 4.3141369179437216e-05, "loss": 0.7476, "step": 37165 }, { "epoch": 0.41, "learning_rate": 4.31404464522987e-05, "loss": 0.7804, "step": 37170 }, { "epoch": 0.41, "learning_rate": 4.3139523725160185e-05, "loss": 0.6766, "step": 37175 }, { "epoch": 0.41, "learning_rate": 4.313860099802168e-05, "loss": 0.7389, "step": 37180 }, { "epoch": 0.41, "learning_rate": 4.313767827088316e-05, "loss": 0.7685, "step": 37185 }, { "epoch": 0.41, "learning_rate": 4.313675554374465e-05, "loss": 0.7497, "step": 37190 }, { "epoch": 0.41, "learning_rate": 4.313583281660614e-05, "loss": 0.786, "step": 37195 }, { "epoch": 0.41, "learning_rate": 4.3134910089467625e-05, "loss": 0.7628, "step": 37200 }, { "epoch": 0.41, "learning_rate": 4.313398736232911e-05, "loss": 0.7728, "step": 37205 }, { "epoch": 0.41, "learning_rate": 4.31330646351906e-05, "loss": 0.7482, "step": 37210 }, { "epoch": 0.41, "learning_rate": 4.313214190805209e-05, "loss": 0.7972, "step": 37215 }, { "epoch": 0.41, "learning_rate": 4.3131219180913576e-05, "loss": 0.7294, "step": 37220 }, { "epoch": 0.41, "learning_rate": 4.3130296453775064e-05, "loss": 0.819, "step": 37225 }, { "epoch": 0.41, "learning_rate": 4.312937372663655e-05, "loss": 0.8228, "step": 37230 }, { "epoch": 0.41, "learning_rate": 4.312845099949804e-05, "loss": 0.7814, "step": 37235 }, { "epoch": 0.41, "learning_rate": 4.312752827235953e-05, "loss": 0.781, "step": 37240 }, { "epoch": 0.41, "learning_rate": 4.3126605545221015e-05, "loss": 0.7014, "step": 37245 }, { "epoch": 0.41, "learning_rate": 4.31256828180825e-05, "loss": 0.764, "step": 37250 }, { "epoch": 0.41, "learning_rate": 4.312476009094399e-05, "loss": 0.7201, "step": 37255 }, { "epoch": 0.41, "learning_rate": 4.312383736380547e-05, "loss": 0.7591, "step": 37260 }, { "epoch": 0.41, "learning_rate": 4.312291463666697e-05, "loss": 0.8036, "step": 37265 }, { "epoch": 0.41, "learning_rate": 4.3121991909528455e-05, "loss": 0.7386, "step": 37270 }, { "epoch": 0.41, "learning_rate": 4.312106918238994e-05, "loss": 0.8277, "step": 37275 }, { "epoch": 0.41, "learning_rate": 4.3120146455251423e-05, "loss": 0.7531, "step": 37280 }, { "epoch": 0.41, "learning_rate": 4.311922372811291e-05, "loss": 0.8246, "step": 37285 }, { "epoch": 0.41, "learning_rate": 4.3118301000974406e-05, "loss": 0.7917, "step": 37290 }, { "epoch": 0.41, "learning_rate": 4.311737827383589e-05, "loss": 0.8231, "step": 37295 }, { "epoch": 0.41, "learning_rate": 4.3116455546697375e-05, "loss": 0.7362, "step": 37300 }, { "epoch": 0.41, "learning_rate": 4.311553281955886e-05, "loss": 0.7405, "step": 37305 }, { "epoch": 0.41, "learning_rate": 4.311461009242036e-05, "loss": 0.7687, "step": 37310 }, { "epoch": 0.41, "learning_rate": 4.311368736528184e-05, "loss": 0.7297, "step": 37315 }, { "epoch": 0.41, "learning_rate": 4.3112764638143326e-05, "loss": 0.7726, "step": 37320 }, { "epoch": 0.41, "learning_rate": 4.3111841911004814e-05, "loss": 0.7647, "step": 37325 }, { "epoch": 0.41, "learning_rate": 4.31109191838663e-05, "loss": 0.8322, "step": 37330 }, { "epoch": 0.41, "learning_rate": 4.310999645672779e-05, "loss": 0.7421, "step": 37335 }, { "epoch": 0.41, "learning_rate": 4.310907372958928e-05, "loss": 0.7343, "step": 37340 }, { "epoch": 0.41, "learning_rate": 4.3108151002450765e-05, "loss": 0.7926, "step": 37345 }, { "epoch": 0.41, "learning_rate": 4.310722827531225e-05, "loss": 0.7318, "step": 37350 }, { "epoch": 0.41, "learning_rate": 4.310630554817374e-05, "loss": 0.7618, "step": 37355 }, { "epoch": 0.41, "learning_rate": 4.310538282103523e-05, "loss": 0.7492, "step": 37360 }, { "epoch": 0.41, "learning_rate": 4.310446009389672e-05, "loss": 0.7844, "step": 37365 }, { "epoch": 0.41, "learning_rate": 4.31035373667582e-05, "loss": 0.8154, "step": 37370 }, { "epoch": 0.41, "learning_rate": 4.310261463961969e-05, "loss": 0.7499, "step": 37375 }, { "epoch": 0.41, "learning_rate": 4.310169191248118e-05, "loss": 0.7255, "step": 37380 }, { "epoch": 0.41, "learning_rate": 4.310076918534267e-05, "loss": 0.7908, "step": 37385 }, { "epoch": 0.41, "learning_rate": 4.309984645820415e-05, "loss": 0.7625, "step": 37390 }, { "epoch": 0.41, "learning_rate": 4.3098923731065644e-05, "loss": 0.7112, "step": 37395 }, { "epoch": 0.41, "learning_rate": 4.309800100392713e-05, "loss": 0.8206, "step": 37400 }, { "epoch": 0.41, "learning_rate": 4.309707827678861e-05, "loss": 0.7236, "step": 37405 }, { "epoch": 0.41, "learning_rate": 4.30961555496501e-05, "loss": 0.7554, "step": 37410 }, { "epoch": 0.41, "learning_rate": 4.3095232822511595e-05, "loss": 0.7326, "step": 37415 }, { "epoch": 0.41, "learning_rate": 4.309431009537308e-05, "loss": 0.8089, "step": 37420 }, { "epoch": 0.41, "learning_rate": 4.3093387368234564e-05, "loss": 0.7352, "step": 37425 }, { "epoch": 0.41, "learning_rate": 4.309246464109605e-05, "loss": 0.742, "step": 37430 }, { "epoch": 0.41, "learning_rate": 4.309154191395754e-05, "loss": 0.7278, "step": 37435 }, { "epoch": 0.41, "learning_rate": 4.309061918681903e-05, "loss": 0.7465, "step": 37440 }, { "epoch": 0.41, "learning_rate": 4.3089696459680516e-05, "loss": 0.7718, "step": 37445 }, { "epoch": 0.41, "learning_rate": 4.3088773732542004e-05, "loss": 0.8189, "step": 37450 }, { "epoch": 0.41, "learning_rate": 4.308785100540349e-05, "loss": 0.8116, "step": 37455 }, { "epoch": 0.41, "learning_rate": 4.308692827826498e-05, "loss": 0.7392, "step": 37460 }, { "epoch": 0.41, "learning_rate": 4.308600555112647e-05, "loss": 0.7659, "step": 37465 }, { "epoch": 0.41, "learning_rate": 4.3085082823987955e-05, "loss": 0.8081, "step": 37470 }, { "epoch": 0.41, "learning_rate": 4.308416009684944e-05, "loss": 0.7704, "step": 37475 }, { "epoch": 0.42, "learning_rate": 4.308323736971093e-05, "loss": 0.7349, "step": 37480 }, { "epoch": 0.42, "learning_rate": 4.308231464257242e-05, "loss": 0.7605, "step": 37485 }, { "epoch": 0.42, "learning_rate": 4.3081391915433906e-05, "loss": 0.7357, "step": 37490 }, { "epoch": 0.42, "learning_rate": 4.3080469188295394e-05, "loss": 0.8233, "step": 37495 }, { "epoch": 0.42, "learning_rate": 4.3079546461156875e-05, "loss": 0.7137, "step": 37500 }, { "epoch": 0.42, "learning_rate": 4.307862373401837e-05, "loss": 0.7139, "step": 37505 }, { "epoch": 0.42, "learning_rate": 4.307770100687986e-05, "loss": 0.7938, "step": 37510 }, { "epoch": 0.42, "learning_rate": 4.307677827974134e-05, "loss": 0.8089, "step": 37515 }, { "epoch": 0.42, "learning_rate": 4.307585555260283e-05, "loss": 0.7482, "step": 37520 }, { "epoch": 0.42, "learning_rate": 4.307493282546432e-05, "loss": 0.7486, "step": 37525 }, { "epoch": 0.42, "learning_rate": 4.307401009832581e-05, "loss": 0.727, "step": 37530 }, { "epoch": 0.42, "learning_rate": 4.307308737118729e-05, "loss": 0.7574, "step": 37535 }, { "epoch": 0.42, "learning_rate": 4.307216464404878e-05, "loss": 0.7224, "step": 37540 }, { "epoch": 0.42, "learning_rate": 4.307124191691027e-05, "loss": 0.7672, "step": 37545 }, { "epoch": 0.42, "learning_rate": 4.307031918977176e-05, "loss": 0.7672, "step": 37550 }, { "epoch": 0.42, "learning_rate": 4.306939646263324e-05, "loss": 0.7855, "step": 37555 }, { "epoch": 0.42, "learning_rate": 4.306847373549473e-05, "loss": 0.8246, "step": 37560 }, { "epoch": 0.42, "learning_rate": 4.3067551008356224e-05, "loss": 0.7173, "step": 37565 }, { "epoch": 0.42, "learning_rate": 4.3066628281217705e-05, "loss": 0.7997, "step": 37570 }, { "epoch": 0.42, "learning_rate": 4.306570555407919e-05, "loss": 0.7725, "step": 37575 }, { "epoch": 0.42, "learning_rate": 4.306478282694068e-05, "loss": 0.7773, "step": 37580 }, { "epoch": 0.42, "learning_rate": 4.306386009980217e-05, "loss": 0.7757, "step": 37585 }, { "epoch": 0.42, "learning_rate": 4.3062937372663657e-05, "loss": 0.7737, "step": 37590 }, { "epoch": 0.42, "learning_rate": 4.3062014645525144e-05, "loss": 0.7861, "step": 37595 }, { "epoch": 0.42, "learning_rate": 4.306109191838663e-05, "loss": 0.7996, "step": 37600 }, { "epoch": 0.42, "learning_rate": 4.306016919124812e-05, "loss": 0.7565, "step": 37605 }, { "epoch": 0.42, "learning_rate": 4.305924646410961e-05, "loss": 0.7281, "step": 37610 }, { "epoch": 0.42, "learning_rate": 4.3058323736971096e-05, "loss": 0.7751, "step": 37615 }, { "epoch": 0.42, "learning_rate": 4.3057401009832584e-05, "loss": 0.7726, "step": 37620 }, { "epoch": 0.42, "learning_rate": 4.305647828269407e-05, "loss": 0.7371, "step": 37625 }, { "epoch": 0.42, "learning_rate": 4.305555555555556e-05, "loss": 0.7879, "step": 37630 }, { "epoch": 0.42, "learning_rate": 4.305463282841705e-05, "loss": 0.7526, "step": 37635 }, { "epoch": 0.42, "learning_rate": 4.3053710101278535e-05, "loss": 0.7906, "step": 37640 }, { "epoch": 0.42, "learning_rate": 4.3052787374140016e-05, "loss": 0.704, "step": 37645 }, { "epoch": 0.42, "learning_rate": 4.3051864647001504e-05, "loss": 0.6892, "step": 37650 }, { "epoch": 0.42, "learning_rate": 4.3050941919863e-05, "loss": 0.7143, "step": 37655 }, { "epoch": 0.42, "learning_rate": 4.3050019192724486e-05, "loss": 0.838, "step": 37660 }, { "epoch": 0.42, "learning_rate": 4.304909646558597e-05, "loss": 0.8023, "step": 37665 }, { "epoch": 0.42, "learning_rate": 4.3048173738447455e-05, "loss": 0.7088, "step": 37670 }, { "epoch": 0.42, "learning_rate": 4.304725101130895e-05, "loss": 0.7949, "step": 37675 }, { "epoch": 0.42, "learning_rate": 4.304632828417043e-05, "loss": 0.7084, "step": 37680 }, { "epoch": 0.42, "learning_rate": 4.304540555703192e-05, "loss": 0.7484, "step": 37685 }, { "epoch": 0.42, "learning_rate": 4.304448282989341e-05, "loss": 0.7078, "step": 37690 }, { "epoch": 0.42, "learning_rate": 4.30435601027549e-05, "loss": 0.7499, "step": 37695 }, { "epoch": 0.42, "learning_rate": 4.304263737561638e-05, "loss": 0.8399, "step": 37700 }, { "epoch": 0.42, "learning_rate": 4.304171464847787e-05, "loss": 0.7726, "step": 37705 }, { "epoch": 0.42, "learning_rate": 4.304079192133936e-05, "loss": 0.7737, "step": 37710 }, { "epoch": 0.42, "learning_rate": 4.3039869194200846e-05, "loss": 0.7537, "step": 37715 }, { "epoch": 0.42, "learning_rate": 4.3038946467062334e-05, "loss": 0.7777, "step": 37720 }, { "epoch": 0.42, "learning_rate": 4.303802373992382e-05, "loss": 0.7659, "step": 37725 }, { "epoch": 0.42, "learning_rate": 4.303710101278531e-05, "loss": 0.7876, "step": 37730 }, { "epoch": 0.42, "learning_rate": 4.30361782856468e-05, "loss": 0.7597, "step": 37735 }, { "epoch": 0.42, "learning_rate": 4.3035255558508285e-05, "loss": 0.7759, "step": 37740 }, { "epoch": 0.42, "learning_rate": 4.303433283136977e-05, "loss": 0.7672, "step": 37745 }, { "epoch": 0.42, "learning_rate": 4.303341010423126e-05, "loss": 0.7494, "step": 37750 }, { "epoch": 0.42, "learning_rate": 4.303248737709274e-05, "loss": 0.6931, "step": 37755 }, { "epoch": 0.42, "learning_rate": 4.3031564649954237e-05, "loss": 0.7613, "step": 37760 }, { "epoch": 0.42, "learning_rate": 4.3030641922815724e-05, "loss": 0.7819, "step": 37765 }, { "epoch": 0.42, "learning_rate": 4.302971919567721e-05, "loss": 0.818, "step": 37770 }, { "epoch": 0.42, "learning_rate": 4.302879646853869e-05, "loss": 0.7857, "step": 37775 }, { "epoch": 0.42, "learning_rate": 4.302787374140019e-05, "loss": 0.7985, "step": 37780 }, { "epoch": 0.42, "learning_rate": 4.3026951014261676e-05, "loss": 0.7985, "step": 37785 }, { "epoch": 0.42, "learning_rate": 4.302602828712316e-05, "loss": 0.7613, "step": 37790 }, { "epoch": 0.42, "learning_rate": 4.3025105559984645e-05, "loss": 0.7649, "step": 37795 }, { "epoch": 0.42, "learning_rate": 4.302418283284613e-05, "loss": 0.7412, "step": 37800 }, { "epoch": 0.42, "learning_rate": 4.302326010570763e-05, "loss": 0.7498, "step": 37805 }, { "epoch": 0.42, "learning_rate": 4.302233737856911e-05, "loss": 0.7758, "step": 37810 }, { "epoch": 0.42, "learning_rate": 4.3021414651430596e-05, "loss": 0.8022, "step": 37815 }, { "epoch": 0.42, "learning_rate": 4.3020491924292084e-05, "loss": 0.7529, "step": 37820 }, { "epoch": 0.42, "learning_rate": 4.301956919715358e-05, "loss": 0.7729, "step": 37825 }, { "epoch": 0.42, "learning_rate": 4.301864647001506e-05, "loss": 0.7821, "step": 37830 }, { "epoch": 0.42, "learning_rate": 4.301772374287655e-05, "loss": 0.7555, "step": 37835 }, { "epoch": 0.42, "learning_rate": 4.3016801015738035e-05, "loss": 0.7269, "step": 37840 }, { "epoch": 0.42, "learning_rate": 4.301587828859952e-05, "loss": 0.7546, "step": 37845 }, { "epoch": 0.42, "learning_rate": 4.301495556146101e-05, "loss": 0.7158, "step": 37850 }, { "epoch": 0.42, "learning_rate": 4.30140328343225e-05, "loss": 0.7348, "step": 37855 }, { "epoch": 0.42, "learning_rate": 4.301311010718399e-05, "loss": 0.761, "step": 37860 }, { "epoch": 0.42, "learning_rate": 4.3012187380045475e-05, "loss": 0.7356, "step": 37865 }, { "epoch": 0.42, "learning_rate": 4.301126465290696e-05, "loss": 0.7695, "step": 37870 }, { "epoch": 0.42, "learning_rate": 4.301034192576845e-05, "loss": 0.7388, "step": 37875 }, { "epoch": 0.42, "learning_rate": 4.300941919862994e-05, "loss": 0.8099, "step": 37880 }, { "epoch": 0.42, "learning_rate": 4.300849647149142e-05, "loss": 0.7941, "step": 37885 }, { "epoch": 0.42, "learning_rate": 4.3007573744352914e-05, "loss": 0.7136, "step": 37890 }, { "epoch": 0.42, "learning_rate": 4.30066510172144e-05, "loss": 0.6769, "step": 37895 }, { "epoch": 0.42, "learning_rate": 4.300572829007589e-05, "loss": 0.7499, "step": 37900 }, { "epoch": 0.42, "learning_rate": 4.300480556293737e-05, "loss": 0.7215, "step": 37905 }, { "epoch": 0.42, "learning_rate": 4.3003882835798865e-05, "loss": 0.7381, "step": 37910 }, { "epoch": 0.42, "learning_rate": 4.300296010866035e-05, "loss": 0.7533, "step": 37915 }, { "epoch": 0.42, "learning_rate": 4.3002037381521834e-05, "loss": 0.7443, "step": 37920 }, { "epoch": 0.42, "learning_rate": 4.300111465438332e-05, "loss": 0.7545, "step": 37925 }, { "epoch": 0.42, "learning_rate": 4.300019192724482e-05, "loss": 0.783, "step": 37930 }, { "epoch": 0.42, "learning_rate": 4.2999269200106305e-05, "loss": 0.7286, "step": 37935 }, { "epoch": 0.42, "learning_rate": 4.2998346472967786e-05, "loss": 0.7283, "step": 37940 }, { "epoch": 0.42, "learning_rate": 4.2997423745829273e-05, "loss": 0.7473, "step": 37945 }, { "epoch": 0.42, "learning_rate": 4.299650101869077e-05, "loss": 0.7081, "step": 37950 }, { "epoch": 0.42, "learning_rate": 4.299557829155225e-05, "loss": 0.7692, "step": 37955 }, { "epoch": 0.42, "learning_rate": 4.299465556441374e-05, "loss": 0.8161, "step": 37960 }, { "epoch": 0.42, "learning_rate": 4.2993732837275225e-05, "loss": 0.7452, "step": 37965 }, { "epoch": 0.42, "learning_rate": 4.299281011013671e-05, "loss": 0.7742, "step": 37970 }, { "epoch": 0.42, "learning_rate": 4.29918873829982e-05, "loss": 0.7869, "step": 37975 }, { "epoch": 0.42, "learning_rate": 4.299096465585969e-05, "loss": 0.7676, "step": 37980 }, { "epoch": 0.42, "learning_rate": 4.2990041928721176e-05, "loss": 0.7537, "step": 37985 }, { "epoch": 0.42, "learning_rate": 4.2989119201582664e-05, "loss": 0.7486, "step": 37990 }, { "epoch": 0.42, "learning_rate": 4.298819647444415e-05, "loss": 0.7304, "step": 37995 }, { "epoch": 0.42, "learning_rate": 4.298727374730564e-05, "loss": 0.723, "step": 38000 }, { "epoch": 0.42, "eval_loss": 0.720878541469574, "eval_runtime": 69.2789, "eval_samples_per_second": 28.869, "eval_steps_per_second": 14.434, "step": 38000 }, { "epoch": 0.42, "learning_rate": 4.298635102016713e-05, "loss": 0.7721, "step": 38005 }, { "epoch": 0.42, "learning_rate": 4.2985428293028615e-05, "loss": 0.752, "step": 38010 }, { "epoch": 0.42, "learning_rate": 4.29845055658901e-05, "loss": 0.8198, "step": 38015 }, { "epoch": 0.42, "learning_rate": 4.298358283875159e-05, "loss": 0.7501, "step": 38020 }, { "epoch": 0.42, "learning_rate": 4.298266011161308e-05, "loss": 0.7545, "step": 38025 }, { "epoch": 0.42, "learning_rate": 4.298173738447456e-05, "loss": 0.7564, "step": 38030 }, { "epoch": 0.42, "learning_rate": 4.298081465733605e-05, "loss": 0.7744, "step": 38035 }, { "epoch": 0.42, "learning_rate": 4.297989193019754e-05, "loss": 0.7941, "step": 38040 }, { "epoch": 0.42, "learning_rate": 4.297896920305903e-05, "loss": 0.7913, "step": 38045 }, { "epoch": 0.42, "learning_rate": 4.297804647592051e-05, "loss": 0.769, "step": 38050 }, { "epoch": 0.42, "learning_rate": 4.2977123748782e-05, "loss": 0.7547, "step": 38055 }, { "epoch": 0.42, "learning_rate": 4.2976201021643494e-05, "loss": 0.7698, "step": 38060 }, { "epoch": 0.42, "learning_rate": 4.2975278294504975e-05, "loss": 0.8383, "step": 38065 }, { "epoch": 0.42, "learning_rate": 4.297435556736646e-05, "loss": 0.7769, "step": 38070 }, { "epoch": 0.42, "learning_rate": 4.297343284022795e-05, "loss": 0.7802, "step": 38075 }, { "epoch": 0.42, "learning_rate": 4.2972510113089445e-05, "loss": 0.7978, "step": 38080 }, { "epoch": 0.42, "learning_rate": 4.2971587385950926e-05, "loss": 0.7565, "step": 38085 }, { "epoch": 0.42, "learning_rate": 4.2970664658812414e-05, "loss": 0.8307, "step": 38090 }, { "epoch": 0.42, "learning_rate": 4.29697419316739e-05, "loss": 0.7687, "step": 38095 }, { "epoch": 0.42, "learning_rate": 4.296881920453539e-05, "loss": 0.8148, "step": 38100 }, { "epoch": 0.42, "learning_rate": 4.296789647739688e-05, "loss": 0.765, "step": 38105 }, { "epoch": 0.42, "learning_rate": 4.2966973750258366e-05, "loss": 0.8369, "step": 38110 }, { "epoch": 0.42, "learning_rate": 4.2966051023119854e-05, "loss": 0.7814, "step": 38115 }, { "epoch": 0.42, "learning_rate": 4.296512829598134e-05, "loss": 0.6987, "step": 38120 }, { "epoch": 0.42, "learning_rate": 4.296420556884283e-05, "loss": 0.7975, "step": 38125 }, { "epoch": 0.42, "learning_rate": 4.296328284170432e-05, "loss": 0.7852, "step": 38130 }, { "epoch": 0.42, "learning_rate": 4.2962360114565805e-05, "loss": 0.692, "step": 38135 }, { "epoch": 0.42, "learning_rate": 4.2961437387427286e-05, "loss": 0.734, "step": 38140 }, { "epoch": 0.42, "learning_rate": 4.296051466028878e-05, "loss": 0.8174, "step": 38145 }, { "epoch": 0.42, "learning_rate": 4.295959193315027e-05, "loss": 0.7285, "step": 38150 }, { "epoch": 0.42, "learning_rate": 4.2958669206011756e-05, "loss": 0.7966, "step": 38155 }, { "epoch": 0.42, "learning_rate": 4.295774647887324e-05, "loss": 0.7581, "step": 38160 }, { "epoch": 0.42, "learning_rate": 4.295682375173473e-05, "loss": 0.7769, "step": 38165 }, { "epoch": 0.42, "learning_rate": 4.295590102459622e-05, "loss": 0.7941, "step": 38170 }, { "epoch": 0.42, "learning_rate": 4.29549782974577e-05, "loss": 0.7747, "step": 38175 }, { "epoch": 0.42, "learning_rate": 4.295405557031919e-05, "loss": 0.7668, "step": 38180 }, { "epoch": 0.42, "learning_rate": 4.295313284318068e-05, "loss": 0.6881, "step": 38185 }, { "epoch": 0.42, "learning_rate": 4.295221011604217e-05, "loss": 0.6901, "step": 38190 }, { "epoch": 0.42, "learning_rate": 4.295128738890365e-05, "loss": 0.774, "step": 38195 }, { "epoch": 0.42, "learning_rate": 4.295036466176514e-05, "loss": 0.704, "step": 38200 }, { "epoch": 0.42, "learning_rate": 4.294944193462663e-05, "loss": 0.7812, "step": 38205 }, { "epoch": 0.42, "learning_rate": 4.294851920748812e-05, "loss": 0.7722, "step": 38210 }, { "epoch": 0.42, "learning_rate": 4.2947596480349604e-05, "loss": 0.7335, "step": 38215 }, { "epoch": 0.42, "learning_rate": 4.294667375321109e-05, "loss": 0.7406, "step": 38220 }, { "epoch": 0.42, "learning_rate": 4.294575102607258e-05, "loss": 0.784, "step": 38225 }, { "epoch": 0.42, "learning_rate": 4.294482829893407e-05, "loss": 0.7945, "step": 38230 }, { "epoch": 0.42, "learning_rate": 4.2943905571795555e-05, "loss": 0.7784, "step": 38235 }, { "epoch": 0.42, "learning_rate": 4.294298284465704e-05, "loss": 0.7662, "step": 38240 }, { "epoch": 0.42, "learning_rate": 4.294206011751853e-05, "loss": 0.7991, "step": 38245 }, { "epoch": 0.42, "learning_rate": 4.294113739038002e-05, "loss": 0.769, "step": 38250 }, { "epoch": 0.42, "learning_rate": 4.2940214663241507e-05, "loss": 0.7613, "step": 38255 }, { "epoch": 0.42, "learning_rate": 4.2939291936102994e-05, "loss": 0.7674, "step": 38260 }, { "epoch": 0.42, "learning_rate": 4.293836920896448e-05, "loss": 0.7673, "step": 38265 }, { "epoch": 0.42, "learning_rate": 4.293744648182596e-05, "loss": 0.8136, "step": 38270 }, { "epoch": 0.42, "learning_rate": 4.293652375468746e-05, "loss": 0.8018, "step": 38275 }, { "epoch": 0.42, "learning_rate": 4.2935601027548946e-05, "loss": 0.7261, "step": 38280 }, { "epoch": 0.42, "learning_rate": 4.2934678300410434e-05, "loss": 0.7222, "step": 38285 }, { "epoch": 0.42, "learning_rate": 4.2933755573271915e-05, "loss": 0.7519, "step": 38290 }, { "epoch": 0.42, "learning_rate": 4.293283284613341e-05, "loss": 0.7761, "step": 38295 }, { "epoch": 0.42, "learning_rate": 4.29319101189949e-05, "loss": 0.7199, "step": 38300 }, { "epoch": 0.42, "learning_rate": 4.293098739185638e-05, "loss": 0.7697, "step": 38305 }, { "epoch": 0.42, "learning_rate": 4.2930064664717866e-05, "loss": 0.8237, "step": 38310 }, { "epoch": 0.42, "learning_rate": 4.292914193757936e-05, "loss": 0.6652, "step": 38315 }, { "epoch": 0.42, "learning_rate": 4.292821921044085e-05, "loss": 0.7631, "step": 38320 }, { "epoch": 0.42, "learning_rate": 4.292729648330233e-05, "loss": 0.8497, "step": 38325 }, { "epoch": 0.42, "learning_rate": 4.292637375616382e-05, "loss": 0.7945, "step": 38330 }, { "epoch": 0.42, "learning_rate": 4.2925451029025305e-05, "loss": 0.7755, "step": 38335 }, { "epoch": 0.42, "learning_rate": 4.292452830188679e-05, "loss": 0.7552, "step": 38340 }, { "epoch": 0.42, "learning_rate": 4.292360557474828e-05, "loss": 0.7591, "step": 38345 }, { "epoch": 0.42, "learning_rate": 4.292268284760977e-05, "loss": 0.7631, "step": 38350 }, { "epoch": 0.42, "learning_rate": 4.292176012047126e-05, "loss": 0.7231, "step": 38355 }, { "epoch": 0.42, "learning_rate": 4.2920837393332745e-05, "loss": 0.8256, "step": 38360 }, { "epoch": 0.42, "learning_rate": 4.291991466619423e-05, "loss": 0.7166, "step": 38365 }, { "epoch": 0.42, "learning_rate": 4.291899193905572e-05, "loss": 0.7769, "step": 38370 }, { "epoch": 0.42, "learning_rate": 4.291806921191721e-05, "loss": 0.7625, "step": 38375 }, { "epoch": 0.42, "learning_rate": 4.2917146484778696e-05, "loss": 0.7547, "step": 38380 }, { "epoch": 0.43, "learning_rate": 4.2916223757640184e-05, "loss": 0.7913, "step": 38385 }, { "epoch": 0.43, "learning_rate": 4.291530103050167e-05, "loss": 0.8113, "step": 38390 }, { "epoch": 0.43, "learning_rate": 4.291437830336316e-05, "loss": 0.7663, "step": 38395 }, { "epoch": 0.43, "learning_rate": 4.291345557622465e-05, "loss": 0.7523, "step": 38400 }, { "epoch": 0.43, "learning_rate": 4.2912532849086135e-05, "loss": 0.7735, "step": 38405 }, { "epoch": 0.43, "learning_rate": 4.291161012194762e-05, "loss": 0.7327, "step": 38410 }, { "epoch": 0.43, "learning_rate": 4.2910687394809104e-05, "loss": 0.7956, "step": 38415 }, { "epoch": 0.43, "learning_rate": 4.290976466767059e-05, "loss": 0.7224, "step": 38420 }, { "epoch": 0.43, "learning_rate": 4.2908841940532087e-05, "loss": 0.7703, "step": 38425 }, { "epoch": 0.43, "learning_rate": 4.2907919213393574e-05, "loss": 0.7808, "step": 38430 }, { "epoch": 0.43, "learning_rate": 4.2906996486255056e-05, "loss": 0.7483, "step": 38435 }, { "epoch": 0.43, "learning_rate": 4.290607375911654e-05, "loss": 0.7586, "step": 38440 }, { "epoch": 0.43, "learning_rate": 4.290515103197804e-05, "loss": 0.7914, "step": 38445 }, { "epoch": 0.43, "learning_rate": 4.290422830483952e-05, "loss": 0.7938, "step": 38450 }, { "epoch": 0.43, "learning_rate": 4.290330557770101e-05, "loss": 0.7165, "step": 38455 }, { "epoch": 0.43, "learning_rate": 4.2902382850562495e-05, "loss": 0.773, "step": 38460 }, { "epoch": 0.43, "learning_rate": 4.290146012342399e-05, "loss": 0.7709, "step": 38465 }, { "epoch": 0.43, "learning_rate": 4.290053739628547e-05, "loss": 0.7661, "step": 38470 }, { "epoch": 0.43, "learning_rate": 4.289961466914696e-05, "loss": 0.7438, "step": 38475 }, { "epoch": 0.43, "learning_rate": 4.2898691942008446e-05, "loss": 0.7068, "step": 38480 }, { "epoch": 0.43, "learning_rate": 4.2897769214869934e-05, "loss": 0.7806, "step": 38485 }, { "epoch": 0.43, "learning_rate": 4.289684648773142e-05, "loss": 0.8318, "step": 38490 }, { "epoch": 0.43, "learning_rate": 4.289592376059291e-05, "loss": 0.7482, "step": 38495 }, { "epoch": 0.43, "learning_rate": 4.28950010334544e-05, "loss": 0.8243, "step": 38500 }, { "epoch": 0.43, "learning_rate": 4.2894078306315885e-05, "loss": 0.802, "step": 38505 }, { "epoch": 0.43, "learning_rate": 4.289315557917737e-05, "loss": 0.7879, "step": 38510 }, { "epoch": 0.43, "learning_rate": 4.289223285203886e-05, "loss": 0.7955, "step": 38515 }, { "epoch": 0.43, "learning_rate": 4.289131012490035e-05, "loss": 0.7057, "step": 38520 }, { "epoch": 0.43, "learning_rate": 4.289038739776183e-05, "loss": 0.785, "step": 38525 }, { "epoch": 0.43, "learning_rate": 4.2889464670623325e-05, "loss": 0.7131, "step": 38530 }, { "epoch": 0.43, "learning_rate": 4.288854194348481e-05, "loss": 0.7488, "step": 38535 }, { "epoch": 0.43, "learning_rate": 4.28876192163463e-05, "loss": 0.7634, "step": 38540 }, { "epoch": 0.43, "learning_rate": 4.288669648920778e-05, "loss": 0.757, "step": 38545 }, { "epoch": 0.43, "learning_rate": 4.2885773762069276e-05, "loss": 0.7051, "step": 38550 }, { "epoch": 0.43, "learning_rate": 4.2884851034930764e-05, "loss": 0.7089, "step": 38555 }, { "epoch": 0.43, "learning_rate": 4.2883928307792245e-05, "loss": 0.7631, "step": 38560 }, { "epoch": 0.43, "learning_rate": 4.288300558065373e-05, "loss": 0.7672, "step": 38565 }, { "epoch": 0.43, "learning_rate": 4.288208285351522e-05, "loss": 0.8103, "step": 38570 }, { "epoch": 0.43, "learning_rate": 4.2881160126376715e-05, "loss": 0.7109, "step": 38575 }, { "epoch": 0.43, "learning_rate": 4.2880237399238196e-05, "loss": 0.7338, "step": 38580 }, { "epoch": 0.43, "learning_rate": 4.2879314672099684e-05, "loss": 0.6988, "step": 38585 }, { "epoch": 0.43, "learning_rate": 4.287839194496117e-05, "loss": 0.6934, "step": 38590 }, { "epoch": 0.43, "learning_rate": 4.287746921782267e-05, "loss": 0.7423, "step": 38595 }, { "epoch": 0.43, "learning_rate": 4.287654649068415e-05, "loss": 0.7307, "step": 38600 }, { "epoch": 0.43, "learning_rate": 4.2875623763545636e-05, "loss": 0.7886, "step": 38605 }, { "epoch": 0.43, "learning_rate": 4.2874701036407123e-05, "loss": 0.6788, "step": 38610 }, { "epoch": 0.43, "learning_rate": 4.287377830926861e-05, "loss": 0.7811, "step": 38615 }, { "epoch": 0.43, "learning_rate": 4.28728555821301e-05, "loss": 0.7331, "step": 38620 }, { "epoch": 0.43, "learning_rate": 4.287193285499159e-05, "loss": 0.7962, "step": 38625 }, { "epoch": 0.43, "learning_rate": 4.2871010127853075e-05, "loss": 0.7586, "step": 38630 }, { "epoch": 0.43, "learning_rate": 4.2870087400714556e-05, "loss": 0.8141, "step": 38635 }, { "epoch": 0.43, "learning_rate": 4.286916467357605e-05, "loss": 0.7182, "step": 38640 }, { "epoch": 0.43, "learning_rate": 4.286824194643754e-05, "loss": 0.7577, "step": 38645 }, { "epoch": 0.43, "learning_rate": 4.2867319219299026e-05, "loss": 0.8045, "step": 38650 }, { "epoch": 0.43, "learning_rate": 4.286639649216051e-05, "loss": 0.8047, "step": 38655 }, { "epoch": 0.43, "learning_rate": 4.2865473765022e-05, "loss": 0.7622, "step": 38660 }, { "epoch": 0.43, "learning_rate": 4.286455103788349e-05, "loss": 0.7587, "step": 38665 }, { "epoch": 0.43, "learning_rate": 4.286362831074498e-05, "loss": 0.767, "step": 38670 }, { "epoch": 0.43, "learning_rate": 4.286270558360646e-05, "loss": 0.725, "step": 38675 }, { "epoch": 0.43, "learning_rate": 4.286178285646795e-05, "loss": 0.7838, "step": 38680 }, { "epoch": 0.43, "learning_rate": 4.286086012932944e-05, "loss": 0.7733, "step": 38685 }, { "epoch": 0.43, "learning_rate": 4.285993740219092e-05, "loss": 0.7321, "step": 38690 }, { "epoch": 0.43, "learning_rate": 4.285901467505241e-05, "loss": 0.7191, "step": 38695 }, { "epoch": 0.43, "learning_rate": 4.2858091947913905e-05, "loss": 0.7033, "step": 38700 }, { "epoch": 0.43, "learning_rate": 4.285716922077539e-05, "loss": 0.7485, "step": 38705 }, { "epoch": 0.43, "learning_rate": 4.2856246493636874e-05, "loss": 0.7794, "step": 38710 }, { "epoch": 0.43, "learning_rate": 4.285532376649836e-05, "loss": 0.6906, "step": 38715 }, { "epoch": 0.43, "learning_rate": 4.285440103935985e-05, "loss": 0.7465, "step": 38720 }, { "epoch": 0.43, "learning_rate": 4.285347831222134e-05, "loss": 0.8304, "step": 38725 }, { "epoch": 0.43, "learning_rate": 4.2852555585082825e-05, "loss": 0.808, "step": 38730 }, { "epoch": 0.43, "learning_rate": 4.285163285794431e-05, "loss": 0.7652, "step": 38735 }, { "epoch": 0.43, "learning_rate": 4.28507101308058e-05, "loss": 0.7089, "step": 38740 }, { "epoch": 0.43, "learning_rate": 4.284978740366729e-05, "loss": 0.7998, "step": 38745 }, { "epoch": 0.43, "learning_rate": 4.2848864676528776e-05, "loss": 0.772, "step": 38750 }, { "epoch": 0.43, "learning_rate": 4.2847941949390264e-05, "loss": 0.7455, "step": 38755 }, { "epoch": 0.43, "learning_rate": 4.284701922225175e-05, "loss": 0.756, "step": 38760 }, { "epoch": 0.43, "learning_rate": 4.284609649511324e-05, "loss": 0.7695, "step": 38765 }, { "epoch": 0.43, "learning_rate": 4.284517376797473e-05, "loss": 0.7181, "step": 38770 }, { "epoch": 0.43, "learning_rate": 4.2844251040836216e-05, "loss": 0.7575, "step": 38775 }, { "epoch": 0.43, "learning_rate": 4.2843328313697704e-05, "loss": 0.8278, "step": 38780 }, { "epoch": 0.43, "learning_rate": 4.284240558655919e-05, "loss": 0.7408, "step": 38785 }, { "epoch": 0.43, "learning_rate": 4.284148285942068e-05, "loss": 0.744, "step": 38790 }, { "epoch": 0.43, "learning_rate": 4.284056013228217e-05, "loss": 0.773, "step": 38795 }, { "epoch": 0.43, "learning_rate": 4.283963740514365e-05, "loss": 0.7658, "step": 38800 }, { "epoch": 0.43, "learning_rate": 4.2838714678005136e-05, "loss": 0.7416, "step": 38805 }, { "epoch": 0.43, "learning_rate": 4.283779195086663e-05, "loss": 0.7368, "step": 38810 }, { "epoch": 0.43, "learning_rate": 4.283686922372812e-05, "loss": 0.7584, "step": 38815 }, { "epoch": 0.43, "learning_rate": 4.28359464965896e-05, "loss": 0.7594, "step": 38820 }, { "epoch": 0.43, "learning_rate": 4.283502376945109e-05, "loss": 0.7996, "step": 38825 }, { "epoch": 0.43, "learning_rate": 4.283410104231258e-05, "loss": 0.7418, "step": 38830 }, { "epoch": 0.43, "learning_rate": 4.283317831517406e-05, "loss": 0.7526, "step": 38835 }, { "epoch": 0.43, "learning_rate": 4.283225558803555e-05, "loss": 0.752, "step": 38840 }, { "epoch": 0.43, "learning_rate": 4.283133286089704e-05, "loss": 0.7794, "step": 38845 }, { "epoch": 0.43, "learning_rate": 4.2830410133758533e-05, "loss": 0.759, "step": 38850 }, { "epoch": 0.43, "learning_rate": 4.2829487406620014e-05, "loss": 0.7717, "step": 38855 }, { "epoch": 0.43, "learning_rate": 4.28285646794815e-05, "loss": 0.7153, "step": 38860 }, { "epoch": 0.43, "learning_rate": 4.282764195234299e-05, "loss": 0.8139, "step": 38865 }, { "epoch": 0.43, "learning_rate": 4.282671922520448e-05, "loss": 0.7909, "step": 38870 }, { "epoch": 0.43, "learning_rate": 4.2825796498065966e-05, "loss": 0.7643, "step": 38875 }, { "epoch": 0.43, "learning_rate": 4.2824873770927454e-05, "loss": 0.7723, "step": 38880 }, { "epoch": 0.43, "learning_rate": 4.282395104378894e-05, "loss": 0.7503, "step": 38885 }, { "epoch": 0.43, "learning_rate": 4.282302831665043e-05, "loss": 0.8081, "step": 38890 }, { "epoch": 0.43, "learning_rate": 4.282210558951192e-05, "loss": 0.7456, "step": 38895 }, { "epoch": 0.43, "learning_rate": 4.2821182862373405e-05, "loss": 0.7646, "step": 38900 }, { "epoch": 0.43, "learning_rate": 4.282026013523489e-05, "loss": 0.7688, "step": 38905 }, { "epoch": 0.43, "learning_rate": 4.2819337408096374e-05, "loss": 0.7031, "step": 38910 }, { "epoch": 0.43, "learning_rate": 4.281841468095787e-05, "loss": 0.7272, "step": 38915 }, { "epoch": 0.43, "learning_rate": 4.2817491953819357e-05, "loss": 0.834, "step": 38920 }, { "epoch": 0.43, "learning_rate": 4.2816569226680844e-05, "loss": 0.7939, "step": 38925 }, { "epoch": 0.43, "learning_rate": 4.2815646499542325e-05, "loss": 0.7529, "step": 38930 }, { "epoch": 0.43, "learning_rate": 4.281472377240382e-05, "loss": 0.7876, "step": 38935 }, { "epoch": 0.43, "learning_rate": 4.281380104526531e-05, "loss": 0.8008, "step": 38940 }, { "epoch": 0.43, "learning_rate": 4.281287831812679e-05, "loss": 0.7983, "step": 38945 }, { "epoch": 0.43, "learning_rate": 4.281195559098828e-05, "loss": 0.7611, "step": 38950 }, { "epoch": 0.43, "learning_rate": 4.2811032863849765e-05, "loss": 0.7546, "step": 38955 }, { "epoch": 0.43, "learning_rate": 4.281011013671126e-05, "loss": 0.8154, "step": 38960 }, { "epoch": 0.43, "learning_rate": 4.280918740957274e-05, "loss": 0.7995, "step": 38965 }, { "epoch": 0.43, "learning_rate": 4.280826468243423e-05, "loss": 0.7597, "step": 38970 }, { "epoch": 0.43, "learning_rate": 4.2807341955295716e-05, "loss": 0.7699, "step": 38975 }, { "epoch": 0.43, "learning_rate": 4.280641922815721e-05, "loss": 0.6933, "step": 38980 }, { "epoch": 0.43, "learning_rate": 4.280549650101869e-05, "loss": 0.7535, "step": 38985 }, { "epoch": 0.43, "learning_rate": 4.280457377388018e-05, "loss": 0.7742, "step": 38990 }, { "epoch": 0.43, "learning_rate": 4.280365104674167e-05, "loss": 0.7449, "step": 38995 }, { "epoch": 0.43, "learning_rate": 4.2802728319603155e-05, "loss": 0.789, "step": 39000 }, { "epoch": 0.43, "eval_loss": 0.741820216178894, "eval_runtime": 69.2578, "eval_samples_per_second": 28.878, "eval_steps_per_second": 14.439, "step": 39000 }, { "epoch": 0.43, "learning_rate": 4.280180559246464e-05, "loss": 0.7665, "step": 39005 }, { "epoch": 0.43, "learning_rate": 4.280088286532613e-05, "loss": 0.76, "step": 39010 }, { "epoch": 0.43, "learning_rate": 4.279996013818762e-05, "loss": 0.7823, "step": 39015 }, { "epoch": 0.43, "learning_rate": 4.27990374110491e-05, "loss": 0.8283, "step": 39020 }, { "epoch": 0.43, "learning_rate": 4.2798114683910595e-05, "loss": 0.7511, "step": 39025 }, { "epoch": 0.43, "learning_rate": 4.279719195677208e-05, "loss": 0.7621, "step": 39030 }, { "epoch": 0.43, "learning_rate": 4.279626922963357e-05, "loss": 0.7181, "step": 39035 }, { "epoch": 0.43, "learning_rate": 4.279534650249505e-05, "loss": 0.7881, "step": 39040 }, { "epoch": 0.43, "learning_rate": 4.2794423775356546e-05, "loss": 0.6804, "step": 39045 }, { "epoch": 0.43, "learning_rate": 4.2793501048218034e-05, "loss": 0.7948, "step": 39050 }, { "epoch": 0.43, "learning_rate": 4.279257832107952e-05, "loss": 0.7115, "step": 39055 }, { "epoch": 0.43, "learning_rate": 4.2791655593941e-05, "loss": 0.7652, "step": 39060 }, { "epoch": 0.43, "learning_rate": 4.27907328668025e-05, "loss": 0.8123, "step": 39065 }, { "epoch": 0.43, "learning_rate": 4.2789810139663985e-05, "loss": 0.7446, "step": 39070 }, { "epoch": 0.43, "learning_rate": 4.2788887412525466e-05, "loss": 0.7787, "step": 39075 }, { "epoch": 0.43, "learning_rate": 4.2787964685386954e-05, "loss": 0.7033, "step": 39080 }, { "epoch": 0.43, "learning_rate": 4.278704195824845e-05, "loss": 0.749, "step": 39085 }, { "epoch": 0.43, "learning_rate": 4.2786119231109937e-05, "loss": 0.7857, "step": 39090 }, { "epoch": 0.43, "learning_rate": 4.278519650397142e-05, "loss": 0.7515, "step": 39095 }, { "epoch": 0.43, "learning_rate": 4.2784273776832906e-05, "loss": 0.7309, "step": 39100 }, { "epoch": 0.43, "learning_rate": 4.278335104969439e-05, "loss": 0.7937, "step": 39105 }, { "epoch": 0.43, "learning_rate": 4.278242832255588e-05, "loss": 0.6817, "step": 39110 }, { "epoch": 0.43, "learning_rate": 4.278150559541737e-05, "loss": 0.7323, "step": 39115 }, { "epoch": 0.43, "learning_rate": 4.278058286827886e-05, "loss": 0.8012, "step": 39120 }, { "epoch": 0.43, "learning_rate": 4.2779660141140345e-05, "loss": 0.7411, "step": 39125 }, { "epoch": 0.43, "learning_rate": 4.277873741400183e-05, "loss": 0.7813, "step": 39130 }, { "epoch": 0.43, "learning_rate": 4.277781468686332e-05, "loss": 0.7844, "step": 39135 }, { "epoch": 0.43, "learning_rate": 4.277689195972481e-05, "loss": 0.7462, "step": 39140 }, { "epoch": 0.43, "learning_rate": 4.2775969232586296e-05, "loss": 0.8049, "step": 39145 }, { "epoch": 0.43, "learning_rate": 4.2775046505447784e-05, "loss": 0.7363, "step": 39150 }, { "epoch": 0.43, "learning_rate": 4.277412377830927e-05, "loss": 0.7608, "step": 39155 }, { "epoch": 0.43, "learning_rate": 4.277320105117076e-05, "loss": 0.7507, "step": 39160 }, { "epoch": 0.43, "learning_rate": 4.277227832403225e-05, "loss": 0.7941, "step": 39165 }, { "epoch": 0.43, "learning_rate": 4.277135559689373e-05, "loss": 0.821, "step": 39170 }, { "epoch": 0.43, "learning_rate": 4.277043286975522e-05, "loss": 0.8065, "step": 39175 }, { "epoch": 0.43, "learning_rate": 4.276951014261671e-05, "loss": 0.7922, "step": 39180 }, { "epoch": 0.43, "learning_rate": 4.276858741547819e-05, "loss": 0.7851, "step": 39185 }, { "epoch": 0.43, "learning_rate": 4.276766468833968e-05, "loss": 0.7795, "step": 39190 }, { "epoch": 0.43, "learning_rate": 4.2766741961201175e-05, "loss": 0.7322, "step": 39195 }, { "epoch": 0.43, "learning_rate": 4.276581923406266e-05, "loss": 0.7702, "step": 39200 }, { "epoch": 0.43, "learning_rate": 4.2764896506924144e-05, "loss": 0.7708, "step": 39205 }, { "epoch": 0.43, "learning_rate": 4.276397377978563e-05, "loss": 0.7256, "step": 39210 }, { "epoch": 0.43, "learning_rate": 4.2763051052647126e-05, "loss": 0.7918, "step": 39215 }, { "epoch": 0.43, "learning_rate": 4.276212832550861e-05, "loss": 0.7373, "step": 39220 }, { "epoch": 0.43, "learning_rate": 4.2761205598370095e-05, "loss": 0.7537, "step": 39225 }, { "epoch": 0.43, "learning_rate": 4.276028287123158e-05, "loss": 0.8174, "step": 39230 }, { "epoch": 0.43, "learning_rate": 4.275936014409308e-05, "loss": 0.7083, "step": 39235 }, { "epoch": 0.43, "learning_rate": 4.275843741695456e-05, "loss": 0.7329, "step": 39240 }, { "epoch": 0.43, "learning_rate": 4.2757514689816046e-05, "loss": 0.8163, "step": 39245 }, { "epoch": 0.43, "learning_rate": 4.2756591962677534e-05, "loss": 0.7667, "step": 39250 }, { "epoch": 0.43, "learning_rate": 4.275566923553902e-05, "loss": 0.7598, "step": 39255 }, { "epoch": 0.43, "learning_rate": 4.275474650840051e-05, "loss": 0.7847, "step": 39260 }, { "epoch": 0.43, "learning_rate": 4.2753823781262e-05, "loss": 0.7559, "step": 39265 }, { "epoch": 0.43, "learning_rate": 4.2752901054123486e-05, "loss": 0.7441, "step": 39270 }, { "epoch": 0.43, "learning_rate": 4.2751978326984973e-05, "loss": 0.7631, "step": 39275 }, { "epoch": 0.43, "learning_rate": 4.275105559984646e-05, "loss": 0.7561, "step": 39280 }, { "epoch": 0.43, "learning_rate": 4.275013287270795e-05, "loss": 0.7305, "step": 39285 }, { "epoch": 0.44, "learning_rate": 4.274921014556944e-05, "loss": 0.7356, "step": 39290 }, { "epoch": 0.44, "learning_rate": 4.274828741843092e-05, "loss": 0.7681, "step": 39295 }, { "epoch": 0.44, "learning_rate": 4.274736469129241e-05, "loss": 0.7445, "step": 39300 }, { "epoch": 0.44, "learning_rate": 4.27464419641539e-05, "loss": 0.7213, "step": 39305 }, { "epoch": 0.44, "learning_rate": 4.274551923701539e-05, "loss": 0.7523, "step": 39310 }, { "epoch": 0.44, "learning_rate": 4.274459650987687e-05, "loss": 0.7613, "step": 39315 }, { "epoch": 0.44, "learning_rate": 4.274367378273836e-05, "loss": 0.7436, "step": 39320 }, { "epoch": 0.44, "learning_rate": 4.274275105559985e-05, "loss": 0.7575, "step": 39325 }, { "epoch": 0.44, "learning_rate": 4.274182832846133e-05, "loss": 0.7206, "step": 39330 }, { "epoch": 0.44, "learning_rate": 4.274090560132282e-05, "loss": 0.7566, "step": 39335 }, { "epoch": 0.44, "learning_rate": 4.273998287418431e-05, "loss": 0.7812, "step": 39340 }, { "epoch": 0.44, "learning_rate": 4.27390601470458e-05, "loss": 0.702, "step": 39345 }, { "epoch": 0.44, "learning_rate": 4.2738137419907284e-05, "loss": 0.7881, "step": 39350 }, { "epoch": 0.44, "learning_rate": 4.273721469276877e-05, "loss": 0.7916, "step": 39355 }, { "epoch": 0.44, "learning_rate": 4.273629196563026e-05, "loss": 0.8144, "step": 39360 }, { "epoch": 0.44, "learning_rate": 4.2735369238491755e-05, "loss": 0.7926, "step": 39365 }, { "epoch": 0.44, "learning_rate": 4.2734446511353236e-05, "loss": 0.7233, "step": 39370 }, { "epoch": 0.44, "learning_rate": 4.2733523784214724e-05, "loss": 0.6973, "step": 39375 }, { "epoch": 0.44, "learning_rate": 4.273260105707621e-05, "loss": 0.7432, "step": 39380 }, { "epoch": 0.44, "learning_rate": 4.27316783299377e-05, "loss": 0.7917, "step": 39385 }, { "epoch": 0.44, "learning_rate": 4.273075560279919e-05, "loss": 0.7847, "step": 39390 }, { "epoch": 0.44, "learning_rate": 4.2729832875660675e-05, "loss": 0.7385, "step": 39395 }, { "epoch": 0.44, "learning_rate": 4.272891014852216e-05, "loss": 0.7665, "step": 39400 }, { "epoch": 0.44, "learning_rate": 4.2727987421383644e-05, "loss": 0.7209, "step": 39405 }, { "epoch": 0.44, "learning_rate": 4.272706469424514e-05, "loss": 0.7724, "step": 39410 }, { "epoch": 0.44, "learning_rate": 4.2726141967106626e-05, "loss": 0.7384, "step": 39415 }, { "epoch": 0.44, "learning_rate": 4.2725219239968114e-05, "loss": 0.772, "step": 39420 }, { "epoch": 0.44, "learning_rate": 4.2724296512829595e-05, "loss": 0.7744, "step": 39425 }, { "epoch": 0.44, "learning_rate": 4.272337378569109e-05, "loss": 0.7322, "step": 39430 }, { "epoch": 0.44, "learning_rate": 4.272245105855258e-05, "loss": 0.7585, "step": 39435 }, { "epoch": 0.44, "learning_rate": 4.2721528331414066e-05, "loss": 0.7647, "step": 39440 }, { "epoch": 0.44, "learning_rate": 4.272060560427555e-05, "loss": 0.7866, "step": 39445 }, { "epoch": 0.44, "learning_rate": 4.271968287713704e-05, "loss": 0.7288, "step": 39450 }, { "epoch": 0.44, "learning_rate": 4.271876014999853e-05, "loss": 0.6592, "step": 39455 }, { "epoch": 0.44, "learning_rate": 4.271783742286001e-05, "loss": 0.745, "step": 39460 }, { "epoch": 0.44, "learning_rate": 4.27169146957215e-05, "loss": 0.8269, "step": 39465 }, { "epoch": 0.44, "learning_rate": 4.2715991968582986e-05, "loss": 0.7867, "step": 39470 }, { "epoch": 0.44, "learning_rate": 4.271506924144448e-05, "loss": 0.8052, "step": 39475 }, { "epoch": 0.44, "learning_rate": 4.271414651430596e-05, "loss": 0.7884, "step": 39480 }, { "epoch": 0.44, "learning_rate": 4.271322378716745e-05, "loss": 0.7581, "step": 39485 }, { "epoch": 0.44, "learning_rate": 4.271230106002894e-05, "loss": 0.766, "step": 39490 }, { "epoch": 0.44, "learning_rate": 4.2711378332890425e-05, "loss": 0.7349, "step": 39495 }, { "epoch": 0.44, "learning_rate": 4.271045560575191e-05, "loss": 0.7997, "step": 39500 }, { "epoch": 0.44, "learning_rate": 4.27095328786134e-05, "loss": 0.6656, "step": 39505 }, { "epoch": 0.44, "learning_rate": 4.270861015147489e-05, "loss": 0.7475, "step": 39510 }, { "epoch": 0.44, "learning_rate": 4.270768742433638e-05, "loss": 0.7808, "step": 39515 }, { "epoch": 0.44, "learning_rate": 4.2706764697197864e-05, "loss": 0.7945, "step": 39520 }, { "epoch": 0.44, "learning_rate": 4.270584197005935e-05, "loss": 0.8045, "step": 39525 }, { "epoch": 0.44, "learning_rate": 4.270491924292084e-05, "loss": 0.7968, "step": 39530 }, { "epoch": 0.44, "learning_rate": 4.270399651578233e-05, "loss": 0.7673, "step": 39535 }, { "epoch": 0.44, "learning_rate": 4.2703073788643816e-05, "loss": 0.7703, "step": 39540 }, { "epoch": 0.44, "learning_rate": 4.2702151061505304e-05, "loss": 0.7112, "step": 39545 }, { "epoch": 0.44, "learning_rate": 4.270122833436679e-05, "loss": 0.7261, "step": 39550 }, { "epoch": 0.44, "learning_rate": 4.270030560722827e-05, "loss": 0.7392, "step": 39555 }, { "epoch": 0.44, "learning_rate": 4.269938288008977e-05, "loss": 0.7599, "step": 39560 }, { "epoch": 0.44, "learning_rate": 4.2698460152951255e-05, "loss": 0.7652, "step": 39565 }, { "epoch": 0.44, "learning_rate": 4.2697537425812736e-05, "loss": 0.7075, "step": 39570 }, { "epoch": 0.44, "learning_rate": 4.2696614698674224e-05, "loss": 0.7828, "step": 39575 }, { "epoch": 0.44, "learning_rate": 4.269569197153572e-05, "loss": 0.7921, "step": 39580 }, { "epoch": 0.44, "learning_rate": 4.2694769244397207e-05, "loss": 0.7784, "step": 39585 }, { "epoch": 0.44, "learning_rate": 4.269384651725869e-05, "loss": 0.7492, "step": 39590 }, { "epoch": 0.44, "learning_rate": 4.2692923790120175e-05, "loss": 0.7211, "step": 39595 }, { "epoch": 0.44, "learning_rate": 4.269200106298167e-05, "loss": 0.7697, "step": 39600 }, { "epoch": 0.44, "learning_rate": 4.269107833584315e-05, "loss": 0.7341, "step": 39605 }, { "epoch": 0.44, "learning_rate": 4.269015560870464e-05, "loss": 0.755, "step": 39610 }, { "epoch": 0.44, "learning_rate": 4.268923288156613e-05, "loss": 0.7787, "step": 39615 }, { "epoch": 0.44, "learning_rate": 4.268831015442762e-05, "loss": 0.7015, "step": 39620 }, { "epoch": 0.44, "learning_rate": 4.26873874272891e-05, "loss": 0.776, "step": 39625 }, { "epoch": 0.44, "learning_rate": 4.268646470015059e-05, "loss": 0.767, "step": 39630 }, { "epoch": 0.44, "learning_rate": 4.268554197301208e-05, "loss": 0.747, "step": 39635 }, { "epoch": 0.44, "learning_rate": 4.2684619245873566e-05, "loss": 0.8064, "step": 39640 }, { "epoch": 0.44, "learning_rate": 4.2683696518735054e-05, "loss": 0.7418, "step": 39645 }, { "epoch": 0.44, "learning_rate": 4.268277379159654e-05, "loss": 0.7217, "step": 39650 }, { "epoch": 0.44, "learning_rate": 4.268185106445803e-05, "loss": 0.8775, "step": 39655 }, { "epoch": 0.44, "learning_rate": 4.268092833731952e-05, "loss": 0.7918, "step": 39660 }, { "epoch": 0.44, "learning_rate": 4.2680005610181005e-05, "loss": 0.7842, "step": 39665 }, { "epoch": 0.44, "learning_rate": 4.267908288304249e-05, "loss": 0.7277, "step": 39670 }, { "epoch": 0.44, "learning_rate": 4.267816015590398e-05, "loss": 0.7535, "step": 39675 }, { "epoch": 0.44, "learning_rate": 4.267723742876546e-05, "loss": 0.7486, "step": 39680 }, { "epoch": 0.44, "learning_rate": 4.267631470162696e-05, "loss": 0.7487, "step": 39685 }, { "epoch": 0.44, "learning_rate": 4.2675391974488445e-05, "loss": 0.785, "step": 39690 }, { "epoch": 0.44, "learning_rate": 4.267446924734993e-05, "loss": 0.7841, "step": 39695 }, { "epoch": 0.44, "learning_rate": 4.2673546520211413e-05, "loss": 0.7589, "step": 39700 }, { "epoch": 0.44, "learning_rate": 4.26726237930729e-05, "loss": 0.7268, "step": 39705 }, { "epoch": 0.44, "learning_rate": 4.2671701065934396e-05, "loss": 0.801, "step": 39710 }, { "epoch": 0.44, "learning_rate": 4.267077833879588e-05, "loss": 0.7982, "step": 39715 }, { "epoch": 0.44, "learning_rate": 4.2669855611657365e-05, "loss": 0.726, "step": 39720 }, { "epoch": 0.44, "learning_rate": 4.266893288451885e-05, "loss": 0.6939, "step": 39725 }, { "epoch": 0.44, "learning_rate": 4.266801015738035e-05, "loss": 0.7973, "step": 39730 }, { "epoch": 0.44, "learning_rate": 4.266708743024183e-05, "loss": 0.7452, "step": 39735 }, { "epoch": 0.44, "learning_rate": 4.2666164703103316e-05, "loss": 0.7572, "step": 39740 }, { "epoch": 0.44, "learning_rate": 4.2665241975964804e-05, "loss": 0.7253, "step": 39745 }, { "epoch": 0.44, "learning_rate": 4.26643192488263e-05, "loss": 0.829, "step": 39750 }, { "epoch": 0.44, "learning_rate": 4.266339652168778e-05, "loss": 0.739, "step": 39755 }, { "epoch": 0.44, "learning_rate": 4.266247379454927e-05, "loss": 0.7291, "step": 39760 }, { "epoch": 0.44, "learning_rate": 4.2661551067410756e-05, "loss": 0.8044, "step": 39765 }, { "epoch": 0.44, "learning_rate": 4.266062834027224e-05, "loss": 0.7706, "step": 39770 }, { "epoch": 0.44, "learning_rate": 4.265970561313373e-05, "loss": 0.7405, "step": 39775 }, { "epoch": 0.44, "learning_rate": 4.265878288599522e-05, "loss": 0.7899, "step": 39780 }, { "epoch": 0.44, "learning_rate": 4.265786015885671e-05, "loss": 0.7125, "step": 39785 }, { "epoch": 0.44, "learning_rate": 4.265693743171819e-05, "loss": 0.7349, "step": 39790 }, { "epoch": 0.44, "learning_rate": 4.265601470457968e-05, "loss": 0.7617, "step": 39795 }, { "epoch": 0.44, "learning_rate": 4.265509197744117e-05, "loss": 0.7292, "step": 39800 }, { "epoch": 0.44, "learning_rate": 4.265416925030266e-05, "loss": 0.7913, "step": 39805 }, { "epoch": 0.44, "learning_rate": 4.265324652316414e-05, "loss": 0.7793, "step": 39810 }, { "epoch": 0.44, "learning_rate": 4.2652323796025634e-05, "loss": 0.7695, "step": 39815 }, { "epoch": 0.44, "learning_rate": 4.265140106888712e-05, "loss": 0.7829, "step": 39820 }, { "epoch": 0.44, "learning_rate": 4.265047834174861e-05, "loss": 0.7266, "step": 39825 }, { "epoch": 0.44, "learning_rate": 4.264955561461009e-05, "loss": 0.7938, "step": 39830 }, { "epoch": 0.44, "learning_rate": 4.2648632887471585e-05, "loss": 0.8006, "step": 39835 }, { "epoch": 0.44, "learning_rate": 4.264771016033307e-05, "loss": 0.76, "step": 39840 }, { "epoch": 0.44, "learning_rate": 4.2646787433194554e-05, "loss": 0.6777, "step": 39845 }, { "epoch": 0.44, "learning_rate": 4.264586470605604e-05, "loss": 0.8308, "step": 39850 }, { "epoch": 0.44, "learning_rate": 4.264494197891753e-05, "loss": 0.7643, "step": 39855 }, { "epoch": 0.44, "learning_rate": 4.2644019251779025e-05, "loss": 0.8004, "step": 39860 }, { "epoch": 0.44, "learning_rate": 4.2643096524640506e-05, "loss": 0.7816, "step": 39865 }, { "epoch": 0.44, "learning_rate": 4.2642173797501994e-05, "loss": 0.7745, "step": 39870 }, { "epoch": 0.44, "learning_rate": 4.264125107036348e-05, "loss": 0.7606, "step": 39875 }, { "epoch": 0.44, "learning_rate": 4.264032834322497e-05, "loss": 0.8445, "step": 39880 }, { "epoch": 0.44, "learning_rate": 4.263940561608646e-05, "loss": 0.7391, "step": 39885 }, { "epoch": 0.44, "learning_rate": 4.2638482888947945e-05, "loss": 0.7908, "step": 39890 }, { "epoch": 0.44, "learning_rate": 4.263756016180943e-05, "loss": 0.7768, "step": 39895 }, { "epoch": 0.44, "learning_rate": 4.263663743467092e-05, "loss": 0.7576, "step": 39900 }, { "epoch": 0.44, "learning_rate": 4.263571470753241e-05, "loss": 0.7707, "step": 39905 }, { "epoch": 0.44, "learning_rate": 4.2634791980393896e-05, "loss": 0.6843, "step": 39910 }, { "epoch": 0.44, "learning_rate": 4.2633869253255384e-05, "loss": 0.7719, "step": 39915 }, { "epoch": 0.44, "learning_rate": 4.263294652611687e-05, "loss": 0.7639, "step": 39920 }, { "epoch": 0.44, "learning_rate": 4.263202379897836e-05, "loss": 0.7635, "step": 39925 }, { "epoch": 0.44, "learning_rate": 4.263110107183985e-05, "loss": 0.7884, "step": 39930 }, { "epoch": 0.44, "learning_rate": 4.2630178344701336e-05, "loss": 0.8131, "step": 39935 }, { "epoch": 0.44, "learning_rate": 4.262925561756282e-05, "loss": 0.7689, "step": 39940 }, { "epoch": 0.44, "learning_rate": 4.262833289042431e-05, "loss": 0.7463, "step": 39945 }, { "epoch": 0.44, "learning_rate": 4.26274101632858e-05, "loss": 0.7752, "step": 39950 }, { "epoch": 0.44, "learning_rate": 4.262648743614728e-05, "loss": 0.7157, "step": 39955 }, { "epoch": 0.44, "learning_rate": 4.262556470900877e-05, "loss": 0.7537, "step": 39960 }, { "epoch": 0.44, "learning_rate": 4.262464198187026e-05, "loss": 0.7396, "step": 39965 }, { "epoch": 0.44, "learning_rate": 4.262371925473175e-05, "loss": 0.6863, "step": 39970 }, { "epoch": 0.44, "learning_rate": 4.262279652759323e-05, "loss": 0.7337, "step": 39975 }, { "epoch": 0.44, "learning_rate": 4.262187380045472e-05, "loss": 0.7164, "step": 39980 }, { "epoch": 0.44, "learning_rate": 4.2620951073316214e-05, "loss": 0.7576, "step": 39985 }, { "epoch": 0.44, "learning_rate": 4.2620028346177695e-05, "loss": 0.7586, "step": 39990 }, { "epoch": 0.44, "learning_rate": 4.261910561903918e-05, "loss": 0.7743, "step": 39995 }, { "epoch": 0.44, "learning_rate": 4.261818289190067e-05, "loss": 0.7296, "step": 40000 }, { "epoch": 0.44, "eval_loss": 0.7325079441070557, "eval_runtime": 69.315, "eval_samples_per_second": 28.854, "eval_steps_per_second": 14.427, "step": 40000 }, { "epoch": 0.44, "learning_rate": 4.261726016476216e-05, "loss": 0.7983, "step": 40005 }, { "epoch": 0.44, "learning_rate": 4.2616337437623647e-05, "loss": 0.7846, "step": 40010 }, { "epoch": 0.44, "learning_rate": 4.2615414710485134e-05, "loss": 0.7854, "step": 40015 }, { "epoch": 0.44, "learning_rate": 4.261449198334662e-05, "loss": 0.777, "step": 40020 }, { "epoch": 0.44, "learning_rate": 4.261356925620811e-05, "loss": 0.7419, "step": 40025 }, { "epoch": 0.44, "learning_rate": 4.26126465290696e-05, "loss": 0.7253, "step": 40030 }, { "epoch": 0.44, "learning_rate": 4.2611723801931086e-05, "loss": 0.725, "step": 40035 }, { "epoch": 0.44, "learning_rate": 4.2610801074792574e-05, "loss": 0.7294, "step": 40040 }, { "epoch": 0.44, "learning_rate": 4.260987834765406e-05, "loss": 0.7859, "step": 40045 }, { "epoch": 0.44, "learning_rate": 4.260895562051555e-05, "loss": 0.7395, "step": 40050 }, { "epoch": 0.44, "learning_rate": 4.260803289337704e-05, "loss": 0.7875, "step": 40055 }, { "epoch": 0.44, "learning_rate": 4.2607110166238525e-05, "loss": 0.8029, "step": 40060 }, { "epoch": 0.44, "learning_rate": 4.2606187439100006e-05, "loss": 0.7297, "step": 40065 }, { "epoch": 0.44, "learning_rate": 4.26052647119615e-05, "loss": 0.8152, "step": 40070 }, { "epoch": 0.44, "learning_rate": 4.260434198482299e-05, "loss": 0.7842, "step": 40075 }, { "epoch": 0.44, "learning_rate": 4.2603419257684476e-05, "loss": 0.8302, "step": 40080 }, { "epoch": 0.44, "learning_rate": 4.260249653054596e-05, "loss": 0.7387, "step": 40085 }, { "epoch": 0.44, "learning_rate": 4.2601573803407445e-05, "loss": 0.7646, "step": 40090 }, { "epoch": 0.44, "learning_rate": 4.260065107626894e-05, "loss": 0.7628, "step": 40095 }, { "epoch": 0.44, "learning_rate": 4.259972834913043e-05, "loss": 0.7243, "step": 40100 }, { "epoch": 0.44, "learning_rate": 4.259880562199191e-05, "loss": 0.7671, "step": 40105 }, { "epoch": 0.44, "learning_rate": 4.25978828948534e-05, "loss": 0.7855, "step": 40110 }, { "epoch": 0.44, "learning_rate": 4.259696016771489e-05, "loss": 0.7074, "step": 40115 }, { "epoch": 0.44, "learning_rate": 4.259603744057637e-05, "loss": 0.8146, "step": 40120 }, { "epoch": 0.44, "learning_rate": 4.259511471343786e-05, "loss": 0.7364, "step": 40125 }, { "epoch": 0.44, "learning_rate": 4.259419198629935e-05, "loss": 0.7767, "step": 40130 }, { "epoch": 0.44, "learning_rate": 4.259326925916084e-05, "loss": 0.7665, "step": 40135 }, { "epoch": 0.44, "learning_rate": 4.2592346532022324e-05, "loss": 0.8116, "step": 40140 }, { "epoch": 0.44, "learning_rate": 4.259142380488381e-05, "loss": 0.7283, "step": 40145 }, { "epoch": 0.44, "learning_rate": 4.25905010777453e-05, "loss": 0.7474, "step": 40150 }, { "epoch": 0.44, "learning_rate": 4.258957835060679e-05, "loss": 0.8025, "step": 40155 }, { "epoch": 0.44, "learning_rate": 4.2588655623468275e-05, "loss": 0.7648, "step": 40160 }, { "epoch": 0.44, "learning_rate": 4.258773289632976e-05, "loss": 0.744, "step": 40165 }, { "epoch": 0.44, "learning_rate": 4.258681016919125e-05, "loss": 0.7122, "step": 40170 }, { "epoch": 0.44, "learning_rate": 4.258588744205274e-05, "loss": 0.737, "step": 40175 }, { "epoch": 0.44, "learning_rate": 4.258496471491423e-05, "loss": 0.7344, "step": 40180 }, { "epoch": 0.44, "learning_rate": 4.2584041987775714e-05, "loss": 0.77, "step": 40185 }, { "epoch": 0.45, "learning_rate": 4.25831192606372e-05, "loss": 0.7778, "step": 40190 }, { "epoch": 0.45, "learning_rate": 4.258219653349868e-05, "loss": 0.7524, "step": 40195 }, { "epoch": 0.45, "learning_rate": 4.258127380636018e-05, "loss": 0.7506, "step": 40200 }, { "epoch": 0.45, "learning_rate": 4.2580351079221666e-05, "loss": 0.794, "step": 40205 }, { "epoch": 0.45, "learning_rate": 4.2579428352083154e-05, "loss": 0.7327, "step": 40210 }, { "epoch": 0.45, "learning_rate": 4.2578505624944635e-05, "loss": 0.8092, "step": 40215 }, { "epoch": 0.45, "learning_rate": 4.257758289780613e-05, "loss": 0.6799, "step": 40220 }, { "epoch": 0.45, "learning_rate": 4.257666017066762e-05, "loss": 0.7477, "step": 40225 }, { "epoch": 0.45, "learning_rate": 4.25757374435291e-05, "loss": 0.8081, "step": 40230 }, { "epoch": 0.45, "learning_rate": 4.2574814716390586e-05, "loss": 0.8037, "step": 40235 }, { "epoch": 0.45, "learning_rate": 4.2573891989252074e-05, "loss": 0.7664, "step": 40240 }, { "epoch": 0.45, "learning_rate": 4.257296926211357e-05, "loss": 0.7801, "step": 40245 }, { "epoch": 0.45, "learning_rate": 4.257204653497505e-05, "loss": 0.7787, "step": 40250 }, { "epoch": 0.45, "learning_rate": 4.257112380783654e-05, "loss": 0.759, "step": 40255 }, { "epoch": 0.45, "learning_rate": 4.2570201080698025e-05, "loss": 0.801, "step": 40260 }, { "epoch": 0.45, "learning_rate": 4.256927835355951e-05, "loss": 0.7821, "step": 40265 }, { "epoch": 0.45, "learning_rate": 4.2568355626421e-05, "loss": 0.7139, "step": 40270 }, { "epoch": 0.45, "learning_rate": 4.256743289928249e-05, "loss": 0.7916, "step": 40275 }, { "epoch": 0.45, "learning_rate": 4.256651017214398e-05, "loss": 0.7169, "step": 40280 }, { "epoch": 0.45, "learning_rate": 4.2565587445005465e-05, "loss": 0.76, "step": 40285 }, { "epoch": 0.45, "learning_rate": 4.256466471786695e-05, "loss": 0.7355, "step": 40290 }, { "epoch": 0.45, "learning_rate": 4.256374199072844e-05, "loss": 0.7087, "step": 40295 }, { "epoch": 0.45, "learning_rate": 4.256281926358993e-05, "loss": 0.7261, "step": 40300 }, { "epoch": 0.45, "learning_rate": 4.256189653645141e-05, "loss": 0.7971, "step": 40305 }, { "epoch": 0.45, "learning_rate": 4.2560973809312904e-05, "loss": 0.7505, "step": 40310 }, { "epoch": 0.45, "learning_rate": 4.256005108217439e-05, "loss": 0.7999, "step": 40315 }, { "epoch": 0.45, "learning_rate": 4.255912835503588e-05, "loss": 0.7548, "step": 40320 }, { "epoch": 0.45, "learning_rate": 4.255820562789736e-05, "loss": 0.7669, "step": 40325 }, { "epoch": 0.45, "learning_rate": 4.2557282900758855e-05, "loss": 0.7783, "step": 40330 }, { "epoch": 0.45, "learning_rate": 4.255636017362034e-05, "loss": 0.7423, "step": 40335 }, { "epoch": 0.45, "learning_rate": 4.2555437446481824e-05, "loss": 0.7731, "step": 40340 }, { "epoch": 0.45, "learning_rate": 4.255451471934331e-05, "loss": 0.8012, "step": 40345 }, { "epoch": 0.45, "learning_rate": 4.255359199220481e-05, "loss": 0.7976, "step": 40350 }, { "epoch": 0.45, "learning_rate": 4.2552669265066295e-05, "loss": 0.7748, "step": 40355 }, { "epoch": 0.45, "learning_rate": 4.2551746537927776e-05, "loss": 0.7119, "step": 40360 }, { "epoch": 0.45, "learning_rate": 4.2550823810789263e-05, "loss": 0.7701, "step": 40365 }, { "epoch": 0.45, "learning_rate": 4.254990108365076e-05, "loss": 0.7035, "step": 40370 }, { "epoch": 0.45, "learning_rate": 4.254897835651224e-05, "loss": 0.7835, "step": 40375 }, { "epoch": 0.45, "learning_rate": 4.254805562937373e-05, "loss": 0.7216, "step": 40380 }, { "epoch": 0.45, "learning_rate": 4.2547132902235215e-05, "loss": 0.7281, "step": 40385 }, { "epoch": 0.45, "learning_rate": 4.25462101750967e-05, "loss": 0.7453, "step": 40390 }, { "epoch": 0.45, "learning_rate": 4.254528744795819e-05, "loss": 0.7892, "step": 40395 }, { "epoch": 0.45, "learning_rate": 4.254436472081968e-05, "loss": 0.7731, "step": 40400 }, { "epoch": 0.45, "learning_rate": 4.2543441993681166e-05, "loss": 0.7554, "step": 40405 }, { "epoch": 0.45, "learning_rate": 4.2542519266542654e-05, "loss": 0.7535, "step": 40410 }, { "epoch": 0.45, "learning_rate": 4.254159653940414e-05, "loss": 0.7687, "step": 40415 }, { "epoch": 0.45, "learning_rate": 4.254067381226563e-05, "loss": 0.7785, "step": 40420 }, { "epoch": 0.45, "learning_rate": 4.253975108512712e-05, "loss": 0.7464, "step": 40425 }, { "epoch": 0.45, "learning_rate": 4.2538828357988605e-05, "loss": 0.741, "step": 40430 }, { "epoch": 0.45, "learning_rate": 4.253790563085009e-05, "loss": 0.8066, "step": 40435 }, { "epoch": 0.45, "learning_rate": 4.253698290371158e-05, "loss": 0.7642, "step": 40440 }, { "epoch": 0.45, "learning_rate": 4.253606017657307e-05, "loss": 0.6956, "step": 40445 }, { "epoch": 0.45, "learning_rate": 4.253513744943455e-05, "loss": 0.8013, "step": 40450 }, { "epoch": 0.45, "learning_rate": 4.2534214722296045e-05, "loss": 0.8131, "step": 40455 }, { "epoch": 0.45, "learning_rate": 4.253329199515753e-05, "loss": 0.723, "step": 40460 }, { "epoch": 0.45, "learning_rate": 4.253236926801902e-05, "loss": 0.7948, "step": 40465 }, { "epoch": 0.45, "learning_rate": 4.25314465408805e-05, "loss": 0.7095, "step": 40470 }, { "epoch": 0.45, "learning_rate": 4.253052381374199e-05, "loss": 0.7088, "step": 40475 }, { "epoch": 0.45, "learning_rate": 4.2529601086603484e-05, "loss": 0.7071, "step": 40480 }, { "epoch": 0.45, "learning_rate": 4.252867835946497e-05, "loss": 0.7806, "step": 40485 }, { "epoch": 0.45, "learning_rate": 4.252775563232645e-05, "loss": 0.7727, "step": 40490 }, { "epoch": 0.45, "learning_rate": 4.252683290518794e-05, "loss": 0.7579, "step": 40495 }, { "epoch": 0.45, "learning_rate": 4.2525910178049435e-05, "loss": 0.7375, "step": 40500 }, { "epoch": 0.45, "learning_rate": 4.2524987450910916e-05, "loss": 0.7679, "step": 40505 }, { "epoch": 0.45, "learning_rate": 4.2524064723772404e-05, "loss": 0.7158, "step": 40510 }, { "epoch": 0.45, "learning_rate": 4.252314199663389e-05, "loss": 0.7541, "step": 40515 }, { "epoch": 0.45, "learning_rate": 4.252221926949539e-05, "loss": 0.7356, "step": 40520 }, { "epoch": 0.45, "learning_rate": 4.252129654235687e-05, "loss": 0.7507, "step": 40525 }, { "epoch": 0.45, "learning_rate": 4.2520373815218356e-05, "loss": 0.7669, "step": 40530 }, { "epoch": 0.45, "learning_rate": 4.2519451088079844e-05, "loss": 0.7536, "step": 40535 }, { "epoch": 0.45, "learning_rate": 4.251852836094133e-05, "loss": 0.7729, "step": 40540 }, { "epoch": 0.45, "learning_rate": 4.251760563380282e-05, "loss": 0.7449, "step": 40545 }, { "epoch": 0.45, "learning_rate": 4.251668290666431e-05, "loss": 0.779, "step": 40550 }, { "epoch": 0.45, "learning_rate": 4.2515760179525795e-05, "loss": 0.7601, "step": 40555 }, { "epoch": 0.45, "learning_rate": 4.251483745238728e-05, "loss": 0.7971, "step": 40560 }, { "epoch": 0.45, "learning_rate": 4.251391472524877e-05, "loss": 0.7186, "step": 40565 }, { "epoch": 0.45, "learning_rate": 4.251299199811026e-05, "loss": 0.7427, "step": 40570 }, { "epoch": 0.45, "learning_rate": 4.2512069270971746e-05, "loss": 0.6879, "step": 40575 }, { "epoch": 0.45, "learning_rate": 4.251114654383323e-05, "loss": 0.7469, "step": 40580 }, { "epoch": 0.45, "learning_rate": 4.251022381669472e-05, "loss": 0.7309, "step": 40585 }, { "epoch": 0.45, "learning_rate": 4.250930108955621e-05, "loss": 0.7422, "step": 40590 }, { "epoch": 0.45, "learning_rate": 4.25083783624177e-05, "loss": 0.7718, "step": 40595 }, { "epoch": 0.45, "learning_rate": 4.250745563527918e-05, "loss": 0.7282, "step": 40600 }, { "epoch": 0.45, "learning_rate": 4.2506532908140673e-05, "loss": 0.7704, "step": 40605 }, { "epoch": 0.45, "learning_rate": 4.250561018100216e-05, "loss": 0.7375, "step": 40610 }, { "epoch": 0.45, "learning_rate": 4.250468745386364e-05, "loss": 0.7631, "step": 40615 }, { "epoch": 0.45, "learning_rate": 4.250376472672513e-05, "loss": 0.7477, "step": 40620 }, { "epoch": 0.45, "learning_rate": 4.250284199958662e-05, "loss": 0.7364, "step": 40625 }, { "epoch": 0.45, "learning_rate": 4.250191927244811e-05, "loss": 0.7333, "step": 40630 }, { "epoch": 0.45, "learning_rate": 4.2500996545309594e-05, "loss": 0.8001, "step": 40635 }, { "epoch": 0.45, "learning_rate": 4.250007381817108e-05, "loss": 0.7508, "step": 40640 }, { "epoch": 0.45, "learning_rate": 4.249915109103257e-05, "loss": 0.7304, "step": 40645 }, { "epoch": 0.45, "learning_rate": 4.249822836389406e-05, "loss": 0.7503, "step": 40650 }, { "epoch": 0.45, "learning_rate": 4.2497305636755545e-05, "loss": 0.7477, "step": 40655 }, { "epoch": 0.45, "learning_rate": 4.249638290961703e-05, "loss": 0.7277, "step": 40660 }, { "epoch": 0.45, "learning_rate": 4.249546018247852e-05, "loss": 0.7374, "step": 40665 }, { "epoch": 0.45, "learning_rate": 4.249453745534001e-05, "loss": 0.7103, "step": 40670 }, { "epoch": 0.45, "learning_rate": 4.2493614728201497e-05, "loss": 0.7504, "step": 40675 }, { "epoch": 0.45, "learning_rate": 4.2492692001062984e-05, "loss": 0.7451, "step": 40680 }, { "epoch": 0.45, "learning_rate": 4.249176927392447e-05, "loss": 0.7181, "step": 40685 }, { "epoch": 0.45, "learning_rate": 4.249084654678595e-05, "loss": 0.7681, "step": 40690 }, { "epoch": 0.45, "learning_rate": 4.248992381964745e-05, "loss": 0.762, "step": 40695 }, { "epoch": 0.45, "learning_rate": 4.2489001092508936e-05, "loss": 0.729, "step": 40700 }, { "epoch": 0.45, "learning_rate": 4.2488078365370424e-05, "loss": 0.7707, "step": 40705 }, { "epoch": 0.45, "learning_rate": 4.2487155638231905e-05, "loss": 0.7796, "step": 40710 }, { "epoch": 0.45, "learning_rate": 4.24862329110934e-05, "loss": 0.7311, "step": 40715 }, { "epoch": 0.45, "learning_rate": 4.248531018395489e-05, "loss": 0.7629, "step": 40720 }, { "epoch": 0.45, "learning_rate": 4.248438745681637e-05, "loss": 0.7659, "step": 40725 }, { "epoch": 0.45, "learning_rate": 4.2483464729677856e-05, "loss": 0.755, "step": 40730 }, { "epoch": 0.45, "learning_rate": 4.248254200253935e-05, "loss": 0.7961, "step": 40735 }, { "epoch": 0.45, "learning_rate": 4.248161927540084e-05, "loss": 0.6923, "step": 40740 }, { "epoch": 0.45, "learning_rate": 4.248069654826232e-05, "loss": 0.7552, "step": 40745 }, { "epoch": 0.45, "learning_rate": 4.247977382112381e-05, "loss": 0.7939, "step": 40750 }, { "epoch": 0.45, "learning_rate": 4.24788510939853e-05, "loss": 0.7776, "step": 40755 }, { "epoch": 0.45, "learning_rate": 4.247792836684678e-05, "loss": 0.6821, "step": 40760 }, { "epoch": 0.45, "learning_rate": 4.247700563970827e-05, "loss": 0.7656, "step": 40765 }, { "epoch": 0.45, "learning_rate": 4.247608291256976e-05, "loss": 0.7251, "step": 40770 }, { "epoch": 0.45, "learning_rate": 4.247516018543125e-05, "loss": 0.7574, "step": 40775 }, { "epoch": 0.45, "learning_rate": 4.2474237458292735e-05, "loss": 0.7902, "step": 40780 }, { "epoch": 0.45, "learning_rate": 4.247331473115422e-05, "loss": 0.7461, "step": 40785 }, { "epoch": 0.45, "learning_rate": 4.247239200401571e-05, "loss": 0.8089, "step": 40790 }, { "epoch": 0.45, "learning_rate": 4.24714692768772e-05, "loss": 0.7961, "step": 40795 }, { "epoch": 0.45, "learning_rate": 4.2470546549738686e-05, "loss": 0.7332, "step": 40800 }, { "epoch": 0.45, "learning_rate": 4.2469623822600174e-05, "loss": 0.7493, "step": 40805 }, { "epoch": 0.45, "learning_rate": 4.246870109546166e-05, "loss": 0.7891, "step": 40810 }, { "epoch": 0.45, "learning_rate": 4.246777836832315e-05, "loss": 0.762, "step": 40815 }, { "epoch": 0.45, "learning_rate": 4.246685564118464e-05, "loss": 0.7267, "step": 40820 }, { "epoch": 0.45, "learning_rate": 4.2465932914046125e-05, "loss": 0.8002, "step": 40825 }, { "epoch": 0.45, "learning_rate": 4.246501018690761e-05, "loss": 0.7505, "step": 40830 }, { "epoch": 0.45, "learning_rate": 4.2464087459769094e-05, "loss": 0.7325, "step": 40835 }, { "epoch": 0.45, "learning_rate": 4.246316473263058e-05, "loss": 0.6888, "step": 40840 }, { "epoch": 0.45, "learning_rate": 4.2462242005492077e-05, "loss": 0.7971, "step": 40845 }, { "epoch": 0.45, "learning_rate": 4.2461319278353564e-05, "loss": 0.8108, "step": 40850 }, { "epoch": 0.45, "learning_rate": 4.2460396551215046e-05, "loss": 0.795, "step": 40855 }, { "epoch": 0.45, "learning_rate": 4.245947382407653e-05, "loss": 0.7399, "step": 40860 }, { "epoch": 0.45, "learning_rate": 4.245855109693803e-05, "loss": 0.7666, "step": 40865 }, { "epoch": 0.45, "learning_rate": 4.2457628369799516e-05, "loss": 0.7413, "step": 40870 }, { "epoch": 0.45, "learning_rate": 4.2456705642661e-05, "loss": 0.8086, "step": 40875 }, { "epoch": 0.45, "learning_rate": 4.2455782915522485e-05, "loss": 0.6626, "step": 40880 }, { "epoch": 0.45, "learning_rate": 4.245486018838398e-05, "loss": 0.7806, "step": 40885 }, { "epoch": 0.45, "learning_rate": 4.245393746124546e-05, "loss": 0.7852, "step": 40890 }, { "epoch": 0.45, "learning_rate": 4.245301473410695e-05, "loss": 0.6635, "step": 40895 }, { "epoch": 0.45, "learning_rate": 4.2452092006968436e-05, "loss": 0.7446, "step": 40900 }, { "epoch": 0.45, "learning_rate": 4.245116927982993e-05, "loss": 0.7247, "step": 40905 }, { "epoch": 0.45, "learning_rate": 4.245024655269141e-05, "loss": 0.7989, "step": 40910 }, { "epoch": 0.45, "learning_rate": 4.24493238255529e-05, "loss": 0.7316, "step": 40915 }, { "epoch": 0.45, "learning_rate": 4.244840109841439e-05, "loss": 0.8155, "step": 40920 }, { "epoch": 0.45, "learning_rate": 4.2447478371275875e-05, "loss": 0.776, "step": 40925 }, { "epoch": 0.45, "learning_rate": 4.244655564413736e-05, "loss": 0.7262, "step": 40930 }, { "epoch": 0.45, "learning_rate": 4.244563291699885e-05, "loss": 0.7307, "step": 40935 }, { "epoch": 0.45, "learning_rate": 4.244471018986034e-05, "loss": 0.7832, "step": 40940 }, { "epoch": 0.45, "learning_rate": 4.244378746272183e-05, "loss": 0.7862, "step": 40945 }, { "epoch": 0.45, "learning_rate": 4.2442864735583315e-05, "loss": 0.7337, "step": 40950 }, { "epoch": 0.45, "learning_rate": 4.24419420084448e-05, "loss": 0.7613, "step": 40955 }, { "epoch": 0.45, "learning_rate": 4.244101928130629e-05, "loss": 0.7364, "step": 40960 }, { "epoch": 0.45, "learning_rate": 4.244009655416777e-05, "loss": 0.8204, "step": 40965 }, { "epoch": 0.45, "learning_rate": 4.2439173827029266e-05, "loss": 0.7259, "step": 40970 }, { "epoch": 0.45, "learning_rate": 4.2438251099890754e-05, "loss": 0.7504, "step": 40975 }, { "epoch": 0.45, "learning_rate": 4.243732837275224e-05, "loss": 0.8151, "step": 40980 }, { "epoch": 0.45, "learning_rate": 4.243640564561372e-05, "loss": 0.6967, "step": 40985 }, { "epoch": 0.45, "learning_rate": 4.243548291847521e-05, "loss": 0.7355, "step": 40990 }, { "epoch": 0.45, "learning_rate": 4.2434560191336705e-05, "loss": 0.7542, "step": 40995 }, { "epoch": 0.45, "learning_rate": 4.2433637464198186e-05, "loss": 0.7363, "step": 41000 }, { "epoch": 0.45, "eval_loss": 0.7134051322937012, "eval_runtime": 69.317, "eval_samples_per_second": 28.853, "eval_steps_per_second": 14.426, "step": 41000 }, { "epoch": 0.45, "learning_rate": 4.2432714737059674e-05, "loss": 0.7289, "step": 41005 }, { "epoch": 0.45, "learning_rate": 4.243179200992116e-05, "loss": 0.7416, "step": 41010 }, { "epoch": 0.45, "learning_rate": 4.243086928278266e-05, "loss": 0.7751, "step": 41015 }, { "epoch": 0.45, "learning_rate": 4.242994655564414e-05, "loss": 0.806, "step": 41020 }, { "epoch": 0.45, "learning_rate": 4.2429023828505626e-05, "loss": 0.7695, "step": 41025 }, { "epoch": 0.45, "learning_rate": 4.2428101101367113e-05, "loss": 0.6906, "step": 41030 }, { "epoch": 0.45, "learning_rate": 4.24271783742286e-05, "loss": 0.8058, "step": 41035 }, { "epoch": 0.45, "learning_rate": 4.242625564709009e-05, "loss": 0.736, "step": 41040 }, { "epoch": 0.45, "learning_rate": 4.242533291995158e-05, "loss": 0.8124, "step": 41045 }, { "epoch": 0.45, "learning_rate": 4.2424410192813065e-05, "loss": 0.7645, "step": 41050 }, { "epoch": 0.45, "learning_rate": 4.242348746567455e-05, "loss": 0.7393, "step": 41055 }, { "epoch": 0.45, "learning_rate": 4.242256473853604e-05, "loss": 0.7491, "step": 41060 }, { "epoch": 0.45, "learning_rate": 4.242164201139753e-05, "loss": 0.8089, "step": 41065 }, { "epoch": 0.45, "learning_rate": 4.2420719284259016e-05, "loss": 0.7559, "step": 41070 }, { "epoch": 0.45, "learning_rate": 4.24197965571205e-05, "loss": 0.7877, "step": 41075 }, { "epoch": 0.45, "learning_rate": 4.241887382998199e-05, "loss": 0.7504, "step": 41080 }, { "epoch": 0.45, "learning_rate": 4.241795110284348e-05, "loss": 0.6763, "step": 41085 }, { "epoch": 0.45, "learning_rate": 4.241702837570497e-05, "loss": 0.7399, "step": 41090 }, { "epoch": 0.46, "learning_rate": 4.241610564856645e-05, "loss": 0.718, "step": 41095 }, { "epoch": 0.46, "learning_rate": 4.241518292142794e-05, "loss": 0.7597, "step": 41100 }, { "epoch": 0.46, "learning_rate": 4.241426019428943e-05, "loss": 0.7739, "step": 41105 }, { "epoch": 0.46, "learning_rate": 4.241333746715091e-05, "loss": 0.7466, "step": 41110 }, { "epoch": 0.46, "learning_rate": 4.24124147400124e-05, "loss": 0.7843, "step": 41115 }, { "epoch": 0.46, "learning_rate": 4.2411492012873895e-05, "loss": 0.785, "step": 41120 }, { "epoch": 0.46, "learning_rate": 4.241056928573538e-05, "loss": 0.7337, "step": 41125 }, { "epoch": 0.46, "learning_rate": 4.2409646558596864e-05, "loss": 0.7416, "step": 41130 }, { "epoch": 0.46, "learning_rate": 4.240872383145835e-05, "loss": 0.6917, "step": 41135 }, { "epoch": 0.46, "learning_rate": 4.240780110431984e-05, "loss": 0.8139, "step": 41140 }, { "epoch": 0.46, "learning_rate": 4.240687837718133e-05, "loss": 0.7755, "step": 41145 }, { "epoch": 0.46, "learning_rate": 4.2405955650042815e-05, "loss": 0.7455, "step": 41150 }, { "epoch": 0.46, "learning_rate": 4.24050329229043e-05, "loss": 0.7449, "step": 41155 }, { "epoch": 0.46, "learning_rate": 4.240411019576579e-05, "loss": 0.8097, "step": 41160 }, { "epoch": 0.46, "learning_rate": 4.240318746862728e-05, "loss": 0.7674, "step": 41165 }, { "epoch": 0.46, "learning_rate": 4.2402264741488766e-05, "loss": 0.7472, "step": 41170 }, { "epoch": 0.46, "learning_rate": 4.2401342014350254e-05, "loss": 0.6947, "step": 41175 }, { "epoch": 0.46, "learning_rate": 4.240041928721174e-05, "loss": 0.7628, "step": 41180 }, { "epoch": 0.46, "learning_rate": 4.239949656007323e-05, "loss": 0.7887, "step": 41185 }, { "epoch": 0.46, "learning_rate": 4.239857383293472e-05, "loss": 0.7681, "step": 41190 }, { "epoch": 0.46, "learning_rate": 4.2397651105796206e-05, "loss": 0.7833, "step": 41195 }, { "epoch": 0.46, "learning_rate": 4.2396728378657694e-05, "loss": 0.7038, "step": 41200 }, { "epoch": 0.46, "learning_rate": 4.239580565151918e-05, "loss": 0.768, "step": 41205 }, { "epoch": 0.46, "learning_rate": 4.239488292438067e-05, "loss": 0.7112, "step": 41210 }, { "epoch": 0.46, "learning_rate": 4.239396019724216e-05, "loss": 0.7218, "step": 41215 }, { "epoch": 0.46, "learning_rate": 4.239303747010364e-05, "loss": 0.7703, "step": 41220 }, { "epoch": 0.46, "learning_rate": 4.2392114742965126e-05, "loss": 0.6962, "step": 41225 }, { "epoch": 0.46, "learning_rate": 4.239119201582662e-05, "loss": 0.7391, "step": 41230 }, { "epoch": 0.46, "learning_rate": 4.239026928868811e-05, "loss": 0.7602, "step": 41235 }, { "epoch": 0.46, "learning_rate": 4.238934656154959e-05, "loss": 0.7837, "step": 41240 }, { "epoch": 0.46, "learning_rate": 4.238842383441108e-05, "loss": 0.6964, "step": 41245 }, { "epoch": 0.46, "learning_rate": 4.238750110727257e-05, "loss": 0.7467, "step": 41250 }, { "epoch": 0.46, "learning_rate": 4.238657838013406e-05, "loss": 0.78, "step": 41255 }, { "epoch": 0.46, "learning_rate": 4.238565565299554e-05, "loss": 0.7957, "step": 41260 }, { "epoch": 0.46, "learning_rate": 4.238473292585703e-05, "loss": 0.6567, "step": 41265 }, { "epoch": 0.46, "learning_rate": 4.2383810198718523e-05, "loss": 0.7118, "step": 41270 }, { "epoch": 0.46, "learning_rate": 4.2382887471580004e-05, "loss": 0.7258, "step": 41275 }, { "epoch": 0.46, "learning_rate": 4.238196474444149e-05, "loss": 0.7256, "step": 41280 }, { "epoch": 0.46, "learning_rate": 4.238104201730298e-05, "loss": 0.6961, "step": 41285 }, { "epoch": 0.46, "learning_rate": 4.2380119290164475e-05, "loss": 0.7657, "step": 41290 }, { "epoch": 0.46, "learning_rate": 4.2379196563025956e-05, "loss": 0.8075, "step": 41295 }, { "epoch": 0.46, "learning_rate": 4.2378273835887444e-05, "loss": 0.7331, "step": 41300 }, { "epoch": 0.46, "learning_rate": 4.237735110874893e-05, "loss": 0.7089, "step": 41305 }, { "epoch": 0.46, "learning_rate": 4.237642838161042e-05, "loss": 0.7449, "step": 41310 }, { "epoch": 0.46, "learning_rate": 4.237550565447191e-05, "loss": 0.8362, "step": 41315 }, { "epoch": 0.46, "learning_rate": 4.2374582927333395e-05, "loss": 0.7412, "step": 41320 }, { "epoch": 0.46, "learning_rate": 4.237366020019488e-05, "loss": 0.7388, "step": 41325 }, { "epoch": 0.46, "learning_rate": 4.237273747305637e-05, "loss": 0.7644, "step": 41330 }, { "epoch": 0.46, "learning_rate": 4.237181474591786e-05, "loss": 0.8129, "step": 41335 }, { "epoch": 0.46, "learning_rate": 4.2370892018779347e-05, "loss": 0.7964, "step": 41340 }, { "epoch": 0.46, "learning_rate": 4.2369969291640834e-05, "loss": 0.6718, "step": 41345 }, { "epoch": 0.46, "learning_rate": 4.2369046564502315e-05, "loss": 0.8021, "step": 41350 }, { "epoch": 0.46, "learning_rate": 4.236812383736381e-05, "loss": 0.7593, "step": 41355 }, { "epoch": 0.46, "learning_rate": 4.23672011102253e-05, "loss": 0.7807, "step": 41360 }, { "epoch": 0.46, "learning_rate": 4.2366278383086786e-05, "loss": 0.7119, "step": 41365 }, { "epoch": 0.46, "learning_rate": 4.236535565594827e-05, "loss": 0.7057, "step": 41370 }, { "epoch": 0.46, "learning_rate": 4.2364432928809755e-05, "loss": 0.7512, "step": 41375 }, { "epoch": 0.46, "learning_rate": 4.236351020167125e-05, "loss": 0.7597, "step": 41380 }, { "epoch": 0.46, "learning_rate": 4.236258747453273e-05, "loss": 0.7263, "step": 41385 }, { "epoch": 0.46, "learning_rate": 4.236166474739422e-05, "loss": 0.7149, "step": 41390 }, { "epoch": 0.46, "learning_rate": 4.2360742020255706e-05, "loss": 0.7746, "step": 41395 }, { "epoch": 0.46, "learning_rate": 4.23598192931172e-05, "loss": 0.7648, "step": 41400 }, { "epoch": 0.46, "learning_rate": 4.235889656597868e-05, "loss": 0.7326, "step": 41405 }, { "epoch": 0.46, "learning_rate": 4.235797383884017e-05, "loss": 0.7645, "step": 41410 }, { "epoch": 0.46, "learning_rate": 4.235705111170166e-05, "loss": 0.7109, "step": 41415 }, { "epoch": 0.46, "learning_rate": 4.2356128384563145e-05, "loss": 0.7526, "step": 41420 }, { "epoch": 0.46, "learning_rate": 4.235520565742463e-05, "loss": 0.6701, "step": 41425 }, { "epoch": 0.46, "learning_rate": 4.235428293028612e-05, "loss": 0.8089, "step": 41430 }, { "epoch": 0.46, "learning_rate": 4.235336020314761e-05, "loss": 0.7567, "step": 41435 }, { "epoch": 0.46, "learning_rate": 4.23524374760091e-05, "loss": 0.7898, "step": 41440 }, { "epoch": 0.46, "learning_rate": 4.2351514748870585e-05, "loss": 0.7904, "step": 41445 }, { "epoch": 0.46, "learning_rate": 4.235059202173207e-05, "loss": 0.7753, "step": 41450 }, { "epoch": 0.46, "learning_rate": 4.234966929459356e-05, "loss": 0.7977, "step": 41455 }, { "epoch": 0.46, "learning_rate": 4.234874656745504e-05, "loss": 0.6778, "step": 41460 }, { "epoch": 0.46, "learning_rate": 4.2347823840316536e-05, "loss": 0.7568, "step": 41465 }, { "epoch": 0.46, "learning_rate": 4.2346901113178024e-05, "loss": 0.7631, "step": 41470 }, { "epoch": 0.46, "learning_rate": 4.234597838603951e-05, "loss": 0.711, "step": 41475 }, { "epoch": 0.46, "learning_rate": 4.234505565890099e-05, "loss": 0.8182, "step": 41480 }, { "epoch": 0.46, "learning_rate": 4.234413293176249e-05, "loss": 0.864, "step": 41485 }, { "epoch": 0.46, "learning_rate": 4.2343210204623975e-05, "loss": 0.7501, "step": 41490 }, { "epoch": 0.46, "learning_rate": 4.2342287477485456e-05, "loss": 0.787, "step": 41495 }, { "epoch": 0.46, "learning_rate": 4.2341364750346944e-05, "loss": 0.7415, "step": 41500 }, { "epoch": 0.46, "learning_rate": 4.234044202320844e-05, "loss": 0.8155, "step": 41505 }, { "epoch": 0.46, "learning_rate": 4.2339519296069927e-05, "loss": 0.7337, "step": 41510 }, { "epoch": 0.46, "learning_rate": 4.233859656893141e-05, "loss": 0.7637, "step": 41515 }, { "epoch": 0.46, "learning_rate": 4.2337673841792896e-05, "loss": 0.75, "step": 41520 }, { "epoch": 0.46, "learning_rate": 4.233675111465438e-05, "loss": 0.7139, "step": 41525 }, { "epoch": 0.46, "learning_rate": 4.233582838751587e-05, "loss": 0.8153, "step": 41530 }, { "epoch": 0.46, "learning_rate": 4.233490566037736e-05, "loss": 0.711, "step": 41535 }, { "epoch": 0.46, "learning_rate": 4.233398293323885e-05, "loss": 0.8141, "step": 41540 }, { "epoch": 0.46, "learning_rate": 4.2333060206100335e-05, "loss": 0.7201, "step": 41545 }, { "epoch": 0.46, "learning_rate": 4.233213747896182e-05, "loss": 0.6645, "step": 41550 }, { "epoch": 0.46, "learning_rate": 4.233121475182331e-05, "loss": 0.7354, "step": 41555 }, { "epoch": 0.46, "learning_rate": 4.23302920246848e-05, "loss": 0.7205, "step": 41560 }, { "epoch": 0.46, "learning_rate": 4.2329369297546286e-05, "loss": 0.7317, "step": 41565 }, { "epoch": 0.46, "learning_rate": 4.2328446570407774e-05, "loss": 0.7437, "step": 41570 }, { "epoch": 0.46, "learning_rate": 4.232752384326926e-05, "loss": 0.7424, "step": 41575 }, { "epoch": 0.46, "learning_rate": 4.232660111613075e-05, "loss": 0.7632, "step": 41580 }, { "epoch": 0.46, "learning_rate": 4.232567838899224e-05, "loss": 0.7519, "step": 41585 }, { "epoch": 0.46, "learning_rate": 4.2324755661853725e-05, "loss": 0.7388, "step": 41590 }, { "epoch": 0.46, "learning_rate": 4.232383293471521e-05, "loss": 0.7137, "step": 41595 }, { "epoch": 0.46, "learning_rate": 4.23229102075767e-05, "loss": 0.7597, "step": 41600 }, { "epoch": 0.46, "learning_rate": 4.232198748043818e-05, "loss": 0.7967, "step": 41605 }, { "epoch": 0.46, "learning_rate": 4.232106475329967e-05, "loss": 0.734, "step": 41610 }, { "epoch": 0.46, "learning_rate": 4.2320142026161165e-05, "loss": 0.7742, "step": 41615 }, { "epoch": 0.46, "learning_rate": 4.231921929902265e-05, "loss": 0.797, "step": 41620 }, { "epoch": 0.46, "learning_rate": 4.2318296571884134e-05, "loss": 0.8109, "step": 41625 }, { "epoch": 0.46, "learning_rate": 4.231737384474562e-05, "loss": 0.7738, "step": 41630 }, { "epoch": 0.46, "learning_rate": 4.2316451117607116e-05, "loss": 0.6927, "step": 41635 }, { "epoch": 0.46, "learning_rate": 4.2315528390468604e-05, "loss": 0.8147, "step": 41640 }, { "epoch": 0.46, "learning_rate": 4.2314605663330085e-05, "loss": 0.7957, "step": 41645 }, { "epoch": 0.46, "learning_rate": 4.231368293619157e-05, "loss": 0.7346, "step": 41650 }, { "epoch": 0.46, "learning_rate": 4.231276020905307e-05, "loss": 0.7989, "step": 41655 }, { "epoch": 0.46, "learning_rate": 4.231183748191455e-05, "loss": 0.7672, "step": 41660 }, { "epoch": 0.46, "learning_rate": 4.2310914754776036e-05, "loss": 0.76, "step": 41665 }, { "epoch": 0.46, "learning_rate": 4.2309992027637524e-05, "loss": 0.8045, "step": 41670 }, { "epoch": 0.46, "learning_rate": 4.230906930049901e-05, "loss": 0.7581, "step": 41675 }, { "epoch": 0.46, "learning_rate": 4.23081465733605e-05, "loss": 0.77, "step": 41680 }, { "epoch": 0.46, "learning_rate": 4.230722384622199e-05, "loss": 0.7212, "step": 41685 }, { "epoch": 0.46, "learning_rate": 4.2306301119083476e-05, "loss": 0.7726, "step": 41690 }, { "epoch": 0.46, "learning_rate": 4.2305378391944963e-05, "loss": 0.7831, "step": 41695 }, { "epoch": 0.46, "learning_rate": 4.230445566480645e-05, "loss": 0.7862, "step": 41700 }, { "epoch": 0.46, "learning_rate": 4.230353293766794e-05, "loss": 0.7504, "step": 41705 }, { "epoch": 0.46, "learning_rate": 4.230261021052943e-05, "loss": 0.7134, "step": 41710 }, { "epoch": 0.46, "learning_rate": 4.2301687483390915e-05, "loss": 0.7091, "step": 41715 }, { "epoch": 0.46, "learning_rate": 4.23007647562524e-05, "loss": 0.7642, "step": 41720 }, { "epoch": 0.46, "learning_rate": 4.229984202911389e-05, "loss": 0.7845, "step": 41725 }, { "epoch": 0.46, "learning_rate": 4.229891930197538e-05, "loss": 0.7392, "step": 41730 }, { "epoch": 0.46, "learning_rate": 4.229799657483686e-05, "loss": 0.7866, "step": 41735 }, { "epoch": 0.46, "learning_rate": 4.2297073847698354e-05, "loss": 0.7222, "step": 41740 }, { "epoch": 0.46, "learning_rate": 4.229615112055984e-05, "loss": 0.7297, "step": 41745 }, { "epoch": 0.46, "learning_rate": 4.229522839342133e-05, "loss": 0.7263, "step": 41750 }, { "epoch": 0.46, "learning_rate": 4.229430566628281e-05, "loss": 0.7009, "step": 41755 }, { "epoch": 0.46, "learning_rate": 4.22933829391443e-05, "loss": 0.7904, "step": 41760 }, { "epoch": 0.46, "learning_rate": 4.229246021200579e-05, "loss": 0.7923, "step": 41765 }, { "epoch": 0.46, "learning_rate": 4.2291537484867274e-05, "loss": 0.6422, "step": 41770 }, { "epoch": 0.46, "learning_rate": 4.229061475772876e-05, "loss": 0.7968, "step": 41775 }, { "epoch": 0.46, "learning_rate": 4.228969203059025e-05, "loss": 0.7669, "step": 41780 }, { "epoch": 0.46, "learning_rate": 4.2288769303451745e-05, "loss": 0.7571, "step": 41785 }, { "epoch": 0.46, "learning_rate": 4.2287846576313226e-05, "loss": 0.8002, "step": 41790 }, { "epoch": 0.46, "learning_rate": 4.2286923849174714e-05, "loss": 0.7214, "step": 41795 }, { "epoch": 0.46, "learning_rate": 4.22860011220362e-05, "loss": 0.8052, "step": 41800 }, { "epoch": 0.46, "learning_rate": 4.228507839489769e-05, "loss": 0.7939, "step": 41805 }, { "epoch": 0.46, "learning_rate": 4.228415566775918e-05, "loss": 0.7893, "step": 41810 }, { "epoch": 0.46, "learning_rate": 4.2283232940620665e-05, "loss": 0.7166, "step": 41815 }, { "epoch": 0.46, "learning_rate": 4.228231021348215e-05, "loss": 0.772, "step": 41820 }, { "epoch": 0.46, "learning_rate": 4.228138748634364e-05, "loss": 0.7478, "step": 41825 }, { "epoch": 0.46, "learning_rate": 4.228046475920513e-05, "loss": 0.7429, "step": 41830 }, { "epoch": 0.46, "learning_rate": 4.2279542032066616e-05, "loss": 0.756, "step": 41835 }, { "epoch": 0.46, "learning_rate": 4.2278619304928104e-05, "loss": 0.7155, "step": 41840 }, { "epoch": 0.46, "learning_rate": 4.2277696577789585e-05, "loss": 0.7381, "step": 41845 }, { "epoch": 0.46, "learning_rate": 4.227677385065108e-05, "loss": 0.6796, "step": 41850 }, { "epoch": 0.46, "learning_rate": 4.227585112351257e-05, "loss": 0.7731, "step": 41855 }, { "epoch": 0.46, "learning_rate": 4.2274928396374056e-05, "loss": 0.7961, "step": 41860 }, { "epoch": 0.46, "learning_rate": 4.227400566923554e-05, "loss": 0.6936, "step": 41865 }, { "epoch": 0.46, "learning_rate": 4.227308294209703e-05, "loss": 0.7787, "step": 41870 }, { "epoch": 0.46, "learning_rate": 4.227216021495852e-05, "loss": 0.7975, "step": 41875 }, { "epoch": 0.46, "learning_rate": 4.227123748782e-05, "loss": 0.781, "step": 41880 }, { "epoch": 0.46, "learning_rate": 4.227031476068149e-05, "loss": 0.7421, "step": 41885 }, { "epoch": 0.46, "learning_rate": 4.226939203354298e-05, "loss": 0.7137, "step": 41890 }, { "epoch": 0.46, "learning_rate": 4.226846930640447e-05, "loss": 0.8129, "step": 41895 }, { "epoch": 0.46, "learning_rate": 4.226754657926595e-05, "loss": 0.7621, "step": 41900 }, { "epoch": 0.46, "learning_rate": 4.226662385212744e-05, "loss": 0.7177, "step": 41905 }, { "epoch": 0.46, "learning_rate": 4.226570112498893e-05, "loss": 0.7743, "step": 41910 }, { "epoch": 0.46, "learning_rate": 4.2264778397850415e-05, "loss": 0.8023, "step": 41915 }, { "epoch": 0.46, "learning_rate": 4.22638556707119e-05, "loss": 0.7244, "step": 41920 }, { "epoch": 0.46, "learning_rate": 4.226293294357339e-05, "loss": 0.7841, "step": 41925 }, { "epoch": 0.46, "learning_rate": 4.226201021643488e-05, "loss": 0.7477, "step": 41930 }, { "epoch": 0.46, "learning_rate": 4.226108748929637e-05, "loss": 0.7803, "step": 41935 }, { "epoch": 0.46, "learning_rate": 4.2260164762157854e-05, "loss": 0.7124, "step": 41940 }, { "epoch": 0.46, "learning_rate": 4.225924203501934e-05, "loss": 0.8647, "step": 41945 }, { "epoch": 0.46, "learning_rate": 4.225831930788083e-05, "loss": 0.798, "step": 41950 }, { "epoch": 0.46, "learning_rate": 4.225739658074232e-05, "loss": 0.7589, "step": 41955 }, { "epoch": 0.46, "learning_rate": 4.2256473853603806e-05, "loss": 0.7521, "step": 41960 }, { "epoch": 0.46, "learning_rate": 4.2255551126465294e-05, "loss": 0.7586, "step": 41965 }, { "epoch": 0.46, "learning_rate": 4.225462839932678e-05, "loss": 0.6758, "step": 41970 }, { "epoch": 0.46, "learning_rate": 4.225370567218826e-05, "loss": 0.7199, "step": 41975 }, { "epoch": 0.46, "learning_rate": 4.225278294504976e-05, "loss": 0.7242, "step": 41980 }, { "epoch": 0.46, "learning_rate": 4.2251860217911245e-05, "loss": 0.7711, "step": 41985 }, { "epoch": 0.46, "learning_rate": 4.2250937490772726e-05, "loss": 0.7588, "step": 41990 }, { "epoch": 0.46, "learning_rate": 4.2250014763634214e-05, "loss": 0.7886, "step": 41995 }, { "epoch": 0.47, "learning_rate": 4.224909203649571e-05, "loss": 0.758, "step": 42000 }, { "epoch": 0.47, "eval_loss": 0.6948019862174988, "eval_runtime": 69.2122, "eval_samples_per_second": 28.897, "eval_steps_per_second": 14.448, "step": 42000 }, { "epoch": 0.47, "learning_rate": 4.2248169309357197e-05, "loss": 0.7686, "step": 42005 }, { "epoch": 0.47, "learning_rate": 4.224724658221868e-05, "loss": 0.7739, "step": 42010 }, { "epoch": 0.47, "learning_rate": 4.2246323855080165e-05, "loss": 0.7432, "step": 42015 }, { "epoch": 0.47, "learning_rate": 4.224540112794166e-05, "loss": 0.8232, "step": 42020 }, { "epoch": 0.47, "learning_rate": 4.224447840080315e-05, "loss": 0.7377, "step": 42025 }, { "epoch": 0.47, "learning_rate": 4.224355567366463e-05, "loss": 0.7406, "step": 42030 }, { "epoch": 0.47, "learning_rate": 4.224263294652612e-05, "loss": 0.7481, "step": 42035 }, { "epoch": 0.47, "learning_rate": 4.224171021938761e-05, "loss": 0.7372, "step": 42040 }, { "epoch": 0.47, "learning_rate": 4.224078749224909e-05, "loss": 0.7521, "step": 42045 }, { "epoch": 0.47, "learning_rate": 4.223986476511058e-05, "loss": 0.7408, "step": 42050 }, { "epoch": 0.47, "learning_rate": 4.223894203797207e-05, "loss": 0.833, "step": 42055 }, { "epoch": 0.47, "learning_rate": 4.2238019310833556e-05, "loss": 0.7658, "step": 42060 }, { "epoch": 0.47, "learning_rate": 4.2237096583695044e-05, "loss": 0.8122, "step": 42065 }, { "epoch": 0.47, "learning_rate": 4.223617385655653e-05, "loss": 0.7221, "step": 42070 }, { "epoch": 0.47, "learning_rate": 4.223525112941802e-05, "loss": 0.751, "step": 42075 }, { "epoch": 0.47, "learning_rate": 4.223432840227951e-05, "loss": 0.7415, "step": 42080 }, { "epoch": 0.47, "learning_rate": 4.2233405675140995e-05, "loss": 0.7661, "step": 42085 }, { "epoch": 0.47, "learning_rate": 4.223248294800248e-05, "loss": 0.8405, "step": 42090 }, { "epoch": 0.47, "learning_rate": 4.223156022086397e-05, "loss": 0.8473, "step": 42095 }, { "epoch": 0.47, "learning_rate": 4.223063749372546e-05, "loss": 0.7247, "step": 42100 }, { "epoch": 0.47, "learning_rate": 4.222971476658695e-05, "loss": 0.7784, "step": 42105 }, { "epoch": 0.47, "learning_rate": 4.2228792039448435e-05, "loss": 0.7099, "step": 42110 }, { "epoch": 0.47, "learning_rate": 4.222786931230992e-05, "loss": 0.7718, "step": 42115 }, { "epoch": 0.47, "learning_rate": 4.2226946585171403e-05, "loss": 0.699, "step": 42120 }, { "epoch": 0.47, "learning_rate": 4.22260238580329e-05, "loss": 0.817, "step": 42125 }, { "epoch": 0.47, "learning_rate": 4.2225101130894386e-05, "loss": 0.8129, "step": 42130 }, { "epoch": 0.47, "learning_rate": 4.2224178403755874e-05, "loss": 0.81, "step": 42135 }, { "epoch": 0.47, "learning_rate": 4.2223255676617355e-05, "loss": 0.7481, "step": 42140 }, { "epoch": 0.47, "learning_rate": 4.222233294947884e-05, "loss": 0.7176, "step": 42145 }, { "epoch": 0.47, "learning_rate": 4.222141022234034e-05, "loss": 0.8499, "step": 42150 }, { "epoch": 0.47, "learning_rate": 4.222048749520182e-05, "loss": 0.772, "step": 42155 }, { "epoch": 0.47, "learning_rate": 4.2219564768063306e-05, "loss": 0.7798, "step": 42160 }, { "epoch": 0.47, "learning_rate": 4.2218642040924794e-05, "loss": 0.771, "step": 42165 }, { "epoch": 0.47, "learning_rate": 4.221771931378629e-05, "loss": 0.6958, "step": 42170 }, { "epoch": 0.47, "learning_rate": 4.221679658664777e-05, "loss": 0.8021, "step": 42175 }, { "epoch": 0.47, "learning_rate": 4.221587385950926e-05, "loss": 0.8037, "step": 42180 }, { "epoch": 0.47, "learning_rate": 4.2214951132370746e-05, "loss": 0.7603, "step": 42185 }, { "epoch": 0.47, "learning_rate": 4.221402840523223e-05, "loss": 0.7427, "step": 42190 }, { "epoch": 0.47, "learning_rate": 4.221310567809372e-05, "loss": 0.7333, "step": 42195 }, { "epoch": 0.47, "learning_rate": 4.221218295095521e-05, "loss": 0.7679, "step": 42200 }, { "epoch": 0.47, "learning_rate": 4.22112602238167e-05, "loss": 0.7498, "step": 42205 }, { "epoch": 0.47, "learning_rate": 4.2210337496678185e-05, "loss": 0.7355, "step": 42210 }, { "epoch": 0.47, "learning_rate": 4.220941476953967e-05, "loss": 0.7353, "step": 42215 }, { "epoch": 0.47, "learning_rate": 4.220849204240116e-05, "loss": 0.7071, "step": 42220 }, { "epoch": 0.47, "learning_rate": 4.220756931526265e-05, "loss": 0.7434, "step": 42225 }, { "epoch": 0.47, "learning_rate": 4.220664658812413e-05, "loss": 0.7699, "step": 42230 }, { "epoch": 0.47, "learning_rate": 4.2205723860985624e-05, "loss": 0.7234, "step": 42235 }, { "epoch": 0.47, "learning_rate": 4.220480113384711e-05, "loss": 0.7035, "step": 42240 }, { "epoch": 0.47, "learning_rate": 4.22038784067086e-05, "loss": 0.7164, "step": 42245 }, { "epoch": 0.47, "learning_rate": 4.220295567957008e-05, "loss": 0.7804, "step": 42250 }, { "epoch": 0.47, "learning_rate": 4.2202032952431575e-05, "loss": 0.757, "step": 42255 }, { "epoch": 0.47, "learning_rate": 4.220111022529306e-05, "loss": 0.7081, "step": 42260 }, { "epoch": 0.47, "learning_rate": 4.2200187498154544e-05, "loss": 0.7809, "step": 42265 }, { "epoch": 0.47, "learning_rate": 4.219926477101603e-05, "loss": 0.7867, "step": 42270 }, { "epoch": 0.47, "learning_rate": 4.219834204387753e-05, "loss": 0.7009, "step": 42275 }, { "epoch": 0.47, "learning_rate": 4.2197419316739015e-05, "loss": 0.7878, "step": 42280 }, { "epoch": 0.47, "learning_rate": 4.2196496589600496e-05, "loss": 0.8655, "step": 42285 }, { "epoch": 0.47, "learning_rate": 4.2195573862461984e-05, "loss": 0.7742, "step": 42290 }, { "epoch": 0.47, "learning_rate": 4.219465113532347e-05, "loss": 0.7348, "step": 42295 }, { "epoch": 0.47, "learning_rate": 4.219372840818496e-05, "loss": 0.7277, "step": 42300 }, { "epoch": 0.47, "learning_rate": 4.219280568104645e-05, "loss": 0.7756, "step": 42305 }, { "epoch": 0.47, "learning_rate": 4.2191882953907935e-05, "loss": 0.768, "step": 42310 }, { "epoch": 0.47, "learning_rate": 4.219096022676942e-05, "loss": 0.7497, "step": 42315 }, { "epoch": 0.47, "learning_rate": 4.219003749963091e-05, "loss": 0.7613, "step": 42320 }, { "epoch": 0.47, "learning_rate": 4.21891147724924e-05, "loss": 0.7991, "step": 42325 }, { "epoch": 0.47, "learning_rate": 4.2188192045353886e-05, "loss": 0.753, "step": 42330 }, { "epoch": 0.47, "learning_rate": 4.2187269318215374e-05, "loss": 0.6981, "step": 42335 }, { "epoch": 0.47, "learning_rate": 4.218634659107686e-05, "loss": 0.7219, "step": 42340 }, { "epoch": 0.47, "learning_rate": 4.218542386393835e-05, "loss": 0.7662, "step": 42345 }, { "epoch": 0.47, "learning_rate": 4.218450113679984e-05, "loss": 0.7589, "step": 42350 }, { "epoch": 0.47, "learning_rate": 4.2183578409661326e-05, "loss": 0.7582, "step": 42355 }, { "epoch": 0.47, "learning_rate": 4.218265568252281e-05, "loss": 0.7578, "step": 42360 }, { "epoch": 0.47, "learning_rate": 4.21817329553843e-05, "loss": 0.7556, "step": 42365 }, { "epoch": 0.47, "learning_rate": 4.218081022824579e-05, "loss": 0.7971, "step": 42370 }, { "epoch": 0.47, "learning_rate": 4.217988750110728e-05, "loss": 0.7142, "step": 42375 }, { "epoch": 0.47, "learning_rate": 4.217896477396876e-05, "loss": 0.7706, "step": 42380 }, { "epoch": 0.47, "learning_rate": 4.217804204683025e-05, "loss": 0.755, "step": 42385 }, { "epoch": 0.47, "learning_rate": 4.217711931969174e-05, "loss": 0.7414, "step": 42390 }, { "epoch": 0.47, "learning_rate": 4.217619659255322e-05, "loss": 0.7393, "step": 42395 }, { "epoch": 0.47, "learning_rate": 4.217527386541471e-05, "loss": 0.743, "step": 42400 }, { "epoch": 0.47, "learning_rate": 4.2174351138276204e-05, "loss": 0.7249, "step": 42405 }, { "epoch": 0.47, "learning_rate": 4.217342841113769e-05, "loss": 0.8043, "step": 42410 }, { "epoch": 0.47, "learning_rate": 4.217250568399917e-05, "loss": 0.7328, "step": 42415 }, { "epoch": 0.47, "learning_rate": 4.217158295686066e-05, "loss": 0.7818, "step": 42420 }, { "epoch": 0.47, "learning_rate": 4.2170660229722155e-05, "loss": 0.7537, "step": 42425 }, { "epoch": 0.47, "learning_rate": 4.2169737502583637e-05, "loss": 0.7517, "step": 42430 }, { "epoch": 0.47, "learning_rate": 4.2168814775445124e-05, "loss": 0.7834, "step": 42435 }, { "epoch": 0.47, "learning_rate": 4.216789204830661e-05, "loss": 0.7172, "step": 42440 }, { "epoch": 0.47, "learning_rate": 4.21669693211681e-05, "loss": 0.7313, "step": 42445 }, { "epoch": 0.47, "learning_rate": 4.216604659402959e-05, "loss": 0.7591, "step": 42450 }, { "epoch": 0.47, "learning_rate": 4.2165123866891076e-05, "loss": 0.7937, "step": 42455 }, { "epoch": 0.47, "learning_rate": 4.2164201139752564e-05, "loss": 0.7177, "step": 42460 }, { "epoch": 0.47, "learning_rate": 4.216327841261405e-05, "loss": 0.7481, "step": 42465 }, { "epoch": 0.47, "learning_rate": 4.216235568547554e-05, "loss": 0.693, "step": 42470 }, { "epoch": 0.47, "learning_rate": 4.216143295833703e-05, "loss": 0.7747, "step": 42475 }, { "epoch": 0.47, "learning_rate": 4.2160510231198515e-05, "loss": 0.7548, "step": 42480 }, { "epoch": 0.47, "learning_rate": 4.215958750406e-05, "loss": 0.7537, "step": 42485 }, { "epoch": 0.47, "learning_rate": 4.215866477692149e-05, "loss": 0.7461, "step": 42490 }, { "epoch": 0.47, "learning_rate": 4.215774204978298e-05, "loss": 0.7337, "step": 42495 }, { "epoch": 0.47, "learning_rate": 4.2156819322644466e-05, "loss": 0.7511, "step": 42500 }, { "epoch": 0.47, "learning_rate": 4.215589659550595e-05, "loss": 0.7384, "step": 42505 }, { "epoch": 0.47, "learning_rate": 4.2154973868367435e-05, "loss": 0.7913, "step": 42510 }, { "epoch": 0.47, "learning_rate": 4.215405114122893e-05, "loss": 0.7951, "step": 42515 }, { "epoch": 0.47, "learning_rate": 4.215312841409042e-05, "loss": 0.7243, "step": 42520 }, { "epoch": 0.47, "learning_rate": 4.21522056869519e-05, "loss": 0.7014, "step": 42525 }, { "epoch": 0.47, "learning_rate": 4.215128295981339e-05, "loss": 0.723, "step": 42530 }, { "epoch": 0.47, "learning_rate": 4.215036023267488e-05, "loss": 0.7195, "step": 42535 }, { "epoch": 0.47, "learning_rate": 4.214943750553636e-05, "loss": 0.7818, "step": 42540 }, { "epoch": 0.47, "learning_rate": 4.214851477839785e-05, "loss": 0.7567, "step": 42545 }, { "epoch": 0.47, "learning_rate": 4.214759205125934e-05, "loss": 0.7437, "step": 42550 }, { "epoch": 0.47, "learning_rate": 4.214666932412083e-05, "loss": 0.6798, "step": 42555 }, { "epoch": 0.47, "learning_rate": 4.2145746596982314e-05, "loss": 0.7378, "step": 42560 }, { "epoch": 0.47, "learning_rate": 4.21448238698438e-05, "loss": 0.7753, "step": 42565 }, { "epoch": 0.47, "learning_rate": 4.214390114270529e-05, "loss": 0.7096, "step": 42570 }, { "epoch": 0.47, "learning_rate": 4.214297841556678e-05, "loss": 0.7253, "step": 42575 }, { "epoch": 0.47, "learning_rate": 4.2142055688428265e-05, "loss": 0.7436, "step": 42580 }, { "epoch": 0.47, "learning_rate": 4.214113296128975e-05, "loss": 0.7489, "step": 42585 }, { "epoch": 0.47, "learning_rate": 4.214021023415124e-05, "loss": 0.7706, "step": 42590 }, { "epoch": 0.47, "learning_rate": 4.213928750701273e-05, "loss": 0.7817, "step": 42595 }, { "epoch": 0.47, "learning_rate": 4.213836477987422e-05, "loss": 0.757, "step": 42600 }, { "epoch": 0.47, "learning_rate": 4.2137442052735704e-05, "loss": 0.8204, "step": 42605 }, { "epoch": 0.47, "learning_rate": 4.213651932559719e-05, "loss": 0.759, "step": 42610 }, { "epoch": 0.47, "learning_rate": 4.2135596598458673e-05, "loss": 0.7463, "step": 42615 }, { "epoch": 0.47, "learning_rate": 4.213467387132017e-05, "loss": 0.7079, "step": 42620 }, { "epoch": 0.47, "learning_rate": 4.2133751144181656e-05, "loss": 0.7626, "step": 42625 }, { "epoch": 0.47, "learning_rate": 4.2132828417043144e-05, "loss": 0.7473, "step": 42630 }, { "epoch": 0.47, "learning_rate": 4.2131905689904625e-05, "loss": 0.7884, "step": 42635 }, { "epoch": 0.47, "learning_rate": 4.213098296276612e-05, "loss": 0.743, "step": 42640 }, { "epoch": 0.47, "learning_rate": 4.213006023562761e-05, "loss": 0.771, "step": 42645 }, { "epoch": 0.47, "learning_rate": 4.212913750848909e-05, "loss": 0.7462, "step": 42650 }, { "epoch": 0.47, "learning_rate": 4.2128214781350576e-05, "loss": 0.847, "step": 42655 }, { "epoch": 0.47, "learning_rate": 4.2127292054212064e-05, "loss": 0.7594, "step": 42660 }, { "epoch": 0.47, "learning_rate": 4.212636932707356e-05, "loss": 0.7439, "step": 42665 }, { "epoch": 0.47, "learning_rate": 4.212544659993504e-05, "loss": 0.7382, "step": 42670 }, { "epoch": 0.47, "learning_rate": 4.212452387279653e-05, "loss": 0.7337, "step": 42675 }, { "epoch": 0.47, "learning_rate": 4.2123601145658015e-05, "loss": 0.7566, "step": 42680 }, { "epoch": 0.47, "learning_rate": 4.212267841851951e-05, "loss": 0.8504, "step": 42685 }, { "epoch": 0.47, "learning_rate": 4.212175569138099e-05, "loss": 0.7155, "step": 42690 }, { "epoch": 0.47, "learning_rate": 4.212083296424248e-05, "loss": 0.7698, "step": 42695 }, { "epoch": 0.47, "learning_rate": 4.211991023710397e-05, "loss": 0.7438, "step": 42700 }, { "epoch": 0.47, "learning_rate": 4.2118987509965455e-05, "loss": 0.7455, "step": 42705 }, { "epoch": 0.47, "learning_rate": 4.211806478282694e-05, "loss": 0.7588, "step": 42710 }, { "epoch": 0.47, "learning_rate": 4.211714205568843e-05, "loss": 0.8031, "step": 42715 }, { "epoch": 0.47, "learning_rate": 4.211621932854992e-05, "loss": 0.7195, "step": 42720 }, { "epoch": 0.47, "learning_rate": 4.2115296601411406e-05, "loss": 0.6946, "step": 42725 }, { "epoch": 0.47, "learning_rate": 4.2114373874272894e-05, "loss": 0.7984, "step": 42730 }, { "epoch": 0.47, "learning_rate": 4.211345114713438e-05, "loss": 0.7866, "step": 42735 }, { "epoch": 0.47, "learning_rate": 4.211252841999587e-05, "loss": 0.769, "step": 42740 }, { "epoch": 0.47, "learning_rate": 4.211160569285735e-05, "loss": 0.7304, "step": 42745 }, { "epoch": 0.47, "learning_rate": 4.2110682965718845e-05, "loss": 0.7065, "step": 42750 }, { "epoch": 0.47, "learning_rate": 4.210976023858033e-05, "loss": 0.749, "step": 42755 }, { "epoch": 0.47, "learning_rate": 4.210883751144182e-05, "loss": 0.7943, "step": 42760 }, { "epoch": 0.47, "learning_rate": 4.21079147843033e-05, "loss": 0.746, "step": 42765 }, { "epoch": 0.47, "learning_rate": 4.21069920571648e-05, "loss": 0.7175, "step": 42770 }, { "epoch": 0.47, "learning_rate": 4.2106069330026285e-05, "loss": 0.8405, "step": 42775 }, { "epoch": 0.47, "learning_rate": 4.2105146602887766e-05, "loss": 0.7638, "step": 42780 }, { "epoch": 0.47, "learning_rate": 4.2104223875749253e-05, "loss": 0.7227, "step": 42785 }, { "epoch": 0.47, "learning_rate": 4.210330114861075e-05, "loss": 0.7298, "step": 42790 }, { "epoch": 0.47, "learning_rate": 4.2102378421472236e-05, "loss": 0.707, "step": 42795 }, { "epoch": 0.47, "learning_rate": 4.210145569433372e-05, "loss": 0.7285, "step": 42800 }, { "epoch": 0.47, "learning_rate": 4.2100532967195205e-05, "loss": 0.7674, "step": 42805 }, { "epoch": 0.47, "learning_rate": 4.209961024005669e-05, "loss": 0.732, "step": 42810 }, { "epoch": 0.47, "learning_rate": 4.209868751291818e-05, "loss": 0.7292, "step": 42815 }, { "epoch": 0.47, "learning_rate": 4.209776478577967e-05, "loss": 0.7507, "step": 42820 }, { "epoch": 0.47, "learning_rate": 4.2096842058641156e-05, "loss": 0.7296, "step": 42825 }, { "epoch": 0.47, "learning_rate": 4.2095919331502644e-05, "loss": 0.7446, "step": 42830 }, { "epoch": 0.47, "learning_rate": 4.209499660436413e-05, "loss": 0.7332, "step": 42835 }, { "epoch": 0.47, "learning_rate": 4.209407387722562e-05, "loss": 0.6839, "step": 42840 }, { "epoch": 0.47, "learning_rate": 4.209315115008711e-05, "loss": 0.7941, "step": 42845 }, { "epoch": 0.47, "learning_rate": 4.2092228422948596e-05, "loss": 0.751, "step": 42850 }, { "epoch": 0.47, "learning_rate": 4.209130569581008e-05, "loss": 0.7324, "step": 42855 }, { "epoch": 0.47, "learning_rate": 4.209038296867157e-05, "loss": 0.687, "step": 42860 }, { "epoch": 0.47, "learning_rate": 4.208946024153306e-05, "loss": 0.7466, "step": 42865 }, { "epoch": 0.47, "learning_rate": 4.208853751439455e-05, "loss": 0.7939, "step": 42870 }, { "epoch": 0.47, "learning_rate": 4.2087614787256035e-05, "loss": 0.8089, "step": 42875 }, { "epoch": 0.47, "learning_rate": 4.208669206011752e-05, "loss": 0.7817, "step": 42880 }, { "epoch": 0.47, "learning_rate": 4.208576933297901e-05, "loss": 0.8143, "step": 42885 }, { "epoch": 0.47, "learning_rate": 4.208484660584049e-05, "loss": 0.7825, "step": 42890 }, { "epoch": 0.47, "learning_rate": 4.208392387870198e-05, "loss": 0.7629, "step": 42895 }, { "epoch": 0.48, "learning_rate": 4.2083001151563474e-05, "loss": 0.8014, "step": 42900 }, { "epoch": 0.48, "learning_rate": 4.208207842442496e-05, "loss": 0.7791, "step": 42905 }, { "epoch": 0.48, "learning_rate": 4.208115569728644e-05, "loss": 0.7362, "step": 42910 }, { "epoch": 0.48, "learning_rate": 4.208023297014793e-05, "loss": 0.7596, "step": 42915 }, { "epoch": 0.48, "learning_rate": 4.2079310243009425e-05, "loss": 0.734, "step": 42920 }, { "epoch": 0.48, "learning_rate": 4.2078387515870906e-05, "loss": 0.7041, "step": 42925 }, { "epoch": 0.48, "learning_rate": 4.2077464788732394e-05, "loss": 0.743, "step": 42930 }, { "epoch": 0.48, "learning_rate": 4.207654206159388e-05, "loss": 0.8085, "step": 42935 }, { "epoch": 0.48, "learning_rate": 4.207561933445538e-05, "loss": 0.7373, "step": 42940 }, { "epoch": 0.48, "learning_rate": 4.207469660731686e-05, "loss": 0.7566, "step": 42945 }, { "epoch": 0.48, "learning_rate": 4.2073773880178346e-05, "loss": 0.8333, "step": 42950 }, { "epoch": 0.48, "learning_rate": 4.2072851153039834e-05, "loss": 0.7098, "step": 42955 }, { "epoch": 0.48, "learning_rate": 4.207192842590132e-05, "loss": 0.803, "step": 42960 }, { "epoch": 0.48, "learning_rate": 4.207100569876281e-05, "loss": 0.744, "step": 42965 }, { "epoch": 0.48, "learning_rate": 4.20700829716243e-05, "loss": 0.7724, "step": 42970 }, { "epoch": 0.48, "learning_rate": 4.2069160244485785e-05, "loss": 0.7363, "step": 42975 }, { "epoch": 0.48, "learning_rate": 4.206823751734727e-05, "loss": 0.7811, "step": 42980 }, { "epoch": 0.48, "learning_rate": 4.206731479020876e-05, "loss": 0.7461, "step": 42985 }, { "epoch": 0.48, "learning_rate": 4.206639206307025e-05, "loss": 0.7721, "step": 42990 }, { "epoch": 0.48, "learning_rate": 4.2065469335931736e-05, "loss": 0.7004, "step": 42995 }, { "epoch": 0.48, "learning_rate": 4.206454660879322e-05, "loss": 0.711, "step": 43000 }, { "epoch": 0.48, "eval_loss": 0.6992262005805969, "eval_runtime": 69.3546, "eval_samples_per_second": 28.837, "eval_steps_per_second": 14.419, "step": 43000 }, { "epoch": 0.48, "learning_rate": 4.206362388165471e-05, "loss": 0.7564, "step": 43005 }, { "epoch": 0.48, "learning_rate": 4.20627011545162e-05, "loss": 0.744, "step": 43010 }, { "epoch": 0.48, "learning_rate": 4.206177842737769e-05, "loss": 0.8024, "step": 43015 }, { "epoch": 0.48, "learning_rate": 4.206085570023917e-05, "loss": 0.6836, "step": 43020 }, { "epoch": 0.48, "learning_rate": 4.2059932973100663e-05, "loss": 0.7462, "step": 43025 }, { "epoch": 0.48, "learning_rate": 4.205901024596215e-05, "loss": 0.6747, "step": 43030 }, { "epoch": 0.48, "learning_rate": 4.205808751882363e-05, "loss": 0.7261, "step": 43035 }, { "epoch": 0.48, "learning_rate": 4.205716479168512e-05, "loss": 0.6895, "step": 43040 }, { "epoch": 0.48, "learning_rate": 4.205624206454661e-05, "loss": 0.7434, "step": 43045 }, { "epoch": 0.48, "learning_rate": 4.20553193374081e-05, "loss": 0.7737, "step": 43050 }, { "epoch": 0.48, "learning_rate": 4.2054396610269584e-05, "loss": 0.7951, "step": 43055 }, { "epoch": 0.48, "learning_rate": 4.205347388313107e-05, "loss": 0.7448, "step": 43060 }, { "epoch": 0.48, "learning_rate": 4.205255115599256e-05, "loss": 0.7615, "step": 43065 }, { "epoch": 0.48, "learning_rate": 4.2051628428854054e-05, "loss": 0.7468, "step": 43070 }, { "epoch": 0.48, "learning_rate": 4.2050705701715535e-05, "loss": 0.7532, "step": 43075 }, { "epoch": 0.48, "learning_rate": 4.204978297457702e-05, "loss": 0.7957, "step": 43080 }, { "epoch": 0.48, "learning_rate": 4.204886024743851e-05, "loss": 0.7438, "step": 43085 }, { "epoch": 0.48, "learning_rate": 4.20479375203e-05, "loss": 0.7104, "step": 43090 }, { "epoch": 0.48, "learning_rate": 4.2047014793161487e-05, "loss": 0.7413, "step": 43095 }, { "epoch": 0.48, "learning_rate": 4.2046092066022974e-05, "loss": 0.7491, "step": 43100 }, { "epoch": 0.48, "learning_rate": 4.204516933888446e-05, "loss": 0.7992, "step": 43105 }, { "epoch": 0.48, "learning_rate": 4.204424661174595e-05, "loss": 0.7341, "step": 43110 }, { "epoch": 0.48, "learning_rate": 4.204332388460744e-05, "loss": 0.7553, "step": 43115 }, { "epoch": 0.48, "learning_rate": 4.2042401157468926e-05, "loss": 0.7657, "step": 43120 }, { "epoch": 0.48, "learning_rate": 4.2041478430330414e-05, "loss": 0.8334, "step": 43125 }, { "epoch": 0.48, "learning_rate": 4.2040555703191895e-05, "loss": 0.7813, "step": 43130 }, { "epoch": 0.48, "learning_rate": 4.203963297605339e-05, "loss": 0.7627, "step": 43135 }, { "epoch": 0.48, "learning_rate": 4.203871024891488e-05, "loss": 0.7627, "step": 43140 }, { "epoch": 0.48, "learning_rate": 4.2037787521776365e-05, "loss": 0.6947, "step": 43145 }, { "epoch": 0.48, "learning_rate": 4.2036864794637846e-05, "loss": 0.8073, "step": 43150 }, { "epoch": 0.48, "learning_rate": 4.203594206749934e-05, "loss": 0.8258, "step": 43155 }, { "epoch": 0.48, "learning_rate": 4.203501934036083e-05, "loss": 0.7431, "step": 43160 }, { "epoch": 0.48, "learning_rate": 4.203409661322231e-05, "loss": 0.6869, "step": 43165 }, { "epoch": 0.48, "learning_rate": 4.20331738860838e-05, "loss": 0.6925, "step": 43170 }, { "epoch": 0.48, "learning_rate": 4.203225115894529e-05, "loss": 0.7858, "step": 43175 }, { "epoch": 0.48, "learning_rate": 4.203132843180678e-05, "loss": 0.6999, "step": 43180 }, { "epoch": 0.48, "learning_rate": 4.203040570466826e-05, "loss": 0.8425, "step": 43185 }, { "epoch": 0.48, "learning_rate": 4.202948297752975e-05, "loss": 0.7772, "step": 43190 }, { "epoch": 0.48, "learning_rate": 4.202856025039124e-05, "loss": 0.7993, "step": 43195 }, { "epoch": 0.48, "learning_rate": 4.2027637523252725e-05, "loss": 0.76, "step": 43200 }, { "epoch": 0.48, "learning_rate": 4.202671479611421e-05, "loss": 0.7909, "step": 43205 }, { "epoch": 0.48, "learning_rate": 4.20257920689757e-05, "loss": 0.7826, "step": 43210 }, { "epoch": 0.48, "learning_rate": 4.202486934183719e-05, "loss": 0.7344, "step": 43215 }, { "epoch": 0.48, "learning_rate": 4.2023946614698676e-05, "loss": 0.7587, "step": 43220 }, { "epoch": 0.48, "learning_rate": 4.2023023887560164e-05, "loss": 0.6969, "step": 43225 }, { "epoch": 0.48, "learning_rate": 4.202210116042165e-05, "loss": 0.7167, "step": 43230 }, { "epoch": 0.48, "learning_rate": 4.202117843328314e-05, "loss": 0.7028, "step": 43235 }, { "epoch": 0.48, "learning_rate": 4.202025570614463e-05, "loss": 0.749, "step": 43240 }, { "epoch": 0.48, "learning_rate": 4.2019332979006115e-05, "loss": 0.7602, "step": 43245 }, { "epoch": 0.48, "learning_rate": 4.20184102518676e-05, "loss": 0.7382, "step": 43250 }, { "epoch": 0.48, "learning_rate": 4.201748752472909e-05, "loss": 0.7322, "step": 43255 }, { "epoch": 0.48, "learning_rate": 4.201656479759058e-05, "loss": 0.7274, "step": 43260 }, { "epoch": 0.48, "learning_rate": 4.201564207045207e-05, "loss": 0.7683, "step": 43265 }, { "epoch": 0.48, "learning_rate": 4.2014719343313554e-05, "loss": 0.7538, "step": 43270 }, { "epoch": 0.48, "learning_rate": 4.2013796616175036e-05, "loss": 0.755, "step": 43275 }, { "epoch": 0.48, "learning_rate": 4.201287388903652e-05, "loss": 0.7744, "step": 43280 }, { "epoch": 0.48, "learning_rate": 4.201195116189802e-05, "loss": 0.7919, "step": 43285 }, { "epoch": 0.48, "learning_rate": 4.2011028434759506e-05, "loss": 0.7294, "step": 43290 }, { "epoch": 0.48, "learning_rate": 4.201010570762099e-05, "loss": 0.7563, "step": 43295 }, { "epoch": 0.48, "learning_rate": 4.2009182980482475e-05, "loss": 0.7985, "step": 43300 }, { "epoch": 0.48, "learning_rate": 4.200826025334397e-05, "loss": 0.7766, "step": 43305 }, { "epoch": 0.48, "learning_rate": 4.200733752620545e-05, "loss": 0.724, "step": 43310 }, { "epoch": 0.48, "learning_rate": 4.200641479906694e-05, "loss": 0.7757, "step": 43315 }, { "epoch": 0.48, "learning_rate": 4.2005492071928426e-05, "loss": 0.7313, "step": 43320 }, { "epoch": 0.48, "learning_rate": 4.200456934478992e-05, "loss": 0.7746, "step": 43325 }, { "epoch": 0.48, "learning_rate": 4.20036466176514e-05, "loss": 0.729, "step": 43330 }, { "epoch": 0.48, "learning_rate": 4.200272389051289e-05, "loss": 0.7601, "step": 43335 }, { "epoch": 0.48, "learning_rate": 4.200180116337438e-05, "loss": 0.7174, "step": 43340 }, { "epoch": 0.48, "learning_rate": 4.2000878436235865e-05, "loss": 0.7792, "step": 43345 }, { "epoch": 0.48, "learning_rate": 4.199995570909735e-05, "loss": 0.7234, "step": 43350 }, { "epoch": 0.48, "learning_rate": 4.199903298195884e-05, "loss": 0.7991, "step": 43355 }, { "epoch": 0.48, "learning_rate": 4.199811025482033e-05, "loss": 0.6803, "step": 43360 }, { "epoch": 0.48, "learning_rate": 4.199718752768182e-05, "loss": 0.7751, "step": 43365 }, { "epoch": 0.48, "learning_rate": 4.1996264800543305e-05, "loss": 0.7359, "step": 43370 }, { "epoch": 0.48, "learning_rate": 4.199534207340479e-05, "loss": 0.7658, "step": 43375 }, { "epoch": 0.48, "learning_rate": 4.199441934626628e-05, "loss": 0.7299, "step": 43380 }, { "epoch": 0.48, "learning_rate": 4.199349661912776e-05, "loss": 0.8184, "step": 43385 }, { "epoch": 0.48, "learning_rate": 4.1992573891989256e-05, "loss": 0.768, "step": 43390 }, { "epoch": 0.48, "learning_rate": 4.1991651164850744e-05, "loss": 0.7472, "step": 43395 }, { "epoch": 0.48, "learning_rate": 4.199072843771223e-05, "loss": 0.8066, "step": 43400 }, { "epoch": 0.48, "learning_rate": 4.198980571057371e-05, "loss": 0.7819, "step": 43405 }, { "epoch": 0.48, "learning_rate": 4.198888298343521e-05, "loss": 0.7991, "step": 43410 }, { "epoch": 0.48, "learning_rate": 4.1987960256296695e-05, "loss": 0.7593, "step": 43415 }, { "epoch": 0.48, "learning_rate": 4.1987037529158176e-05, "loss": 0.7842, "step": 43420 }, { "epoch": 0.48, "learning_rate": 4.1986114802019664e-05, "loss": 0.785, "step": 43425 }, { "epoch": 0.48, "learning_rate": 4.198519207488115e-05, "loss": 0.7467, "step": 43430 }, { "epoch": 0.48, "learning_rate": 4.198426934774265e-05, "loss": 0.8085, "step": 43435 }, { "epoch": 0.48, "learning_rate": 4.198334662060413e-05, "loss": 0.8017, "step": 43440 }, { "epoch": 0.48, "learning_rate": 4.1982423893465616e-05, "loss": 0.7114, "step": 43445 }, { "epoch": 0.48, "learning_rate": 4.1981501166327103e-05, "loss": 0.787, "step": 43450 }, { "epoch": 0.48, "learning_rate": 4.19805784391886e-05, "loss": 0.694, "step": 43455 }, { "epoch": 0.48, "learning_rate": 4.197965571205008e-05, "loss": 0.7516, "step": 43460 }, { "epoch": 0.48, "learning_rate": 4.197873298491157e-05, "loss": 0.7252, "step": 43465 }, { "epoch": 0.48, "learning_rate": 4.1977810257773055e-05, "loss": 0.807, "step": 43470 }, { "epoch": 0.48, "learning_rate": 4.197688753063454e-05, "loss": 0.7422, "step": 43475 }, { "epoch": 0.48, "learning_rate": 4.197596480349603e-05, "loss": 0.8093, "step": 43480 }, { "epoch": 0.48, "learning_rate": 4.197504207635752e-05, "loss": 0.8009, "step": 43485 }, { "epoch": 0.48, "learning_rate": 4.1974119349219006e-05, "loss": 0.8003, "step": 43490 }, { "epoch": 0.48, "learning_rate": 4.197319662208049e-05, "loss": 0.8804, "step": 43495 }, { "epoch": 0.48, "learning_rate": 4.197227389494198e-05, "loss": 0.8018, "step": 43500 }, { "epoch": 0.48, "learning_rate": 4.197135116780347e-05, "loss": 0.7817, "step": 43505 }, { "epoch": 0.48, "learning_rate": 4.197042844066496e-05, "loss": 0.8048, "step": 43510 }, { "epoch": 0.48, "learning_rate": 4.196950571352644e-05, "loss": 0.8278, "step": 43515 }, { "epoch": 0.48, "learning_rate": 4.196858298638793e-05, "loss": 0.7863, "step": 43520 }, { "epoch": 0.48, "learning_rate": 4.196766025924942e-05, "loss": 0.7752, "step": 43525 }, { "epoch": 0.48, "learning_rate": 4.196673753211091e-05, "loss": 0.7513, "step": 43530 }, { "epoch": 0.48, "learning_rate": 4.196581480497239e-05, "loss": 0.8176, "step": 43535 }, { "epoch": 0.48, "learning_rate": 4.1964892077833885e-05, "loss": 0.7417, "step": 43540 }, { "epoch": 0.48, "learning_rate": 4.196396935069537e-05, "loss": 0.8684, "step": 43545 }, { "epoch": 0.48, "learning_rate": 4.1963046623556854e-05, "loss": 0.7986, "step": 43550 }, { "epoch": 0.48, "learning_rate": 4.196212389641834e-05, "loss": 0.8748, "step": 43555 }, { "epoch": 0.48, "learning_rate": 4.1961201169279836e-05, "loss": 0.7797, "step": 43560 }, { "epoch": 0.48, "learning_rate": 4.1960278442141324e-05, "loss": 0.7789, "step": 43565 }, { "epoch": 0.48, "learning_rate": 4.1959355715002805e-05, "loss": 0.8481, "step": 43570 }, { "epoch": 0.48, "learning_rate": 4.195843298786429e-05, "loss": 0.759, "step": 43575 }, { "epoch": 0.48, "learning_rate": 4.195751026072578e-05, "loss": 0.8585, "step": 43580 }, { "epoch": 0.48, "learning_rate": 4.195658753358727e-05, "loss": 0.7849, "step": 43585 }, { "epoch": 0.48, "learning_rate": 4.1955664806448756e-05, "loss": 0.7667, "step": 43590 }, { "epoch": 0.48, "learning_rate": 4.1954742079310244e-05, "loss": 0.7643, "step": 43595 }, { "epoch": 0.48, "learning_rate": 4.195381935217173e-05, "loss": 0.8056, "step": 43600 }, { "epoch": 0.48, "learning_rate": 4.195289662503322e-05, "loss": 0.7866, "step": 43605 }, { "epoch": 0.48, "learning_rate": 4.195197389789471e-05, "loss": 0.7229, "step": 43610 }, { "epoch": 0.48, "learning_rate": 4.1951051170756196e-05, "loss": 0.7402, "step": 43615 }, { "epoch": 0.48, "learning_rate": 4.1950128443617684e-05, "loss": 0.7968, "step": 43620 }, { "epoch": 0.48, "learning_rate": 4.194920571647917e-05, "loss": 0.8012, "step": 43625 }, { "epoch": 0.48, "learning_rate": 4.194828298934066e-05, "loss": 0.763, "step": 43630 }, { "epoch": 0.48, "learning_rate": 4.194736026220215e-05, "loss": 0.7666, "step": 43635 }, { "epoch": 0.48, "learning_rate": 4.1946437535063635e-05, "loss": 0.8528, "step": 43640 }, { "epoch": 0.48, "learning_rate": 4.1945514807925116e-05, "loss": 0.829, "step": 43645 }, { "epoch": 0.48, "learning_rate": 4.194459208078661e-05, "loss": 0.7688, "step": 43650 }, { "epoch": 0.48, "learning_rate": 4.19436693536481e-05, "loss": 0.8888, "step": 43655 }, { "epoch": 0.48, "learning_rate": 4.194274662650958e-05, "loss": 0.7956, "step": 43660 }, { "epoch": 0.48, "learning_rate": 4.194182389937107e-05, "loss": 0.8187, "step": 43665 }, { "epoch": 0.48, "learning_rate": 4.194090117223256e-05, "loss": 0.7749, "step": 43670 }, { "epoch": 0.48, "learning_rate": 4.193997844509405e-05, "loss": 0.8094, "step": 43675 }, { "epoch": 0.48, "learning_rate": 4.193905571795553e-05, "loss": 0.8534, "step": 43680 }, { "epoch": 0.48, "learning_rate": 4.193813299081702e-05, "loss": 0.7949, "step": 43685 }, { "epoch": 0.48, "learning_rate": 4.1937210263678513e-05, "loss": 0.7652, "step": 43690 }, { "epoch": 0.48, "learning_rate": 4.1936287536539994e-05, "loss": 0.7902, "step": 43695 }, { "epoch": 0.48, "learning_rate": 4.193536480940148e-05, "loss": 0.7981, "step": 43700 }, { "epoch": 0.48, "learning_rate": 4.193444208226297e-05, "loss": 0.7356, "step": 43705 }, { "epoch": 0.48, "learning_rate": 4.1933519355124465e-05, "loss": 0.7074, "step": 43710 }, { "epoch": 0.48, "learning_rate": 4.1932596627985946e-05, "loss": 0.8573, "step": 43715 }, { "epoch": 0.48, "learning_rate": 4.1931673900847434e-05, "loss": 0.8665, "step": 43720 }, { "epoch": 0.48, "learning_rate": 4.193075117370892e-05, "loss": 0.8597, "step": 43725 }, { "epoch": 0.48, "learning_rate": 4.192982844657041e-05, "loss": 0.7544, "step": 43730 }, { "epoch": 0.48, "learning_rate": 4.19289057194319e-05, "loss": 0.7806, "step": 43735 }, { "epoch": 0.48, "learning_rate": 4.1927982992293385e-05, "loss": 0.7733, "step": 43740 }, { "epoch": 0.48, "learning_rate": 4.192706026515487e-05, "loss": 0.799, "step": 43745 }, { "epoch": 0.48, "learning_rate": 4.192613753801636e-05, "loss": 0.8539, "step": 43750 }, { "epoch": 0.48, "learning_rate": 4.192521481087785e-05, "loss": 0.7684, "step": 43755 }, { "epoch": 0.48, "learning_rate": 4.1924292083739337e-05, "loss": 0.8701, "step": 43760 }, { "epoch": 0.48, "learning_rate": 4.1923369356600824e-05, "loss": 0.7945, "step": 43765 }, { "epoch": 0.48, "learning_rate": 4.1922446629462305e-05, "loss": 0.8317, "step": 43770 }, { "epoch": 0.48, "learning_rate": 4.19215239023238e-05, "loss": 0.7832, "step": 43775 }, { "epoch": 0.48, "learning_rate": 4.192060117518529e-05, "loss": 0.802, "step": 43780 }, { "epoch": 0.48, "learning_rate": 4.1919678448046776e-05, "loss": 0.8201, "step": 43785 }, { "epoch": 0.48, "learning_rate": 4.191875572090826e-05, "loss": 0.8249, "step": 43790 }, { "epoch": 0.48, "learning_rate": 4.1917832993769745e-05, "loss": 0.7146, "step": 43795 }, { "epoch": 0.48, "learning_rate": 4.191691026663124e-05, "loss": 0.7601, "step": 43800 }, { "epoch": 0.49, "learning_rate": 4.191598753949272e-05, "loss": 0.8092, "step": 43805 }, { "epoch": 0.49, "learning_rate": 4.191506481235421e-05, "loss": 0.7725, "step": 43810 }, { "epoch": 0.49, "learning_rate": 4.1914142085215696e-05, "loss": 0.8114, "step": 43815 }, { "epoch": 0.49, "learning_rate": 4.191321935807719e-05, "loss": 0.8413, "step": 43820 }, { "epoch": 0.49, "learning_rate": 4.191229663093867e-05, "loss": 0.7723, "step": 43825 }, { "epoch": 0.49, "learning_rate": 4.191137390380016e-05, "loss": 0.7724, "step": 43830 }, { "epoch": 0.49, "learning_rate": 4.191045117666165e-05, "loss": 0.7881, "step": 43835 }, { "epoch": 0.49, "learning_rate": 4.190952844952314e-05, "loss": 0.8678, "step": 43840 }, { "epoch": 0.49, "learning_rate": 4.190860572238462e-05, "loss": 0.7829, "step": 43845 }, { "epoch": 0.49, "learning_rate": 4.190768299524611e-05, "loss": 0.7889, "step": 43850 }, { "epoch": 0.49, "learning_rate": 4.19067602681076e-05, "loss": 0.7685, "step": 43855 }, { "epoch": 0.49, "learning_rate": 4.190583754096909e-05, "loss": 0.8517, "step": 43860 }, { "epoch": 0.49, "learning_rate": 4.1904914813830575e-05, "loss": 0.8174, "step": 43865 }, { "epoch": 0.49, "learning_rate": 4.190399208669206e-05, "loss": 0.8056, "step": 43870 }, { "epoch": 0.49, "learning_rate": 4.190306935955355e-05, "loss": 0.9183, "step": 43875 }, { "epoch": 0.49, "learning_rate": 4.190214663241503e-05, "loss": 0.9286, "step": 43880 }, { "epoch": 0.49, "learning_rate": 4.1901223905276526e-05, "loss": 0.7921, "step": 43885 }, { "epoch": 0.49, "learning_rate": 4.1900301178138014e-05, "loss": 0.8184, "step": 43890 }, { "epoch": 0.49, "learning_rate": 4.18993784509995e-05, "loss": 0.7785, "step": 43895 }, { "epoch": 0.49, "learning_rate": 4.189845572386098e-05, "loss": 0.8323, "step": 43900 }, { "epoch": 0.49, "learning_rate": 4.189753299672248e-05, "loss": 0.7785, "step": 43905 }, { "epoch": 0.49, "learning_rate": 4.1896610269583965e-05, "loss": 0.742, "step": 43910 }, { "epoch": 0.49, "learning_rate": 4.189568754244545e-05, "loss": 0.7901, "step": 43915 }, { "epoch": 0.49, "learning_rate": 4.1894764815306934e-05, "loss": 0.8273, "step": 43920 }, { "epoch": 0.49, "learning_rate": 4.189384208816843e-05, "loss": 0.8541, "step": 43925 }, { "epoch": 0.49, "learning_rate": 4.1892919361029917e-05, "loss": 0.8225, "step": 43930 }, { "epoch": 0.49, "learning_rate": 4.18919966338914e-05, "loss": 0.8054, "step": 43935 }, { "epoch": 0.49, "learning_rate": 4.1891073906752886e-05, "loss": 0.8007, "step": 43940 }, { "epoch": 0.49, "learning_rate": 4.189015117961438e-05, "loss": 0.8472, "step": 43945 }, { "epoch": 0.49, "learning_rate": 4.188922845247587e-05, "loss": 0.7973, "step": 43950 }, { "epoch": 0.49, "learning_rate": 4.188830572533735e-05, "loss": 0.7984, "step": 43955 }, { "epoch": 0.49, "learning_rate": 4.188738299819884e-05, "loss": 0.7342, "step": 43960 }, { "epoch": 0.49, "learning_rate": 4.1886460271060325e-05, "loss": 0.8715, "step": 43965 }, { "epoch": 0.49, "learning_rate": 4.188553754392181e-05, "loss": 0.8066, "step": 43970 }, { "epoch": 0.49, "learning_rate": 4.18846148167833e-05, "loss": 0.8308, "step": 43975 }, { "epoch": 0.49, "learning_rate": 4.188369208964479e-05, "loss": 0.8733, "step": 43980 }, { "epoch": 0.49, "learning_rate": 4.1882769362506276e-05, "loss": 0.6919, "step": 43985 }, { "epoch": 0.49, "learning_rate": 4.1881846635367764e-05, "loss": 0.7976, "step": 43990 }, { "epoch": 0.49, "learning_rate": 4.188092390822925e-05, "loss": 0.7744, "step": 43995 }, { "epoch": 0.49, "learning_rate": 4.188000118109074e-05, "loss": 0.7984, "step": 44000 }, { "epoch": 0.49, "eval_loss": 0.7055449485778809, "eval_runtime": 69.2512, "eval_samples_per_second": 28.88, "eval_steps_per_second": 14.44, "step": 44000 }, { "epoch": 0.49, "learning_rate": 4.187907845395223e-05, "loss": 0.7889, "step": 44005 }, { "epoch": 0.49, "learning_rate": 4.1878155726813715e-05, "loss": 0.7246, "step": 44010 }, { "epoch": 0.49, "learning_rate": 4.18772329996752e-05, "loss": 0.7823, "step": 44015 }, { "epoch": 0.49, "learning_rate": 4.187631027253669e-05, "loss": 0.8056, "step": 44020 }, { "epoch": 0.49, "learning_rate": 4.187538754539818e-05, "loss": 0.8379, "step": 44025 }, { "epoch": 0.49, "learning_rate": 4.187446481825966e-05, "loss": 0.8455, "step": 44030 }, { "epoch": 0.49, "learning_rate": 4.1873542091121155e-05, "loss": 0.8481, "step": 44035 }, { "epoch": 0.49, "learning_rate": 4.187261936398264e-05, "loss": 0.8566, "step": 44040 }, { "epoch": 0.49, "learning_rate": 4.1871696636844124e-05, "loss": 0.814, "step": 44045 }, { "epoch": 0.49, "learning_rate": 4.187077390970561e-05, "loss": 0.8158, "step": 44050 }, { "epoch": 0.49, "learning_rate": 4.1869851182567106e-05, "loss": 0.773, "step": 44055 }, { "epoch": 0.49, "learning_rate": 4.1868928455428594e-05, "loss": 0.7719, "step": 44060 }, { "epoch": 0.49, "learning_rate": 4.1868005728290075e-05, "loss": 0.7923, "step": 44065 }, { "epoch": 0.49, "learning_rate": 4.186708300115156e-05, "loss": 0.8193, "step": 44070 }, { "epoch": 0.49, "learning_rate": 4.186616027401306e-05, "loss": 0.7866, "step": 44075 }, { "epoch": 0.49, "learning_rate": 4.186523754687454e-05, "loss": 0.8432, "step": 44080 }, { "epoch": 0.49, "learning_rate": 4.1864314819736026e-05, "loss": 0.7395, "step": 44085 }, { "epoch": 0.49, "learning_rate": 4.1863392092597514e-05, "loss": 0.7769, "step": 44090 }, { "epoch": 0.49, "learning_rate": 4.186246936545901e-05, "loss": 0.7493, "step": 44095 }, { "epoch": 0.49, "learning_rate": 4.186154663832049e-05, "loss": 0.8746, "step": 44100 }, { "epoch": 0.49, "learning_rate": 4.186062391118198e-05, "loss": 0.7939, "step": 44105 }, { "epoch": 0.49, "learning_rate": 4.1859701184043466e-05, "loss": 0.832, "step": 44110 }, { "epoch": 0.49, "learning_rate": 4.1858778456904953e-05, "loss": 0.6867, "step": 44115 }, { "epoch": 0.49, "learning_rate": 4.185785572976644e-05, "loss": 0.857, "step": 44120 }, { "epoch": 0.49, "learning_rate": 4.185693300262793e-05, "loss": 0.8586, "step": 44125 }, { "epoch": 0.49, "learning_rate": 4.185601027548942e-05, "loss": 0.8727, "step": 44130 }, { "epoch": 0.49, "learning_rate": 4.1855087548350905e-05, "loss": 0.824, "step": 44135 }, { "epoch": 0.49, "learning_rate": 4.185416482121239e-05, "loss": 0.7563, "step": 44140 }, { "epoch": 0.49, "learning_rate": 4.185324209407388e-05, "loss": 0.7788, "step": 44145 }, { "epoch": 0.49, "learning_rate": 4.185231936693537e-05, "loss": 0.7817, "step": 44150 }, { "epoch": 0.49, "learning_rate": 4.185139663979685e-05, "loss": 0.8268, "step": 44155 }, { "epoch": 0.49, "learning_rate": 4.1850473912658344e-05, "loss": 0.8202, "step": 44160 }, { "epoch": 0.49, "learning_rate": 4.184955118551983e-05, "loss": 0.7634, "step": 44165 }, { "epoch": 0.49, "learning_rate": 4.184862845838132e-05, "loss": 0.9011, "step": 44170 }, { "epoch": 0.49, "learning_rate": 4.18477057312428e-05, "loss": 0.8258, "step": 44175 }, { "epoch": 0.49, "learning_rate": 4.184678300410429e-05, "loss": 0.7819, "step": 44180 }, { "epoch": 0.49, "learning_rate": 4.184586027696578e-05, "loss": 0.8508, "step": 44185 }, { "epoch": 0.49, "learning_rate": 4.1844937549827264e-05, "loss": 0.8166, "step": 44190 }, { "epoch": 0.49, "learning_rate": 4.184401482268875e-05, "loss": 0.8344, "step": 44195 }, { "epoch": 0.49, "learning_rate": 4.184309209555024e-05, "loss": 0.8304, "step": 44200 }, { "epoch": 0.49, "learning_rate": 4.1842169368411735e-05, "loss": 0.7669, "step": 44205 }, { "epoch": 0.49, "learning_rate": 4.1841246641273216e-05, "loss": 0.7464, "step": 44210 }, { "epoch": 0.49, "learning_rate": 4.1840323914134704e-05, "loss": 0.7736, "step": 44215 }, { "epoch": 0.49, "learning_rate": 4.183940118699619e-05, "loss": 0.7857, "step": 44220 }, { "epoch": 0.49, "learning_rate": 4.1838478459857686e-05, "loss": 0.785, "step": 44225 }, { "epoch": 0.49, "learning_rate": 4.183755573271917e-05, "loss": 0.827, "step": 44230 }, { "epoch": 0.49, "learning_rate": 4.1836633005580655e-05, "loss": 0.759, "step": 44235 }, { "epoch": 0.49, "learning_rate": 4.183571027844214e-05, "loss": 0.831, "step": 44240 }, { "epoch": 0.49, "learning_rate": 4.183478755130363e-05, "loss": 0.7964, "step": 44245 }, { "epoch": 0.49, "learning_rate": 4.183386482416512e-05, "loss": 0.8377, "step": 44250 }, { "epoch": 0.49, "learning_rate": 4.1832942097026606e-05, "loss": 0.7567, "step": 44255 }, { "epoch": 0.49, "learning_rate": 4.1832019369888094e-05, "loss": 0.818, "step": 44260 }, { "epoch": 0.49, "learning_rate": 4.1831096642749575e-05, "loss": 0.8261, "step": 44265 }, { "epoch": 0.49, "learning_rate": 4.183017391561107e-05, "loss": 0.8302, "step": 44270 }, { "epoch": 0.49, "learning_rate": 4.182925118847256e-05, "loss": 0.7497, "step": 44275 }, { "epoch": 0.49, "learning_rate": 4.1828328461334046e-05, "loss": 0.7794, "step": 44280 }, { "epoch": 0.49, "learning_rate": 4.182740573419553e-05, "loss": 0.7816, "step": 44285 }, { "epoch": 0.49, "learning_rate": 4.182648300705702e-05, "loss": 0.9425, "step": 44290 }, { "epoch": 0.49, "learning_rate": 4.182556027991851e-05, "loss": 0.7814, "step": 44295 }, { "epoch": 0.49, "learning_rate": 4.182463755278e-05, "loss": 0.7364, "step": 44300 }, { "epoch": 0.49, "learning_rate": 4.182371482564148e-05, "loss": 0.8294, "step": 44305 }, { "epoch": 0.49, "learning_rate": 4.182279209850297e-05, "loss": 0.83, "step": 44310 }, { "epoch": 0.49, "learning_rate": 4.182186937136446e-05, "loss": 0.8245, "step": 44315 }, { "epoch": 0.49, "learning_rate": 4.182094664422594e-05, "loss": 0.8631, "step": 44320 }, { "epoch": 0.49, "learning_rate": 4.182002391708743e-05, "loss": 0.8629, "step": 44325 }, { "epoch": 0.49, "learning_rate": 4.181910118994892e-05, "loss": 0.8073, "step": 44330 }, { "epoch": 0.49, "learning_rate": 4.181817846281041e-05, "loss": 0.7738, "step": 44335 }, { "epoch": 0.49, "learning_rate": 4.181725573567189e-05, "loss": 0.7274, "step": 44340 }, { "epoch": 0.49, "learning_rate": 4.181633300853338e-05, "loss": 0.8039, "step": 44345 }, { "epoch": 0.49, "learning_rate": 4.181541028139487e-05, "loss": 0.8252, "step": 44350 }, { "epoch": 0.49, "learning_rate": 4.181448755425636e-05, "loss": 0.886, "step": 44355 }, { "epoch": 0.49, "learning_rate": 4.1813564827117844e-05, "loss": 0.8832, "step": 44360 }, { "epoch": 0.49, "learning_rate": 4.181264209997933e-05, "loss": 0.7894, "step": 44365 }, { "epoch": 0.49, "learning_rate": 4.181171937284082e-05, "loss": 0.8222, "step": 44370 }, { "epoch": 0.49, "learning_rate": 4.181079664570231e-05, "loss": 0.8647, "step": 44375 }, { "epoch": 0.49, "learning_rate": 4.1809873918563796e-05, "loss": 0.8242, "step": 44380 }, { "epoch": 0.49, "learning_rate": 4.1808951191425284e-05, "loss": 0.7309, "step": 44385 }, { "epoch": 0.49, "learning_rate": 4.180802846428677e-05, "loss": 0.8097, "step": 44390 }, { "epoch": 0.49, "learning_rate": 4.180710573714826e-05, "loss": 0.8068, "step": 44395 }, { "epoch": 0.49, "learning_rate": 4.180618301000975e-05, "loss": 0.7831, "step": 44400 }, { "epoch": 0.49, "learning_rate": 4.1805260282871235e-05, "loss": 0.7773, "step": 44405 }, { "epoch": 0.49, "learning_rate": 4.180433755573272e-05, "loss": 0.8089, "step": 44410 }, { "epoch": 0.49, "learning_rate": 4.1803414828594204e-05, "loss": 0.8282, "step": 44415 }, { "epoch": 0.49, "learning_rate": 4.18024921014557e-05, "loss": 0.8215, "step": 44420 }, { "epoch": 0.49, "learning_rate": 4.1801569374317187e-05, "loss": 0.828, "step": 44425 }, { "epoch": 0.49, "learning_rate": 4.180064664717867e-05, "loss": 0.855, "step": 44430 }, { "epoch": 0.49, "learning_rate": 4.1799723920040155e-05, "loss": 0.7466, "step": 44435 }, { "epoch": 0.49, "learning_rate": 4.179880119290165e-05, "loss": 0.7886, "step": 44440 }, { "epoch": 0.49, "learning_rate": 4.179787846576314e-05, "loss": 0.7662, "step": 44445 }, { "epoch": 0.49, "learning_rate": 4.179695573862462e-05, "loss": 0.7953, "step": 44450 }, { "epoch": 0.49, "learning_rate": 4.179603301148611e-05, "loss": 0.7538, "step": 44455 }, { "epoch": 0.49, "learning_rate": 4.17951102843476e-05, "loss": 0.7911, "step": 44460 }, { "epoch": 0.49, "learning_rate": 4.179418755720908e-05, "loss": 0.8264, "step": 44465 }, { "epoch": 0.49, "learning_rate": 4.179326483007057e-05, "loss": 0.7941, "step": 44470 }, { "epoch": 0.49, "learning_rate": 4.179234210293206e-05, "loss": 0.8226, "step": 44475 }, { "epoch": 0.49, "learning_rate": 4.1791419375793546e-05, "loss": 0.7577, "step": 44480 }, { "epoch": 0.49, "learning_rate": 4.1790496648655034e-05, "loss": 0.6979, "step": 44485 }, { "epoch": 0.49, "learning_rate": 4.178957392151652e-05, "loss": 0.8644, "step": 44490 }, { "epoch": 0.49, "learning_rate": 4.178865119437801e-05, "loss": 0.8113, "step": 44495 }, { "epoch": 0.49, "learning_rate": 4.17877284672395e-05, "loss": 0.8496, "step": 44500 }, { "epoch": 0.49, "learning_rate": 4.1786805740100985e-05, "loss": 0.862, "step": 44505 }, { "epoch": 0.49, "learning_rate": 4.178588301296247e-05, "loss": 0.8406, "step": 44510 }, { "epoch": 0.49, "learning_rate": 4.178496028582396e-05, "loss": 0.8324, "step": 44515 }, { "epoch": 0.49, "learning_rate": 4.178403755868545e-05, "loss": 0.7627, "step": 44520 }, { "epoch": 0.49, "learning_rate": 4.178311483154694e-05, "loss": 0.846, "step": 44525 }, { "epoch": 0.49, "learning_rate": 4.1782192104408425e-05, "loss": 0.79, "step": 44530 }, { "epoch": 0.49, "learning_rate": 4.178126937726991e-05, "loss": 0.8174, "step": 44535 }, { "epoch": 0.49, "learning_rate": 4.1780346650131393e-05, "loss": 0.785, "step": 44540 }, { "epoch": 0.49, "learning_rate": 4.177942392299289e-05, "loss": 0.8201, "step": 44545 }, { "epoch": 0.49, "learning_rate": 4.1778501195854376e-05, "loss": 0.8018, "step": 44550 }, { "epoch": 0.49, "learning_rate": 4.1777578468715864e-05, "loss": 0.8279, "step": 44555 }, { "epoch": 0.49, "learning_rate": 4.1776655741577345e-05, "loss": 0.796, "step": 44560 }, { "epoch": 0.49, "learning_rate": 4.177573301443883e-05, "loss": 0.7912, "step": 44565 }, { "epoch": 0.49, "learning_rate": 4.177481028730033e-05, "loss": 0.7847, "step": 44570 }, { "epoch": 0.49, "learning_rate": 4.1773887560161815e-05, "loss": 0.879, "step": 44575 }, { "epoch": 0.49, "learning_rate": 4.1772964833023296e-05, "loss": 0.788, "step": 44580 }, { "epoch": 0.49, "learning_rate": 4.1772042105884784e-05, "loss": 0.802, "step": 44585 }, { "epoch": 0.49, "learning_rate": 4.177111937874628e-05, "loss": 0.8451, "step": 44590 }, { "epoch": 0.49, "learning_rate": 4.177019665160776e-05, "loss": 0.7501, "step": 44595 }, { "epoch": 0.49, "learning_rate": 4.176927392446925e-05, "loss": 0.7758, "step": 44600 }, { "epoch": 0.49, "learning_rate": 4.1768351197330736e-05, "loss": 0.8269, "step": 44605 }, { "epoch": 0.49, "learning_rate": 4.176742847019223e-05, "loss": 0.827, "step": 44610 }, { "epoch": 0.49, "learning_rate": 4.176650574305371e-05, "loss": 0.7869, "step": 44615 }, { "epoch": 0.49, "learning_rate": 4.17655830159152e-05, "loss": 0.786, "step": 44620 }, { "epoch": 0.49, "learning_rate": 4.176466028877669e-05, "loss": 0.8484, "step": 44625 }, { "epoch": 0.49, "learning_rate": 4.1763737561638175e-05, "loss": 0.8604, "step": 44630 }, { "epoch": 0.49, "learning_rate": 4.176281483449966e-05, "loss": 0.8419, "step": 44635 }, { "epoch": 0.49, "learning_rate": 4.176189210736115e-05, "loss": 0.8977, "step": 44640 }, { "epoch": 0.49, "learning_rate": 4.176096938022264e-05, "loss": 0.7978, "step": 44645 }, { "epoch": 0.49, "learning_rate": 4.1760046653084126e-05, "loss": 0.8491, "step": 44650 }, { "epoch": 0.49, "learning_rate": 4.1759123925945614e-05, "loss": 0.8085, "step": 44655 }, { "epoch": 0.49, "learning_rate": 4.17582011988071e-05, "loss": 0.8749, "step": 44660 }, { "epoch": 0.49, "learning_rate": 4.175727847166859e-05, "loss": 0.8657, "step": 44665 }, { "epoch": 0.49, "learning_rate": 4.175635574453007e-05, "loss": 0.8084, "step": 44670 }, { "epoch": 0.49, "learning_rate": 4.1755433017391565e-05, "loss": 0.8636, "step": 44675 }, { "epoch": 0.49, "learning_rate": 4.175451029025305e-05, "loss": 0.7366, "step": 44680 }, { "epoch": 0.49, "learning_rate": 4.175358756311454e-05, "loss": 0.7848, "step": 44685 }, { "epoch": 0.49, "learning_rate": 4.175266483597602e-05, "loss": 0.7804, "step": 44690 }, { "epoch": 0.49, "learning_rate": 4.175174210883752e-05, "loss": 0.8096, "step": 44695 }, { "epoch": 0.49, "learning_rate": 4.1750819381699005e-05, "loss": 0.8116, "step": 44700 }, { "epoch": 0.5, "learning_rate": 4.1749896654560486e-05, "loss": 0.8044, "step": 44705 }, { "epoch": 0.5, "learning_rate": 4.1748973927421974e-05, "loss": 0.8543, "step": 44710 }, { "epoch": 0.5, "learning_rate": 4.174805120028346e-05, "loss": 0.8893, "step": 44715 }, { "epoch": 0.5, "learning_rate": 4.1747128473144956e-05, "loss": 0.8134, "step": 44720 }, { "epoch": 0.5, "learning_rate": 4.174620574600644e-05, "loss": 0.793, "step": 44725 }, { "epoch": 0.5, "learning_rate": 4.1745283018867925e-05, "loss": 0.8004, "step": 44730 }, { "epoch": 0.5, "learning_rate": 4.174436029172941e-05, "loss": 0.7897, "step": 44735 }, { "epoch": 0.5, "learning_rate": 4.17434375645909e-05, "loss": 0.8274, "step": 44740 }, { "epoch": 0.5, "learning_rate": 4.174251483745239e-05, "loss": 0.7855, "step": 44745 }, { "epoch": 0.5, "learning_rate": 4.1741592110313876e-05, "loss": 0.8677, "step": 44750 }, { "epoch": 0.5, "learning_rate": 4.1740669383175364e-05, "loss": 0.7755, "step": 44755 }, { "epoch": 0.5, "learning_rate": 4.173974665603685e-05, "loss": 0.782, "step": 44760 }, { "epoch": 0.5, "learning_rate": 4.173882392889834e-05, "loss": 0.7987, "step": 44765 }, { "epoch": 0.5, "learning_rate": 4.173790120175983e-05, "loss": 0.8009, "step": 44770 }, { "epoch": 0.5, "learning_rate": 4.1736978474621316e-05, "loss": 0.8641, "step": 44775 }, { "epoch": 0.5, "learning_rate": 4.1736055747482803e-05, "loss": 0.8628, "step": 44780 }, { "epoch": 0.5, "learning_rate": 4.173513302034429e-05, "loss": 0.8145, "step": 44785 }, { "epoch": 0.5, "learning_rate": 4.173421029320578e-05, "loss": 0.77, "step": 44790 }, { "epoch": 0.5, "learning_rate": 4.173328756606727e-05, "loss": 0.7926, "step": 44795 }, { "epoch": 0.5, "learning_rate": 4.173236483892875e-05, "loss": 0.782, "step": 44800 }, { "epoch": 0.5, "learning_rate": 4.173144211179024e-05, "loss": 0.7892, "step": 44805 }, { "epoch": 0.5, "learning_rate": 4.173051938465173e-05, "loss": 0.8317, "step": 44810 }, { "epoch": 0.5, "learning_rate": 4.172959665751321e-05, "loss": 0.8761, "step": 44815 }, { "epoch": 0.5, "learning_rate": 4.17286739303747e-05, "loss": 0.7683, "step": 44820 }, { "epoch": 0.5, "learning_rate": 4.1727751203236194e-05, "loss": 0.8013, "step": 44825 }, { "epoch": 0.5, "learning_rate": 4.172682847609768e-05, "loss": 0.808, "step": 44830 }, { "epoch": 0.5, "learning_rate": 4.172590574895916e-05, "loss": 0.7503, "step": 44835 }, { "epoch": 0.5, "learning_rate": 4.172498302182065e-05, "loss": 0.7685, "step": 44840 }, { "epoch": 0.5, "learning_rate": 4.1724060294682145e-05, "loss": 0.8383, "step": 44845 }, { "epoch": 0.5, "learning_rate": 4.1723137567543627e-05, "loss": 0.7698, "step": 44850 }, { "epoch": 0.5, "learning_rate": 4.1722214840405114e-05, "loss": 0.8308, "step": 44855 }, { "epoch": 0.5, "learning_rate": 4.17212921132666e-05, "loss": 0.783, "step": 44860 }, { "epoch": 0.5, "learning_rate": 4.172036938612809e-05, "loss": 0.7521, "step": 44865 }, { "epoch": 0.5, "learning_rate": 4.171944665898958e-05, "loss": 0.8523, "step": 44870 }, { "epoch": 0.5, "learning_rate": 4.1718523931851066e-05, "loss": 0.7316, "step": 44875 }, { "epoch": 0.5, "learning_rate": 4.1717601204712554e-05, "loss": 0.7882, "step": 44880 }, { "epoch": 0.5, "learning_rate": 4.171667847757404e-05, "loss": 0.8123, "step": 44885 }, { "epoch": 0.5, "learning_rate": 4.171575575043553e-05, "loss": 0.8138, "step": 44890 }, { "epoch": 0.5, "learning_rate": 4.171483302329702e-05, "loss": 0.7966, "step": 44895 }, { "epoch": 0.5, "learning_rate": 4.1713910296158505e-05, "loss": 0.8132, "step": 44900 }, { "epoch": 0.5, "learning_rate": 4.171298756901999e-05, "loss": 0.7986, "step": 44905 }, { "epoch": 0.5, "learning_rate": 4.171206484188148e-05, "loss": 0.8893, "step": 44910 }, { "epoch": 0.5, "learning_rate": 4.171114211474297e-05, "loss": 0.7849, "step": 44915 }, { "epoch": 0.5, "learning_rate": 4.1710219387604456e-05, "loss": 0.7884, "step": 44920 }, { "epoch": 0.5, "learning_rate": 4.170929666046594e-05, "loss": 0.8339, "step": 44925 }, { "epoch": 0.5, "learning_rate": 4.170837393332743e-05, "loss": 0.8047, "step": 44930 }, { "epoch": 0.5, "learning_rate": 4.170745120618892e-05, "loss": 0.7522, "step": 44935 }, { "epoch": 0.5, "learning_rate": 4.170652847905041e-05, "loss": 0.7392, "step": 44940 }, { "epoch": 0.5, "learning_rate": 4.170560575191189e-05, "loss": 0.7247, "step": 44945 }, { "epoch": 0.5, "learning_rate": 4.170468302477338e-05, "loss": 0.7579, "step": 44950 }, { "epoch": 0.5, "learning_rate": 4.170376029763487e-05, "loss": 0.8458, "step": 44955 }, { "epoch": 0.5, "learning_rate": 4.170283757049636e-05, "loss": 0.8533, "step": 44960 }, { "epoch": 0.5, "learning_rate": 4.170191484335784e-05, "loss": 0.7525, "step": 44965 }, { "epoch": 0.5, "learning_rate": 4.170099211621933e-05, "loss": 0.8043, "step": 44970 }, { "epoch": 0.5, "learning_rate": 4.170006938908082e-05, "loss": 0.7772, "step": 44975 }, { "epoch": 0.5, "learning_rate": 4.1699146661942304e-05, "loss": 0.8174, "step": 44980 }, { "epoch": 0.5, "learning_rate": 4.169822393480379e-05, "loss": 0.826, "step": 44985 }, { "epoch": 0.5, "learning_rate": 4.169730120766528e-05, "loss": 0.808, "step": 44990 }, { "epoch": 0.5, "learning_rate": 4.1696378480526774e-05, "loss": 0.8483, "step": 44995 }, { "epoch": 0.5, "learning_rate": 4.1695455753388255e-05, "loss": 0.8402, "step": 45000 }, { "epoch": 0.5, "eval_loss": 0.7107521891593933, "eval_runtime": 69.4069, "eval_samples_per_second": 28.816, "eval_steps_per_second": 14.408, "step": 45000 }, { "epoch": 0.5, "learning_rate": 4.169453302624974e-05, "loss": 0.9456, "step": 45005 }, { "epoch": 0.5, "learning_rate": 4.169361029911123e-05, "loss": 0.8777, "step": 45010 }, { "epoch": 0.5, "learning_rate": 4.169268757197272e-05, "loss": 0.7776, "step": 45015 }, { "epoch": 0.5, "learning_rate": 4.169176484483421e-05, "loss": 0.8135, "step": 45020 }, { "epoch": 0.5, "learning_rate": 4.1690842117695694e-05, "loss": 0.8566, "step": 45025 }, { "epoch": 0.5, "learning_rate": 4.168991939055718e-05, "loss": 0.851, "step": 45030 }, { "epoch": 0.5, "learning_rate": 4.168899666341867e-05, "loss": 0.7309, "step": 45035 }, { "epoch": 0.5, "learning_rate": 4.168807393628016e-05, "loss": 0.7958, "step": 45040 }, { "epoch": 0.5, "learning_rate": 4.1687151209141646e-05, "loss": 0.7951, "step": 45045 }, { "epoch": 0.5, "learning_rate": 4.1686228482003134e-05, "loss": 0.8135, "step": 45050 }, { "epoch": 0.5, "learning_rate": 4.1685305754864615e-05, "loss": 0.8912, "step": 45055 }, { "epoch": 0.5, "learning_rate": 4.168438302772611e-05, "loss": 0.7421, "step": 45060 }, { "epoch": 0.5, "learning_rate": 4.16834603005876e-05, "loss": 0.8015, "step": 45065 }, { "epoch": 0.5, "learning_rate": 4.1682537573449085e-05, "loss": 0.7768, "step": 45070 }, { "epoch": 0.5, "learning_rate": 4.1681614846310566e-05, "loss": 0.7563, "step": 45075 }, { "epoch": 0.5, "learning_rate": 4.168069211917206e-05, "loss": 0.8456, "step": 45080 }, { "epoch": 0.5, "learning_rate": 4.167976939203355e-05, "loss": 0.823, "step": 45085 }, { "epoch": 0.5, "learning_rate": 4.167884666489503e-05, "loss": 0.8398, "step": 45090 }, { "epoch": 0.5, "learning_rate": 4.167792393775652e-05, "loss": 0.795, "step": 45095 }, { "epoch": 0.5, "learning_rate": 4.1677001210618005e-05, "loss": 0.7781, "step": 45100 }, { "epoch": 0.5, "learning_rate": 4.16760784834795e-05, "loss": 0.8184, "step": 45105 }, { "epoch": 0.5, "learning_rate": 4.167515575634098e-05, "loss": 0.811, "step": 45110 }, { "epoch": 0.5, "learning_rate": 4.167423302920247e-05, "loss": 0.8282, "step": 45115 }, { "epoch": 0.5, "learning_rate": 4.167331030206396e-05, "loss": 0.763, "step": 45120 }, { "epoch": 0.5, "learning_rate": 4.1672387574925445e-05, "loss": 0.8129, "step": 45125 }, { "epoch": 0.5, "learning_rate": 4.167146484778693e-05, "loss": 0.7194, "step": 45130 }, { "epoch": 0.5, "learning_rate": 4.167054212064842e-05, "loss": 0.8444, "step": 45135 }, { "epoch": 0.5, "learning_rate": 4.166961939350991e-05, "loss": 0.8828, "step": 45140 }, { "epoch": 0.5, "learning_rate": 4.1668696666371396e-05, "loss": 0.8055, "step": 45145 }, { "epoch": 0.5, "learning_rate": 4.1667773939232884e-05, "loss": 0.8039, "step": 45150 }, { "epoch": 0.5, "learning_rate": 4.166685121209437e-05, "loss": 0.7481, "step": 45155 }, { "epoch": 0.5, "learning_rate": 4.166592848495586e-05, "loss": 0.7922, "step": 45160 }, { "epoch": 0.5, "learning_rate": 4.166500575781734e-05, "loss": 0.8177, "step": 45165 }, { "epoch": 0.5, "learning_rate": 4.1664083030678835e-05, "loss": 0.8336, "step": 45170 }, { "epoch": 0.5, "learning_rate": 4.166316030354032e-05, "loss": 0.7561, "step": 45175 }, { "epoch": 0.5, "learning_rate": 4.166223757640181e-05, "loss": 0.8421, "step": 45180 }, { "epoch": 0.5, "learning_rate": 4.166131484926329e-05, "loss": 0.7206, "step": 45185 }, { "epoch": 0.5, "learning_rate": 4.166039212212479e-05, "loss": 0.8383, "step": 45190 }, { "epoch": 0.5, "learning_rate": 4.1659469394986275e-05, "loss": 0.7736, "step": 45195 }, { "epoch": 0.5, "learning_rate": 4.1658546667847756e-05, "loss": 0.9503, "step": 45200 }, { "epoch": 0.5, "learning_rate": 4.1657623940709243e-05, "loss": 0.8007, "step": 45205 }, { "epoch": 0.5, "learning_rate": 4.165670121357074e-05, "loss": 0.8462, "step": 45210 }, { "epoch": 0.5, "learning_rate": 4.1655778486432226e-05, "loss": 0.7696, "step": 45215 }, { "epoch": 0.5, "learning_rate": 4.165485575929371e-05, "loss": 0.875, "step": 45220 }, { "epoch": 0.5, "learning_rate": 4.1653933032155195e-05, "loss": 0.8703, "step": 45225 }, { "epoch": 0.5, "learning_rate": 4.165301030501669e-05, "loss": 0.8711, "step": 45230 }, { "epoch": 0.5, "learning_rate": 4.165208757787817e-05, "loss": 0.754, "step": 45235 }, { "epoch": 0.5, "learning_rate": 4.165116485073966e-05, "loss": 0.7898, "step": 45240 }, { "epoch": 0.5, "learning_rate": 4.1650242123601146e-05, "loss": 0.7476, "step": 45245 }, { "epoch": 0.5, "learning_rate": 4.1649319396462634e-05, "loss": 0.8474, "step": 45250 }, { "epoch": 0.5, "learning_rate": 4.164839666932412e-05, "loss": 0.7811, "step": 45255 }, { "epoch": 0.5, "learning_rate": 4.164747394218561e-05, "loss": 0.7652, "step": 45260 }, { "epoch": 0.5, "learning_rate": 4.16465512150471e-05, "loss": 0.7332, "step": 45265 }, { "epoch": 0.5, "learning_rate": 4.1645628487908586e-05, "loss": 0.7692, "step": 45270 }, { "epoch": 0.5, "learning_rate": 4.164470576077007e-05, "loss": 0.7811, "step": 45275 }, { "epoch": 0.5, "learning_rate": 4.164378303363156e-05, "loss": 0.8564, "step": 45280 }, { "epoch": 0.5, "learning_rate": 4.164286030649305e-05, "loss": 0.8622, "step": 45285 }, { "epoch": 0.5, "learning_rate": 4.164193757935454e-05, "loss": 0.7526, "step": 45290 }, { "epoch": 0.5, "learning_rate": 4.1641014852216025e-05, "loss": 0.7319, "step": 45295 }, { "epoch": 0.5, "learning_rate": 4.164009212507751e-05, "loss": 0.7455, "step": 45300 }, { "epoch": 0.5, "learning_rate": 4.1639169397939e-05, "loss": 0.7742, "step": 45305 }, { "epoch": 0.5, "learning_rate": 4.163824667080048e-05, "loss": 0.7247, "step": 45310 }, { "epoch": 0.5, "learning_rate": 4.163732394366197e-05, "loss": 0.7046, "step": 45315 }, { "epoch": 0.5, "learning_rate": 4.1636401216523464e-05, "loss": 0.8034, "step": 45320 }, { "epoch": 0.5, "learning_rate": 4.163547848938495e-05, "loss": 0.7071, "step": 45325 }, { "epoch": 0.5, "learning_rate": 4.163455576224643e-05, "loss": 0.7585, "step": 45330 }, { "epoch": 0.5, "learning_rate": 4.163363303510792e-05, "loss": 0.782, "step": 45335 }, { "epoch": 0.5, "learning_rate": 4.1632710307969415e-05, "loss": 0.7045, "step": 45340 }, { "epoch": 0.5, "learning_rate": 4.16317875808309e-05, "loss": 0.7455, "step": 45345 }, { "epoch": 0.5, "learning_rate": 4.1630864853692384e-05, "loss": 0.7981, "step": 45350 }, { "epoch": 0.5, "learning_rate": 4.162994212655387e-05, "loss": 0.7453, "step": 45355 }, { "epoch": 0.5, "learning_rate": 4.162901939941537e-05, "loss": 0.8002, "step": 45360 }, { "epoch": 0.5, "learning_rate": 4.162809667227685e-05, "loss": 0.7459, "step": 45365 }, { "epoch": 0.5, "learning_rate": 4.1627173945138336e-05, "loss": 0.71, "step": 45370 }, { "epoch": 0.5, "learning_rate": 4.1626251217999824e-05, "loss": 0.7707, "step": 45375 }, { "epoch": 0.5, "learning_rate": 4.162532849086132e-05, "loss": 0.7593, "step": 45380 }, { "epoch": 0.5, "learning_rate": 4.16244057637228e-05, "loss": 0.7124, "step": 45385 }, { "epoch": 0.5, "learning_rate": 4.162348303658429e-05, "loss": 0.7276, "step": 45390 }, { "epoch": 0.5, "learning_rate": 4.1622560309445775e-05, "loss": 0.7295, "step": 45395 }, { "epoch": 0.5, "learning_rate": 4.162163758230726e-05, "loss": 0.7528, "step": 45400 }, { "epoch": 0.5, "learning_rate": 4.162071485516875e-05, "loss": 0.7522, "step": 45405 }, { "epoch": 0.5, "learning_rate": 4.161979212803024e-05, "loss": 0.704, "step": 45410 }, { "epoch": 0.5, "learning_rate": 4.1618869400891726e-05, "loss": 0.7, "step": 45415 }, { "epoch": 0.5, "learning_rate": 4.1617946673753214e-05, "loss": 0.7823, "step": 45420 }, { "epoch": 0.5, "learning_rate": 4.16170239466147e-05, "loss": 0.7752, "step": 45425 }, { "epoch": 0.5, "learning_rate": 4.161610121947619e-05, "loss": 0.7083, "step": 45430 }, { "epoch": 0.5, "learning_rate": 4.161517849233768e-05, "loss": 0.7946, "step": 45435 }, { "epoch": 0.5, "learning_rate": 4.161425576519916e-05, "loss": 0.6956, "step": 45440 }, { "epoch": 0.5, "learning_rate": 4.1613333038060653e-05, "loss": 0.8147, "step": 45445 }, { "epoch": 0.5, "learning_rate": 4.161241031092214e-05, "loss": 0.7415, "step": 45450 }, { "epoch": 0.5, "learning_rate": 4.161148758378363e-05, "loss": 0.7177, "step": 45455 }, { "epoch": 0.5, "learning_rate": 4.161056485664511e-05, "loss": 0.7483, "step": 45460 }, { "epoch": 0.5, "learning_rate": 4.16096421295066e-05, "loss": 0.8225, "step": 45465 }, { "epoch": 0.5, "learning_rate": 4.160871940236809e-05, "loss": 0.8097, "step": 45470 }, { "epoch": 0.5, "learning_rate": 4.1607796675229574e-05, "loss": 0.7954, "step": 45475 }, { "epoch": 0.5, "learning_rate": 4.160687394809106e-05, "loss": 0.6822, "step": 45480 }, { "epoch": 0.5, "learning_rate": 4.160595122095255e-05, "loss": 0.7136, "step": 45485 }, { "epoch": 0.5, "learning_rate": 4.1605028493814044e-05, "loss": 0.7167, "step": 45490 }, { "epoch": 0.5, "learning_rate": 4.1604105766675525e-05, "loss": 0.7641, "step": 45495 }, { "epoch": 0.5, "learning_rate": 4.160318303953701e-05, "loss": 0.7898, "step": 45500 }, { "epoch": 0.5, "learning_rate": 4.16022603123985e-05, "loss": 0.7458, "step": 45505 }, { "epoch": 0.5, "learning_rate": 4.160133758525999e-05, "loss": 0.7119, "step": 45510 }, { "epoch": 0.5, "learning_rate": 4.1600414858121477e-05, "loss": 0.6671, "step": 45515 }, { "epoch": 0.5, "learning_rate": 4.1599492130982964e-05, "loss": 0.7668, "step": 45520 }, { "epoch": 0.5, "learning_rate": 4.159856940384445e-05, "loss": 0.7986, "step": 45525 }, { "epoch": 0.5, "learning_rate": 4.159764667670594e-05, "loss": 0.7728, "step": 45530 }, { "epoch": 0.5, "learning_rate": 4.159672394956743e-05, "loss": 0.6799, "step": 45535 }, { "epoch": 0.5, "learning_rate": 4.1595801222428916e-05, "loss": 0.6739, "step": 45540 }, { "epoch": 0.5, "learning_rate": 4.1594878495290404e-05, "loss": 0.6911, "step": 45545 }, { "epoch": 0.5, "learning_rate": 4.1593955768151885e-05, "loss": 0.6945, "step": 45550 }, { "epoch": 0.5, "learning_rate": 4.159303304101338e-05, "loss": 0.789, "step": 45555 }, { "epoch": 0.5, "learning_rate": 4.159211031387487e-05, "loss": 0.7753, "step": 45560 }, { "epoch": 0.5, "learning_rate": 4.1591187586736355e-05, "loss": 0.7467, "step": 45565 }, { "epoch": 0.5, "learning_rate": 4.1590264859597836e-05, "loss": 0.7865, "step": 45570 }, { "epoch": 0.5, "learning_rate": 4.158934213245933e-05, "loss": 0.7481, "step": 45575 }, { "epoch": 0.5, "learning_rate": 4.158841940532082e-05, "loss": 0.7461, "step": 45580 }, { "epoch": 0.5, "learning_rate": 4.15874966781823e-05, "loss": 0.6802, "step": 45585 }, { "epoch": 0.5, "learning_rate": 4.158657395104379e-05, "loss": 0.7617, "step": 45590 }, { "epoch": 0.5, "learning_rate": 4.158565122390528e-05, "loss": 0.7648, "step": 45595 }, { "epoch": 0.5, "learning_rate": 4.158472849676677e-05, "loss": 0.7524, "step": 45600 }, { "epoch": 0.5, "learning_rate": 4.158380576962825e-05, "loss": 0.729, "step": 45605 }, { "epoch": 0.51, "learning_rate": 4.158288304248974e-05, "loss": 0.7167, "step": 45610 }, { "epoch": 0.51, "learning_rate": 4.1581960315351234e-05, "loss": 0.7208, "step": 45615 }, { "epoch": 0.51, "learning_rate": 4.1581037588212715e-05, "loss": 0.7681, "step": 45620 }, { "epoch": 0.51, "learning_rate": 4.15801148610742e-05, "loss": 0.704, "step": 45625 }, { "epoch": 0.51, "learning_rate": 4.157919213393569e-05, "loss": 0.8036, "step": 45630 }, { "epoch": 0.51, "learning_rate": 4.157826940679718e-05, "loss": 0.716, "step": 45635 }, { "epoch": 0.51, "learning_rate": 4.1577346679658666e-05, "loss": 0.7213, "step": 45640 }, { "epoch": 0.51, "learning_rate": 4.1576423952520154e-05, "loss": 0.735, "step": 45645 }, { "epoch": 0.51, "learning_rate": 4.157550122538164e-05, "loss": 0.6969, "step": 45650 }, { "epoch": 0.51, "learning_rate": 4.157457849824313e-05, "loss": 0.7855, "step": 45655 }, { "epoch": 0.51, "learning_rate": 4.157365577110462e-05, "loss": 0.6809, "step": 45660 }, { "epoch": 0.51, "learning_rate": 4.1572733043966105e-05, "loss": 0.7438, "step": 45665 }, { "epoch": 0.51, "learning_rate": 4.157181031682759e-05, "loss": 0.7649, "step": 45670 }, { "epoch": 0.51, "learning_rate": 4.157088758968908e-05, "loss": 0.7965, "step": 45675 }, { "epoch": 0.51, "learning_rate": 4.156996486255057e-05, "loss": 0.7615, "step": 45680 }, { "epoch": 0.51, "learning_rate": 4.156904213541206e-05, "loss": 0.7451, "step": 45685 }, { "epoch": 0.51, "learning_rate": 4.1568119408273544e-05, "loss": 0.6524, "step": 45690 }, { "epoch": 0.51, "learning_rate": 4.1567196681135026e-05, "loss": 0.7607, "step": 45695 }, { "epoch": 0.51, "learning_rate": 4.1566273953996513e-05, "loss": 0.7664, "step": 45700 }, { "epoch": 0.51, "learning_rate": 4.156535122685801e-05, "loss": 0.7971, "step": 45705 }, { "epoch": 0.51, "learning_rate": 4.1564428499719496e-05, "loss": 0.735, "step": 45710 }, { "epoch": 0.51, "learning_rate": 4.156350577258098e-05, "loss": 0.7552, "step": 45715 }, { "epoch": 0.51, "learning_rate": 4.1562583045442465e-05, "loss": 0.7491, "step": 45720 }, { "epoch": 0.51, "learning_rate": 4.156166031830396e-05, "loss": 0.7728, "step": 45725 }, { "epoch": 0.51, "learning_rate": 4.156073759116545e-05, "loss": 0.7196, "step": 45730 }, { "epoch": 0.51, "learning_rate": 4.155981486402693e-05, "loss": 0.702, "step": 45735 }, { "epoch": 0.51, "learning_rate": 4.1558892136888416e-05, "loss": 0.7378, "step": 45740 }, { "epoch": 0.51, "learning_rate": 4.155796940974991e-05, "loss": 0.725, "step": 45745 }, { "epoch": 0.51, "learning_rate": 4.155704668261139e-05, "loss": 0.7554, "step": 45750 }, { "epoch": 0.51, "learning_rate": 4.155612395547288e-05, "loss": 0.6935, "step": 45755 }, { "epoch": 0.51, "learning_rate": 4.155520122833437e-05, "loss": 0.7924, "step": 45760 }, { "epoch": 0.51, "learning_rate": 4.155427850119586e-05, "loss": 0.7349, "step": 45765 }, { "epoch": 0.51, "learning_rate": 4.155335577405734e-05, "loss": 0.7437, "step": 45770 }, { "epoch": 0.51, "learning_rate": 4.155243304691883e-05, "loss": 0.7535, "step": 45775 }, { "epoch": 0.51, "learning_rate": 4.155151031978032e-05, "loss": 0.7425, "step": 45780 }, { "epoch": 0.51, "learning_rate": 4.155058759264181e-05, "loss": 0.759, "step": 45785 }, { "epoch": 0.51, "learning_rate": 4.1549664865503295e-05, "loss": 0.7156, "step": 45790 }, { "epoch": 0.51, "learning_rate": 4.154874213836478e-05, "loss": 0.7634, "step": 45795 }, { "epoch": 0.51, "learning_rate": 4.154781941122627e-05, "loss": 0.7453, "step": 45800 }, { "epoch": 0.51, "learning_rate": 4.154689668408776e-05, "loss": 0.7285, "step": 45805 }, { "epoch": 0.51, "learning_rate": 4.1545973956949246e-05, "loss": 0.6902, "step": 45810 }, { "epoch": 0.51, "learning_rate": 4.1545051229810734e-05, "loss": 0.7323, "step": 45815 }, { "epoch": 0.51, "learning_rate": 4.154412850267222e-05, "loss": 0.7721, "step": 45820 }, { "epoch": 0.51, "learning_rate": 4.15432057755337e-05, "loss": 0.7548, "step": 45825 }, { "epoch": 0.51, "learning_rate": 4.15422830483952e-05, "loss": 0.6785, "step": 45830 }, { "epoch": 0.51, "learning_rate": 4.1541360321256685e-05, "loss": 0.7004, "step": 45835 }, { "epoch": 0.51, "learning_rate": 4.154043759411817e-05, "loss": 0.7491, "step": 45840 }, { "epoch": 0.51, "learning_rate": 4.1539514866979654e-05, "loss": 0.7329, "step": 45845 }, { "epoch": 0.51, "learning_rate": 4.153859213984114e-05, "loss": 0.7356, "step": 45850 }, { "epoch": 0.51, "learning_rate": 4.153766941270264e-05, "loss": 0.754, "step": 45855 }, { "epoch": 0.51, "learning_rate": 4.153674668556412e-05, "loss": 0.8177, "step": 45860 }, { "epoch": 0.51, "learning_rate": 4.1535823958425606e-05, "loss": 0.7491, "step": 45865 }, { "epoch": 0.51, "learning_rate": 4.1534901231287093e-05, "loss": 0.7809, "step": 45870 }, { "epoch": 0.51, "learning_rate": 4.153397850414859e-05, "loss": 0.7428, "step": 45875 }, { "epoch": 0.51, "learning_rate": 4.153305577701007e-05, "loss": 0.7327, "step": 45880 }, { "epoch": 0.51, "learning_rate": 4.153213304987156e-05, "loss": 0.7303, "step": 45885 }, { "epoch": 0.51, "learning_rate": 4.1531210322733045e-05, "loss": 0.7629, "step": 45890 }, { "epoch": 0.51, "learning_rate": 4.153028759559453e-05, "loss": 0.7732, "step": 45895 }, { "epoch": 0.51, "learning_rate": 4.152936486845602e-05, "loss": 0.7442, "step": 45900 }, { "epoch": 0.51, "learning_rate": 4.152844214131751e-05, "loss": 0.7582, "step": 45905 }, { "epoch": 0.51, "learning_rate": 4.1527519414178996e-05, "loss": 0.7491, "step": 45910 }, { "epoch": 0.51, "learning_rate": 4.1526596687040484e-05, "loss": 0.7325, "step": 45915 }, { "epoch": 0.51, "learning_rate": 4.152567395990197e-05, "loss": 0.7408, "step": 45920 }, { "epoch": 0.51, "learning_rate": 4.152475123276346e-05, "loss": 0.8441, "step": 45925 }, { "epoch": 0.51, "learning_rate": 4.152382850562495e-05, "loss": 0.7572, "step": 45930 }, { "epoch": 0.51, "learning_rate": 4.152290577848643e-05, "loss": 0.7595, "step": 45935 }, { "epoch": 0.51, "learning_rate": 4.152198305134792e-05, "loss": 0.7455, "step": 45940 }, { "epoch": 0.51, "learning_rate": 4.152106032420941e-05, "loss": 0.8278, "step": 45945 }, { "epoch": 0.51, "learning_rate": 4.15201375970709e-05, "loss": 0.7783, "step": 45950 }, { "epoch": 0.51, "learning_rate": 4.151921486993238e-05, "loss": 0.7726, "step": 45955 }, { "epoch": 0.51, "learning_rate": 4.1518292142793875e-05, "loss": 0.7404, "step": 45960 }, { "epoch": 0.51, "learning_rate": 4.151736941565536e-05, "loss": 0.7604, "step": 45965 }, { "epoch": 0.51, "learning_rate": 4.1516446688516844e-05, "loss": 0.7786, "step": 45970 }, { "epoch": 0.51, "learning_rate": 4.151552396137833e-05, "loss": 0.7098, "step": 45975 }, { "epoch": 0.51, "learning_rate": 4.1514601234239826e-05, "loss": 0.7121, "step": 45980 }, { "epoch": 0.51, "learning_rate": 4.1513678507101314e-05, "loss": 0.7228, "step": 45985 }, { "epoch": 0.51, "learning_rate": 4.1512755779962795e-05, "loss": 0.7286, "step": 45990 }, { "epoch": 0.51, "learning_rate": 4.151183305282428e-05, "loss": 0.7293, "step": 45995 }, { "epoch": 0.51, "learning_rate": 4.151091032568577e-05, "loss": 0.8553, "step": 46000 }, { "epoch": 0.51, "eval_loss": 0.7004783153533936, "eval_runtime": 69.2003, "eval_samples_per_second": 28.902, "eval_steps_per_second": 14.451, "step": 46000 }, { "epoch": 0.51, "learning_rate": 4.150998759854726e-05, "loss": 0.7397, "step": 46005 }, { "epoch": 0.51, "learning_rate": 4.1509064871408746e-05, "loss": 0.7648, "step": 46010 }, { "epoch": 0.51, "learning_rate": 4.1508142144270234e-05, "loss": 0.7213, "step": 46015 }, { "epoch": 0.51, "learning_rate": 4.150721941713172e-05, "loss": 0.7746, "step": 46020 }, { "epoch": 0.51, "learning_rate": 4.150629668999321e-05, "loss": 0.8032, "step": 46025 }, { "epoch": 0.51, "learning_rate": 4.15053739628547e-05, "loss": 0.7751, "step": 46030 }, { "epoch": 0.51, "learning_rate": 4.1504451235716186e-05, "loss": 0.8294, "step": 46035 }, { "epoch": 0.51, "learning_rate": 4.1503528508577674e-05, "loss": 0.7825, "step": 46040 }, { "epoch": 0.51, "learning_rate": 4.150260578143916e-05, "loss": 0.6956, "step": 46045 }, { "epoch": 0.51, "learning_rate": 4.150168305430065e-05, "loss": 0.8041, "step": 46050 }, { "epoch": 0.51, "learning_rate": 4.150076032716214e-05, "loss": 0.7071, "step": 46055 }, { "epoch": 0.51, "learning_rate": 4.1499837600023625e-05, "loss": 0.7345, "step": 46060 }, { "epoch": 0.51, "learning_rate": 4.149891487288511e-05, "loss": 0.7284, "step": 46065 }, { "epoch": 0.51, "learning_rate": 4.14979921457466e-05, "loss": 0.8014, "step": 46070 }, { "epoch": 0.51, "learning_rate": 4.149706941860809e-05, "loss": 0.7441, "step": 46075 }, { "epoch": 0.51, "learning_rate": 4.149614669146957e-05, "loss": 0.7554, "step": 46080 }, { "epoch": 0.51, "learning_rate": 4.149522396433106e-05, "loss": 0.7486, "step": 46085 }, { "epoch": 0.51, "learning_rate": 4.149430123719255e-05, "loss": 0.7574, "step": 46090 }, { "epoch": 0.51, "learning_rate": 4.149337851005404e-05, "loss": 0.7262, "step": 46095 }, { "epoch": 0.51, "learning_rate": 4.149245578291552e-05, "loss": 0.7567, "step": 46100 }, { "epoch": 0.51, "learning_rate": 4.149153305577701e-05, "loss": 0.7794, "step": 46105 }, { "epoch": 0.51, "learning_rate": 4.1490610328638503e-05, "loss": 0.7653, "step": 46110 }, { "epoch": 0.51, "learning_rate": 4.148968760149999e-05, "loss": 0.7338, "step": 46115 }, { "epoch": 0.51, "learning_rate": 4.148876487436147e-05, "loss": 0.7487, "step": 46120 }, { "epoch": 0.51, "learning_rate": 4.148784214722296e-05, "loss": 0.7253, "step": 46125 }, { "epoch": 0.51, "learning_rate": 4.1486919420084455e-05, "loss": 0.7541, "step": 46130 }, { "epoch": 0.51, "learning_rate": 4.1485996692945936e-05, "loss": 0.6921, "step": 46135 }, { "epoch": 0.51, "learning_rate": 4.1485073965807424e-05, "loss": 0.8231, "step": 46140 }, { "epoch": 0.51, "learning_rate": 4.148415123866891e-05, "loss": 0.7354, "step": 46145 }, { "epoch": 0.51, "learning_rate": 4.14832285115304e-05, "loss": 0.6729, "step": 46150 }, { "epoch": 0.51, "learning_rate": 4.148230578439189e-05, "loss": 0.7216, "step": 46155 }, { "epoch": 0.51, "learning_rate": 4.1481383057253375e-05, "loss": 0.812, "step": 46160 }, { "epoch": 0.51, "learning_rate": 4.148046033011486e-05, "loss": 0.7708, "step": 46165 }, { "epoch": 0.51, "learning_rate": 4.147953760297635e-05, "loss": 0.7888, "step": 46170 }, { "epoch": 0.51, "learning_rate": 4.147861487583784e-05, "loss": 0.7992, "step": 46175 }, { "epoch": 0.51, "learning_rate": 4.1477692148699327e-05, "loss": 0.7599, "step": 46180 }, { "epoch": 0.51, "learning_rate": 4.1476769421560814e-05, "loss": 0.7985, "step": 46185 }, { "epoch": 0.51, "learning_rate": 4.14758466944223e-05, "loss": 0.7363, "step": 46190 }, { "epoch": 0.51, "learning_rate": 4.147492396728379e-05, "loss": 0.725, "step": 46195 }, { "epoch": 0.51, "learning_rate": 4.147400124014528e-05, "loss": 0.7016, "step": 46200 }, { "epoch": 0.51, "learning_rate": 4.1473078513006766e-05, "loss": 0.7827, "step": 46205 }, { "epoch": 0.51, "learning_rate": 4.147215578586825e-05, "loss": 0.7367, "step": 46210 }, { "epoch": 0.51, "learning_rate": 4.147123305872974e-05, "loss": 0.7034, "step": 46215 }, { "epoch": 0.51, "learning_rate": 4.147031033159123e-05, "loss": 0.7793, "step": 46220 }, { "epoch": 0.51, "learning_rate": 4.146938760445272e-05, "loss": 0.7816, "step": 46225 }, { "epoch": 0.51, "learning_rate": 4.14684648773142e-05, "loss": 0.7326, "step": 46230 }, { "epoch": 0.51, "learning_rate": 4.1467542150175686e-05, "loss": 0.7442, "step": 46235 }, { "epoch": 0.51, "learning_rate": 4.146661942303718e-05, "loss": 0.7752, "step": 46240 }, { "epoch": 0.51, "learning_rate": 4.146569669589866e-05, "loss": 0.7534, "step": 46245 }, { "epoch": 0.51, "learning_rate": 4.146477396876015e-05, "loss": 0.7533, "step": 46250 }, { "epoch": 0.51, "learning_rate": 4.146385124162164e-05, "loss": 0.7552, "step": 46255 }, { "epoch": 0.51, "learning_rate": 4.146292851448313e-05, "loss": 0.7798, "step": 46260 }, { "epoch": 0.51, "learning_rate": 4.146200578734461e-05, "loss": 0.7316, "step": 46265 }, { "epoch": 0.51, "learning_rate": 4.14610830602061e-05, "loss": 0.7986, "step": 46270 }, { "epoch": 0.51, "learning_rate": 4.146016033306759e-05, "loss": 0.7571, "step": 46275 }, { "epoch": 0.51, "learning_rate": 4.145923760592908e-05, "loss": 0.7131, "step": 46280 }, { "epoch": 0.51, "learning_rate": 4.1458314878790565e-05, "loss": 0.7462, "step": 46285 }, { "epoch": 0.51, "learning_rate": 4.145739215165205e-05, "loss": 0.6967, "step": 46290 }, { "epoch": 0.51, "learning_rate": 4.145646942451354e-05, "loss": 0.7292, "step": 46295 }, { "epoch": 0.51, "learning_rate": 4.145554669737503e-05, "loss": 0.7151, "step": 46300 }, { "epoch": 0.51, "learning_rate": 4.1454623970236516e-05, "loss": 0.7588, "step": 46305 }, { "epoch": 0.51, "learning_rate": 4.1453701243098004e-05, "loss": 0.7251, "step": 46310 }, { "epoch": 0.51, "learning_rate": 4.145277851595949e-05, "loss": 0.7165, "step": 46315 }, { "epoch": 0.51, "learning_rate": 4.145185578882097e-05, "loss": 0.7557, "step": 46320 }, { "epoch": 0.51, "learning_rate": 4.145093306168247e-05, "loss": 0.7572, "step": 46325 }, { "epoch": 0.51, "learning_rate": 4.1450010334543955e-05, "loss": 0.6868, "step": 46330 }, { "epoch": 0.51, "learning_rate": 4.144908760740544e-05, "loss": 0.7681, "step": 46335 }, { "epoch": 0.51, "learning_rate": 4.1448164880266924e-05, "loss": 0.7937, "step": 46340 }, { "epoch": 0.51, "learning_rate": 4.144724215312842e-05, "loss": 0.7919, "step": 46345 }, { "epoch": 0.51, "learning_rate": 4.144631942598991e-05, "loss": 0.8299, "step": 46350 }, { "epoch": 0.51, "learning_rate": 4.144539669885139e-05, "loss": 0.8275, "step": 46355 }, { "epoch": 0.51, "learning_rate": 4.1444473971712876e-05, "loss": 0.7563, "step": 46360 }, { "epoch": 0.51, "learning_rate": 4.144355124457437e-05, "loss": 0.8024, "step": 46365 }, { "epoch": 0.51, "learning_rate": 4.144262851743586e-05, "loss": 0.7577, "step": 46370 }, { "epoch": 0.51, "learning_rate": 4.144170579029734e-05, "loss": 0.7622, "step": 46375 }, { "epoch": 0.51, "learning_rate": 4.144078306315883e-05, "loss": 0.728, "step": 46380 }, { "epoch": 0.51, "learning_rate": 4.1439860336020315e-05, "loss": 0.6723, "step": 46385 }, { "epoch": 0.51, "learning_rate": 4.14389376088818e-05, "loss": 0.7768, "step": 46390 }, { "epoch": 0.51, "learning_rate": 4.143801488174329e-05, "loss": 0.7965, "step": 46395 }, { "epoch": 0.51, "learning_rate": 4.143709215460478e-05, "loss": 0.6967, "step": 46400 }, { "epoch": 0.51, "learning_rate": 4.1436169427466266e-05, "loss": 0.7168, "step": 46405 }, { "epoch": 0.51, "learning_rate": 4.1435246700327754e-05, "loss": 0.7626, "step": 46410 }, { "epoch": 0.51, "learning_rate": 4.143432397318924e-05, "loss": 0.8081, "step": 46415 }, { "epoch": 0.51, "learning_rate": 4.143340124605073e-05, "loss": 0.7231, "step": 46420 }, { "epoch": 0.51, "learning_rate": 4.143247851891222e-05, "loss": 0.818, "step": 46425 }, { "epoch": 0.51, "learning_rate": 4.1431555791773705e-05, "loss": 0.7866, "step": 46430 }, { "epoch": 0.51, "learning_rate": 4.143063306463519e-05, "loss": 0.7778, "step": 46435 }, { "epoch": 0.51, "learning_rate": 4.142971033749668e-05, "loss": 0.7514, "step": 46440 }, { "epoch": 0.51, "learning_rate": 4.142878761035817e-05, "loss": 0.7193, "step": 46445 }, { "epoch": 0.51, "learning_rate": 4.142786488321966e-05, "loss": 0.7255, "step": 46450 }, { "epoch": 0.51, "learning_rate": 4.1426942156081145e-05, "loss": 0.7663, "step": 46455 }, { "epoch": 0.51, "learning_rate": 4.142601942894263e-05, "loss": 0.728, "step": 46460 }, { "epoch": 0.51, "learning_rate": 4.1425096701804114e-05, "loss": 0.7369, "step": 46465 }, { "epoch": 0.51, "learning_rate": 4.14241739746656e-05, "loss": 0.7993, "step": 46470 }, { "epoch": 0.51, "learning_rate": 4.1423251247527096e-05, "loss": 0.717, "step": 46475 }, { "epoch": 0.51, "learning_rate": 4.1422328520388584e-05, "loss": 0.7523, "step": 46480 }, { "epoch": 0.51, "learning_rate": 4.1421405793250065e-05, "loss": 0.715, "step": 46485 }, { "epoch": 0.51, "learning_rate": 4.142048306611155e-05, "loss": 0.7252, "step": 46490 }, { "epoch": 0.51, "learning_rate": 4.141956033897305e-05, "loss": 0.7569, "step": 46495 }, { "epoch": 0.51, "learning_rate": 4.1418637611834535e-05, "loss": 0.7099, "step": 46500 }, { "epoch": 0.51, "learning_rate": 4.1417714884696016e-05, "loss": 0.7362, "step": 46505 }, { "epoch": 0.51, "learning_rate": 4.1416792157557504e-05, "loss": 0.7081, "step": 46510 }, { "epoch": 0.52, "learning_rate": 4.1415869430419e-05, "loss": 0.7527, "step": 46515 }, { "epoch": 0.52, "learning_rate": 4.141494670328048e-05, "loss": 0.744, "step": 46520 }, { "epoch": 0.52, "learning_rate": 4.141402397614197e-05, "loss": 0.7354, "step": 46525 }, { "epoch": 0.52, "learning_rate": 4.1413101249003456e-05, "loss": 0.7273, "step": 46530 }, { "epoch": 0.52, "learning_rate": 4.1412178521864943e-05, "loss": 0.7248, "step": 46535 }, { "epoch": 0.52, "learning_rate": 4.141125579472643e-05, "loss": 0.808, "step": 46540 }, { "epoch": 0.52, "learning_rate": 4.141033306758792e-05, "loss": 0.7544, "step": 46545 }, { "epoch": 0.52, "learning_rate": 4.140941034044941e-05, "loss": 0.7646, "step": 46550 }, { "epoch": 0.52, "learning_rate": 4.1408487613310895e-05, "loss": 0.7242, "step": 46555 }, { "epoch": 0.52, "learning_rate": 4.140756488617238e-05, "loss": 0.761, "step": 46560 }, { "epoch": 0.52, "learning_rate": 4.140664215903387e-05, "loss": 0.7725, "step": 46565 }, { "epoch": 0.52, "learning_rate": 4.140571943189536e-05, "loss": 0.7175, "step": 46570 }, { "epoch": 0.52, "learning_rate": 4.1404796704756846e-05, "loss": 0.7756, "step": 46575 }, { "epoch": 0.52, "learning_rate": 4.1403873977618334e-05, "loss": 0.7011, "step": 46580 }, { "epoch": 0.52, "learning_rate": 4.140295125047982e-05, "loss": 0.7316, "step": 46585 }, { "epoch": 0.52, "learning_rate": 4.140202852334131e-05, "loss": 0.7147, "step": 46590 }, { "epoch": 0.52, "learning_rate": 4.140110579620279e-05, "loss": 0.6988, "step": 46595 }, { "epoch": 0.52, "learning_rate": 4.1400183069064285e-05, "loss": 0.8273, "step": 46600 }, { "epoch": 0.52, "learning_rate": 4.139926034192577e-05, "loss": 0.7437, "step": 46605 }, { "epoch": 0.52, "learning_rate": 4.139833761478726e-05, "loss": 0.8016, "step": 46610 }, { "epoch": 0.52, "learning_rate": 4.139741488764874e-05, "loss": 0.6986, "step": 46615 }, { "epoch": 0.52, "learning_rate": 4.139649216051023e-05, "loss": 0.8248, "step": 46620 }, { "epoch": 0.52, "learning_rate": 4.1395569433371725e-05, "loss": 0.777, "step": 46625 }, { "epoch": 0.52, "learning_rate": 4.1394646706233206e-05, "loss": 0.7694, "step": 46630 }, { "epoch": 0.52, "learning_rate": 4.1393723979094694e-05, "loss": 0.7686, "step": 46635 }, { "epoch": 0.52, "learning_rate": 4.139280125195618e-05, "loss": 0.6995, "step": 46640 }, { "epoch": 0.52, "learning_rate": 4.1391878524817676e-05, "loss": 0.7, "step": 46645 }, { "epoch": 0.52, "learning_rate": 4.139095579767916e-05, "loss": 0.7611, "step": 46650 }, { "epoch": 0.52, "learning_rate": 4.1390033070540645e-05, "loss": 0.6973, "step": 46655 }, { "epoch": 0.52, "learning_rate": 4.138911034340213e-05, "loss": 0.7853, "step": 46660 }, { "epoch": 0.52, "learning_rate": 4.138818761626362e-05, "loss": 0.7048, "step": 46665 }, { "epoch": 0.52, "learning_rate": 4.138726488912511e-05, "loss": 0.7145, "step": 46670 }, { "epoch": 0.52, "learning_rate": 4.1386342161986596e-05, "loss": 0.6949, "step": 46675 }, { "epoch": 0.52, "learning_rate": 4.1385419434848084e-05, "loss": 0.7246, "step": 46680 }, { "epoch": 0.52, "learning_rate": 4.138449670770957e-05, "loss": 0.801, "step": 46685 }, { "epoch": 0.52, "learning_rate": 4.138357398057106e-05, "loss": 0.7544, "step": 46690 }, { "epoch": 0.52, "learning_rate": 4.138265125343255e-05, "loss": 0.7147, "step": 46695 }, { "epoch": 0.52, "learning_rate": 4.1381728526294036e-05, "loss": 0.7286, "step": 46700 }, { "epoch": 0.52, "learning_rate": 4.138080579915552e-05, "loss": 0.7469, "step": 46705 }, { "epoch": 0.52, "learning_rate": 4.137988307201701e-05, "loss": 0.7935, "step": 46710 }, { "epoch": 0.52, "learning_rate": 4.13789603448785e-05, "loss": 0.7625, "step": 46715 }, { "epoch": 0.52, "learning_rate": 4.137803761773999e-05, "loss": 0.752, "step": 46720 }, { "epoch": 0.52, "learning_rate": 4.137711489060147e-05, "loss": 0.7089, "step": 46725 }, { "epoch": 0.52, "learning_rate": 4.137619216346296e-05, "loss": 0.7638, "step": 46730 }, { "epoch": 0.52, "learning_rate": 4.137526943632445e-05, "loss": 0.7161, "step": 46735 }, { "epoch": 0.52, "learning_rate": 4.137434670918593e-05, "loss": 0.7382, "step": 46740 }, { "epoch": 0.52, "learning_rate": 4.137342398204742e-05, "loss": 0.728, "step": 46745 }, { "epoch": 0.52, "learning_rate": 4.1372501254908914e-05, "loss": 0.7202, "step": 46750 }, { "epoch": 0.52, "learning_rate": 4.13715785277704e-05, "loss": 0.7121, "step": 46755 }, { "epoch": 0.52, "learning_rate": 4.137065580063188e-05, "loss": 0.7398, "step": 46760 }, { "epoch": 0.52, "learning_rate": 4.136973307349337e-05, "loss": 0.7147, "step": 46765 }, { "epoch": 0.52, "learning_rate": 4.136881034635486e-05, "loss": 0.693, "step": 46770 }, { "epoch": 0.52, "learning_rate": 4.136788761921635e-05, "loss": 0.7284, "step": 46775 }, { "epoch": 0.52, "learning_rate": 4.1366964892077834e-05, "loss": 0.7004, "step": 46780 }, { "epoch": 0.52, "learning_rate": 4.136604216493932e-05, "loss": 0.7541, "step": 46785 }, { "epoch": 0.52, "learning_rate": 4.136511943780081e-05, "loss": 0.7395, "step": 46790 }, { "epoch": 0.52, "learning_rate": 4.13641967106623e-05, "loss": 0.7333, "step": 46795 }, { "epoch": 0.52, "learning_rate": 4.1363273983523786e-05, "loss": 0.752, "step": 46800 }, { "epoch": 0.52, "learning_rate": 4.1362351256385274e-05, "loss": 0.7225, "step": 46805 }, { "epoch": 0.52, "learning_rate": 4.136142852924676e-05, "loss": 0.7569, "step": 46810 }, { "epoch": 0.52, "learning_rate": 4.136050580210825e-05, "loss": 0.7511, "step": 46815 }, { "epoch": 0.52, "learning_rate": 4.135958307496974e-05, "loss": 0.7072, "step": 46820 }, { "epoch": 0.52, "learning_rate": 4.1358660347831225e-05, "loss": 0.7702, "step": 46825 }, { "epoch": 0.52, "learning_rate": 4.135773762069271e-05, "loss": 0.7263, "step": 46830 }, { "epoch": 0.52, "learning_rate": 4.1356814893554194e-05, "loss": 0.7319, "step": 46835 }, { "epoch": 0.52, "learning_rate": 4.135589216641569e-05, "loss": 0.7756, "step": 46840 }, { "epoch": 0.52, "learning_rate": 4.1354969439277177e-05, "loss": 0.745, "step": 46845 }, { "epoch": 0.52, "learning_rate": 4.1354046712138664e-05, "loss": 0.7511, "step": 46850 }, { "epoch": 0.52, "learning_rate": 4.1353123985000145e-05, "loss": 0.7725, "step": 46855 }, { "epoch": 0.52, "learning_rate": 4.135220125786164e-05, "loss": 0.7473, "step": 46860 }, { "epoch": 0.52, "learning_rate": 4.135127853072313e-05, "loss": 0.7532, "step": 46865 }, { "epoch": 0.52, "learning_rate": 4.135035580358461e-05, "loss": 0.7345, "step": 46870 }, { "epoch": 0.52, "learning_rate": 4.13494330764461e-05, "loss": 0.7462, "step": 46875 }, { "epoch": 0.52, "learning_rate": 4.134851034930759e-05, "loss": 0.7897, "step": 46880 }, { "epoch": 0.52, "learning_rate": 4.134758762216908e-05, "loss": 0.7173, "step": 46885 }, { "epoch": 0.52, "learning_rate": 4.134666489503056e-05, "loss": 0.7481, "step": 46890 }, { "epoch": 0.52, "learning_rate": 4.134574216789205e-05, "loss": 0.7795, "step": 46895 }, { "epoch": 0.52, "learning_rate": 4.134481944075354e-05, "loss": 0.7739, "step": 46900 }, { "epoch": 0.52, "learning_rate": 4.1343896713615024e-05, "loss": 0.6613, "step": 46905 }, { "epoch": 0.52, "learning_rate": 4.134297398647651e-05, "loss": 0.7413, "step": 46910 }, { "epoch": 0.52, "learning_rate": 4.1342051259338e-05, "loss": 0.849, "step": 46915 }, { "epoch": 0.52, "learning_rate": 4.134112853219949e-05, "loss": 0.7645, "step": 46920 }, { "epoch": 0.52, "learning_rate": 4.1340205805060975e-05, "loss": 0.7834, "step": 46925 }, { "epoch": 0.52, "learning_rate": 4.133928307792246e-05, "loss": 0.6888, "step": 46930 }, { "epoch": 0.52, "learning_rate": 4.133836035078395e-05, "loss": 0.7848, "step": 46935 }, { "epoch": 0.52, "learning_rate": 4.133743762364544e-05, "loss": 0.755, "step": 46940 }, { "epoch": 0.52, "learning_rate": 4.133651489650693e-05, "loss": 0.7478, "step": 46945 }, { "epoch": 0.52, "learning_rate": 4.1335592169368415e-05, "loss": 0.7639, "step": 46950 }, { "epoch": 0.52, "learning_rate": 4.13346694422299e-05, "loss": 0.7145, "step": 46955 }, { "epoch": 0.52, "learning_rate": 4.133374671509139e-05, "loss": 0.8192, "step": 46960 }, { "epoch": 0.52, "learning_rate": 4.133282398795288e-05, "loss": 0.7418, "step": 46965 }, { "epoch": 0.52, "learning_rate": 4.1331901260814366e-05, "loss": 0.7365, "step": 46970 }, { "epoch": 0.52, "learning_rate": 4.1330978533675854e-05, "loss": 0.6708, "step": 46975 }, { "epoch": 0.52, "learning_rate": 4.1330055806537335e-05, "loss": 0.7274, "step": 46980 }, { "epoch": 0.52, "learning_rate": 4.132913307939882e-05, "loss": 0.7792, "step": 46985 }, { "epoch": 0.52, "learning_rate": 4.132821035226032e-05, "loss": 0.6958, "step": 46990 }, { "epoch": 0.52, "learning_rate": 4.1327287625121805e-05, "loss": 0.7623, "step": 46995 }, { "epoch": 0.52, "learning_rate": 4.1326364897983286e-05, "loss": 0.7538, "step": 47000 }, { "epoch": 0.52, "eval_loss": 0.7207958698272705, "eval_runtime": 69.2351, "eval_samples_per_second": 28.887, "eval_steps_per_second": 14.444, "step": 47000 }, { "epoch": 0.52, "learning_rate": 4.1325442170844774e-05, "loss": 0.7375, "step": 47005 }, { "epoch": 0.52, "learning_rate": 4.132451944370627e-05, "loss": 0.6567, "step": 47010 }, { "epoch": 0.52, "learning_rate": 4.132359671656775e-05, "loss": 0.7613, "step": 47015 }, { "epoch": 0.52, "learning_rate": 4.132267398942924e-05, "loss": 0.6663, "step": 47020 }, { "epoch": 0.52, "learning_rate": 4.1321751262290726e-05, "loss": 0.7625, "step": 47025 }, { "epoch": 0.52, "learning_rate": 4.132082853515222e-05, "loss": 0.7195, "step": 47030 }, { "epoch": 0.52, "learning_rate": 4.13199058080137e-05, "loss": 0.7017, "step": 47035 }, { "epoch": 0.52, "learning_rate": 4.131898308087519e-05, "loss": 0.8312, "step": 47040 }, { "epoch": 0.52, "learning_rate": 4.131806035373668e-05, "loss": 0.7089, "step": 47045 }, { "epoch": 0.52, "learning_rate": 4.1317137626598165e-05, "loss": 0.7721, "step": 47050 }, { "epoch": 0.52, "learning_rate": 4.131621489945965e-05, "loss": 0.7404, "step": 47055 }, { "epoch": 0.52, "learning_rate": 4.131529217232114e-05, "loss": 0.7544, "step": 47060 }, { "epoch": 0.52, "learning_rate": 4.131436944518263e-05, "loss": 0.7405, "step": 47065 }, { "epoch": 0.52, "learning_rate": 4.1313446718044116e-05, "loss": 0.7544, "step": 47070 }, { "epoch": 0.52, "learning_rate": 4.1312523990905604e-05, "loss": 0.8001, "step": 47075 }, { "epoch": 0.52, "learning_rate": 4.131160126376709e-05, "loss": 0.7672, "step": 47080 }, { "epoch": 0.52, "learning_rate": 4.131067853662858e-05, "loss": 0.7186, "step": 47085 }, { "epoch": 0.52, "learning_rate": 4.130975580949006e-05, "loss": 0.7972, "step": 47090 }, { "epoch": 0.52, "learning_rate": 4.1308833082351555e-05, "loss": 0.7628, "step": 47095 }, { "epoch": 0.52, "learning_rate": 4.130791035521304e-05, "loss": 0.7604, "step": 47100 }, { "epoch": 0.52, "learning_rate": 4.130698762807453e-05, "loss": 0.8197, "step": 47105 }, { "epoch": 0.52, "learning_rate": 4.130606490093601e-05, "loss": 0.7998, "step": 47110 }, { "epoch": 0.52, "learning_rate": 4.130514217379751e-05, "loss": 0.736, "step": 47115 }, { "epoch": 0.52, "learning_rate": 4.1304219446658995e-05, "loss": 0.7622, "step": 47120 }, { "epoch": 0.52, "learning_rate": 4.1303296719520476e-05, "loss": 0.6813, "step": 47125 }, { "epoch": 0.52, "learning_rate": 4.1302373992381964e-05, "loss": 0.7137, "step": 47130 }, { "epoch": 0.52, "learning_rate": 4.130145126524345e-05, "loss": 0.7316, "step": 47135 }, { "epoch": 0.52, "learning_rate": 4.1300528538104946e-05, "loss": 0.7869, "step": 47140 }, { "epoch": 0.52, "learning_rate": 4.129960581096643e-05, "loss": 0.7927, "step": 47145 }, { "epoch": 0.52, "learning_rate": 4.1298683083827915e-05, "loss": 0.7585, "step": 47150 }, { "epoch": 0.52, "learning_rate": 4.12977603566894e-05, "loss": 0.7027, "step": 47155 }, { "epoch": 0.52, "learning_rate": 4.12968376295509e-05, "loss": 0.6915, "step": 47160 }, { "epoch": 0.52, "learning_rate": 4.129591490241238e-05, "loss": 0.7818, "step": 47165 }, { "epoch": 0.52, "learning_rate": 4.1294992175273866e-05, "loss": 0.7101, "step": 47170 }, { "epoch": 0.52, "learning_rate": 4.1294069448135354e-05, "loss": 0.7273, "step": 47175 }, { "epoch": 0.52, "learning_rate": 4.129314672099684e-05, "loss": 0.8162, "step": 47180 }, { "epoch": 0.52, "learning_rate": 4.129222399385833e-05, "loss": 0.7592, "step": 47185 }, { "epoch": 0.52, "learning_rate": 4.129130126671982e-05, "loss": 0.7085, "step": 47190 }, { "epoch": 0.52, "learning_rate": 4.1290378539581306e-05, "loss": 0.7482, "step": 47195 }, { "epoch": 0.52, "learning_rate": 4.1289455812442793e-05, "loss": 0.7171, "step": 47200 }, { "epoch": 0.52, "learning_rate": 4.128853308530428e-05, "loss": 0.7688, "step": 47205 }, { "epoch": 0.52, "learning_rate": 4.128761035816577e-05, "loss": 0.6774, "step": 47210 }, { "epoch": 0.52, "learning_rate": 4.128668763102726e-05, "loss": 0.7815, "step": 47215 }, { "epoch": 0.52, "learning_rate": 4.128576490388874e-05, "loss": 0.7388, "step": 47220 }, { "epoch": 0.52, "learning_rate": 4.128484217675023e-05, "loss": 0.8068, "step": 47225 }, { "epoch": 0.52, "learning_rate": 4.128391944961172e-05, "loss": 0.743, "step": 47230 }, { "epoch": 0.52, "learning_rate": 4.128299672247321e-05, "loss": 0.7875, "step": 47235 }, { "epoch": 0.52, "learning_rate": 4.128207399533469e-05, "loss": 0.7276, "step": 47240 }, { "epoch": 0.52, "learning_rate": 4.1281151268196184e-05, "loss": 0.7773, "step": 47245 }, { "epoch": 0.52, "learning_rate": 4.128022854105767e-05, "loss": 0.7291, "step": 47250 }, { "epoch": 0.52, "learning_rate": 4.127930581391915e-05, "loss": 0.7516, "step": 47255 }, { "epoch": 0.52, "learning_rate": 4.127838308678064e-05, "loss": 0.6998, "step": 47260 }, { "epoch": 0.52, "learning_rate": 4.1277460359642135e-05, "loss": 0.7661, "step": 47265 }, { "epoch": 0.52, "learning_rate": 4.127653763250362e-05, "loss": 0.7076, "step": 47270 }, { "epoch": 0.52, "learning_rate": 4.1275614905365104e-05, "loss": 0.7642, "step": 47275 }, { "epoch": 0.52, "learning_rate": 4.127469217822659e-05, "loss": 0.7842, "step": 47280 }, { "epoch": 0.52, "learning_rate": 4.127376945108809e-05, "loss": 0.7925, "step": 47285 }, { "epoch": 0.52, "learning_rate": 4.127284672394957e-05, "loss": 0.722, "step": 47290 }, { "epoch": 0.52, "learning_rate": 4.1271923996811056e-05, "loss": 0.7232, "step": 47295 }, { "epoch": 0.52, "learning_rate": 4.1271001269672544e-05, "loss": 0.7426, "step": 47300 }, { "epoch": 0.52, "learning_rate": 4.127007854253403e-05, "loss": 0.7823, "step": 47305 }, { "epoch": 0.52, "learning_rate": 4.126915581539552e-05, "loss": 0.778, "step": 47310 }, { "epoch": 0.52, "learning_rate": 4.126823308825701e-05, "loss": 0.7351, "step": 47315 }, { "epoch": 0.52, "learning_rate": 4.1267310361118495e-05, "loss": 0.7384, "step": 47320 }, { "epoch": 0.52, "learning_rate": 4.126638763397998e-05, "loss": 0.6791, "step": 47325 }, { "epoch": 0.52, "learning_rate": 4.126546490684147e-05, "loss": 0.6872, "step": 47330 }, { "epoch": 0.52, "learning_rate": 4.126454217970296e-05, "loss": 0.6661, "step": 47335 }, { "epoch": 0.52, "learning_rate": 4.1263619452564446e-05, "loss": 0.7592, "step": 47340 }, { "epoch": 0.52, "learning_rate": 4.1262696725425934e-05, "loss": 0.7381, "step": 47345 }, { "epoch": 0.52, "learning_rate": 4.126177399828742e-05, "loss": 0.7686, "step": 47350 }, { "epoch": 0.52, "learning_rate": 4.126085127114891e-05, "loss": 0.7872, "step": 47355 }, { "epoch": 0.52, "learning_rate": 4.12599285440104e-05, "loss": 0.7185, "step": 47360 }, { "epoch": 0.52, "learning_rate": 4.125900581687188e-05, "loss": 0.7155, "step": 47365 }, { "epoch": 0.52, "learning_rate": 4.125808308973337e-05, "loss": 0.7265, "step": 47370 }, { "epoch": 0.52, "learning_rate": 4.125716036259486e-05, "loss": 0.7512, "step": 47375 }, { "epoch": 0.52, "learning_rate": 4.125623763545635e-05, "loss": 0.7402, "step": 47380 }, { "epoch": 0.52, "learning_rate": 4.125531490831783e-05, "loss": 0.7595, "step": 47385 }, { "epoch": 0.52, "learning_rate": 4.125439218117932e-05, "loss": 0.7785, "step": 47390 }, { "epoch": 0.52, "learning_rate": 4.125346945404081e-05, "loss": 0.7759, "step": 47395 }, { "epoch": 0.52, "learning_rate": 4.1252546726902294e-05, "loss": 0.7383, "step": 47400 }, { "epoch": 0.52, "learning_rate": 4.125162399976378e-05, "loss": 0.7732, "step": 47405 }, { "epoch": 0.52, "learning_rate": 4.125070127262527e-05, "loss": 0.7573, "step": 47410 }, { "epoch": 0.53, "learning_rate": 4.1249778545486764e-05, "loss": 0.7369, "step": 47415 }, { "epoch": 0.53, "learning_rate": 4.1248855818348245e-05, "loss": 0.768, "step": 47420 }, { "epoch": 0.53, "learning_rate": 4.124793309120973e-05, "loss": 0.7596, "step": 47425 }, { "epoch": 0.53, "learning_rate": 4.124701036407122e-05, "loss": 0.7192, "step": 47430 }, { "epoch": 0.53, "learning_rate": 4.124608763693271e-05, "loss": 0.7204, "step": 47435 }, { "epoch": 0.53, "learning_rate": 4.12451649097942e-05, "loss": 0.7259, "step": 47440 }, { "epoch": 0.53, "learning_rate": 4.1244242182655684e-05, "loss": 0.7345, "step": 47445 }, { "epoch": 0.53, "learning_rate": 4.124331945551717e-05, "loss": 0.7183, "step": 47450 }, { "epoch": 0.53, "learning_rate": 4.124239672837866e-05, "loss": 0.8428, "step": 47455 }, { "epoch": 0.53, "learning_rate": 4.124147400124015e-05, "loss": 0.7546, "step": 47460 }, { "epoch": 0.53, "learning_rate": 4.1240551274101636e-05, "loss": 0.7312, "step": 47465 }, { "epoch": 0.53, "learning_rate": 4.1239628546963124e-05, "loss": 0.6929, "step": 47470 }, { "epoch": 0.53, "learning_rate": 4.1238705819824605e-05, "loss": 0.7069, "step": 47475 }, { "epoch": 0.53, "learning_rate": 4.12377830926861e-05, "loss": 0.762, "step": 47480 }, { "epoch": 0.53, "learning_rate": 4.123686036554759e-05, "loss": 0.737, "step": 47485 }, { "epoch": 0.53, "learning_rate": 4.1235937638409075e-05, "loss": 0.7376, "step": 47490 }, { "epoch": 0.53, "learning_rate": 4.1235014911270556e-05, "loss": 0.7694, "step": 47495 }, { "epoch": 0.53, "learning_rate": 4.123409218413205e-05, "loss": 0.8328, "step": 47500 }, { "epoch": 0.53, "learning_rate": 4.123316945699354e-05, "loss": 0.7157, "step": 47505 }, { "epoch": 0.53, "learning_rate": 4.123224672985502e-05, "loss": 0.7682, "step": 47510 }, { "epoch": 0.53, "learning_rate": 4.123132400271651e-05, "loss": 0.7226, "step": 47515 }, { "epoch": 0.53, "learning_rate": 4.1230401275577995e-05, "loss": 0.7714, "step": 47520 }, { "epoch": 0.53, "learning_rate": 4.122947854843949e-05, "loss": 0.7091, "step": 47525 }, { "epoch": 0.53, "learning_rate": 4.122855582130097e-05, "loss": 0.7135, "step": 47530 }, { "epoch": 0.53, "learning_rate": 4.122763309416246e-05, "loss": 0.7635, "step": 47535 }, { "epoch": 0.53, "learning_rate": 4.122671036702395e-05, "loss": 0.7665, "step": 47540 }, { "epoch": 0.53, "learning_rate": 4.122578763988544e-05, "loss": 0.7739, "step": 47545 }, { "epoch": 0.53, "learning_rate": 4.122486491274692e-05, "loss": 0.7393, "step": 47550 }, { "epoch": 0.53, "learning_rate": 4.122394218560841e-05, "loss": 0.7631, "step": 47555 }, { "epoch": 0.53, "learning_rate": 4.12230194584699e-05, "loss": 0.7718, "step": 47560 }, { "epoch": 0.53, "learning_rate": 4.1222096731331386e-05, "loss": 0.7266, "step": 47565 }, { "epoch": 0.53, "learning_rate": 4.1221174004192874e-05, "loss": 0.8317, "step": 47570 }, { "epoch": 0.53, "learning_rate": 4.122025127705436e-05, "loss": 0.7098, "step": 47575 }, { "epoch": 0.53, "learning_rate": 4.121932854991585e-05, "loss": 0.7438, "step": 47580 }, { "epoch": 0.53, "learning_rate": 4.121840582277734e-05, "loss": 0.7085, "step": 47585 }, { "epoch": 0.53, "learning_rate": 4.1217483095638825e-05, "loss": 0.7602, "step": 47590 }, { "epoch": 0.53, "learning_rate": 4.121656036850031e-05, "loss": 0.7409, "step": 47595 }, { "epoch": 0.53, "learning_rate": 4.12156376413618e-05, "loss": 0.777, "step": 47600 }, { "epoch": 0.53, "learning_rate": 4.121471491422328e-05, "loss": 0.7472, "step": 47605 }, { "epoch": 0.53, "learning_rate": 4.121379218708478e-05, "loss": 0.7478, "step": 47610 }, { "epoch": 0.53, "learning_rate": 4.1212869459946265e-05, "loss": 0.7519, "step": 47615 }, { "epoch": 0.53, "learning_rate": 4.121194673280775e-05, "loss": 0.7185, "step": 47620 }, { "epoch": 0.53, "learning_rate": 4.1211024005669233e-05, "loss": 0.7695, "step": 47625 }, { "epoch": 0.53, "learning_rate": 4.121010127853073e-05, "loss": 0.7762, "step": 47630 }, { "epoch": 0.53, "learning_rate": 4.1209178551392216e-05, "loss": 0.7019, "step": 47635 }, { "epoch": 0.53, "learning_rate": 4.12082558242537e-05, "loss": 0.723, "step": 47640 }, { "epoch": 0.53, "learning_rate": 4.1207333097115185e-05, "loss": 0.7366, "step": 47645 }, { "epoch": 0.53, "learning_rate": 4.120641036997668e-05, "loss": 0.7395, "step": 47650 }, { "epoch": 0.53, "learning_rate": 4.120548764283817e-05, "loss": 0.7728, "step": 47655 }, { "epoch": 0.53, "learning_rate": 4.120456491569965e-05, "loss": 0.7192, "step": 47660 }, { "epoch": 0.53, "learning_rate": 4.1203642188561136e-05, "loss": 0.7023, "step": 47665 }, { "epoch": 0.53, "learning_rate": 4.1202719461422624e-05, "loss": 0.7867, "step": 47670 }, { "epoch": 0.53, "learning_rate": 4.120179673428411e-05, "loss": 0.7385, "step": 47675 }, { "epoch": 0.53, "learning_rate": 4.12008740071456e-05, "loss": 0.751, "step": 47680 }, { "epoch": 0.53, "learning_rate": 4.119995128000709e-05, "loss": 0.7399, "step": 47685 }, { "epoch": 0.53, "learning_rate": 4.1199028552868576e-05, "loss": 0.6498, "step": 47690 }, { "epoch": 0.53, "learning_rate": 4.119810582573006e-05, "loss": 0.7599, "step": 47695 }, { "epoch": 0.53, "learning_rate": 4.119718309859155e-05, "loss": 0.741, "step": 47700 }, { "epoch": 0.53, "learning_rate": 4.119626037145304e-05, "loss": 0.7486, "step": 47705 }, { "epoch": 0.53, "learning_rate": 4.119533764431453e-05, "loss": 0.7641, "step": 47710 }, { "epoch": 0.53, "learning_rate": 4.1194414917176015e-05, "loss": 0.7419, "step": 47715 }, { "epoch": 0.53, "learning_rate": 4.11934921900375e-05, "loss": 0.753, "step": 47720 }, { "epoch": 0.53, "learning_rate": 4.119256946289899e-05, "loss": 0.7379, "step": 47725 }, { "epoch": 0.53, "learning_rate": 4.119164673576048e-05, "loss": 0.7324, "step": 47730 }, { "epoch": 0.53, "learning_rate": 4.1190724008621966e-05, "loss": 0.6914, "step": 47735 }, { "epoch": 0.53, "learning_rate": 4.1189801281483454e-05, "loss": 0.7284, "step": 47740 }, { "epoch": 0.53, "learning_rate": 4.118887855434494e-05, "loss": 0.714, "step": 47745 }, { "epoch": 0.53, "learning_rate": 4.118795582720642e-05, "loss": 0.7623, "step": 47750 }, { "epoch": 0.53, "learning_rate": 4.118703310006791e-05, "loss": 0.7299, "step": 47755 }, { "epoch": 0.53, "learning_rate": 4.1186110372929405e-05, "loss": 0.7438, "step": 47760 }, { "epoch": 0.53, "learning_rate": 4.118518764579089e-05, "loss": 0.7652, "step": 47765 }, { "epoch": 0.53, "learning_rate": 4.1184264918652374e-05, "loss": 0.703, "step": 47770 }, { "epoch": 0.53, "learning_rate": 4.118334219151386e-05, "loss": 0.6906, "step": 47775 }, { "epoch": 0.53, "learning_rate": 4.118241946437536e-05, "loss": 0.7224, "step": 47780 }, { "epoch": 0.53, "learning_rate": 4.118149673723684e-05, "loss": 0.8011, "step": 47785 }, { "epoch": 0.53, "learning_rate": 4.1180574010098326e-05, "loss": 0.7, "step": 47790 }, { "epoch": 0.53, "learning_rate": 4.1179651282959814e-05, "loss": 0.7014, "step": 47795 }, { "epoch": 0.53, "learning_rate": 4.117872855582131e-05, "loss": 0.7997, "step": 47800 }, { "epoch": 0.53, "learning_rate": 4.117780582868279e-05, "loss": 0.7395, "step": 47805 }, { "epoch": 0.53, "learning_rate": 4.117688310154428e-05, "loss": 0.713, "step": 47810 }, { "epoch": 0.53, "learning_rate": 4.1175960374405765e-05, "loss": 0.7365, "step": 47815 }, { "epoch": 0.53, "learning_rate": 4.117503764726725e-05, "loss": 0.7483, "step": 47820 }, { "epoch": 0.53, "learning_rate": 4.117411492012874e-05, "loss": 0.7529, "step": 47825 }, { "epoch": 0.53, "learning_rate": 4.117319219299023e-05, "loss": 0.7151, "step": 47830 }, { "epoch": 0.53, "learning_rate": 4.1172269465851716e-05, "loss": 0.7331, "step": 47835 }, { "epoch": 0.53, "learning_rate": 4.1171346738713204e-05, "loss": 0.7482, "step": 47840 }, { "epoch": 0.53, "learning_rate": 4.117042401157469e-05, "loss": 0.7992, "step": 47845 }, { "epoch": 0.53, "learning_rate": 4.116950128443618e-05, "loss": 0.8123, "step": 47850 }, { "epoch": 0.53, "learning_rate": 4.116857855729767e-05, "loss": 0.753, "step": 47855 }, { "epoch": 0.53, "learning_rate": 4.116765583015915e-05, "loss": 0.7691, "step": 47860 }, { "epoch": 0.53, "learning_rate": 4.1166733103020643e-05, "loss": 0.7366, "step": 47865 }, { "epoch": 0.53, "learning_rate": 4.116581037588213e-05, "loss": 0.7356, "step": 47870 }, { "epoch": 0.53, "learning_rate": 4.116488764874362e-05, "loss": 0.7242, "step": 47875 }, { "epoch": 0.53, "learning_rate": 4.11639649216051e-05, "loss": 0.7921, "step": 47880 }, { "epoch": 0.53, "learning_rate": 4.1163042194466595e-05, "loss": 0.773, "step": 47885 }, { "epoch": 0.53, "learning_rate": 4.116211946732808e-05, "loss": 0.7356, "step": 47890 }, { "epoch": 0.53, "learning_rate": 4.1161196740189564e-05, "loss": 0.767, "step": 47895 }, { "epoch": 0.53, "learning_rate": 4.116027401305105e-05, "loss": 0.7325, "step": 47900 }, { "epoch": 0.53, "learning_rate": 4.115935128591254e-05, "loss": 0.7165, "step": 47905 }, { "epoch": 0.53, "learning_rate": 4.1158428558774034e-05, "loss": 0.7108, "step": 47910 }, { "epoch": 0.53, "learning_rate": 4.1157505831635515e-05, "loss": 0.7191, "step": 47915 }, { "epoch": 0.53, "learning_rate": 4.1156583104497e-05, "loss": 0.6902, "step": 47920 }, { "epoch": 0.53, "learning_rate": 4.115566037735849e-05, "loss": 0.7282, "step": 47925 }, { "epoch": 0.53, "learning_rate": 4.1154737650219985e-05, "loss": 0.7687, "step": 47930 }, { "epoch": 0.53, "learning_rate": 4.1153814923081467e-05, "loss": 0.7472, "step": 47935 }, { "epoch": 0.53, "learning_rate": 4.1152892195942954e-05, "loss": 0.755, "step": 47940 }, { "epoch": 0.53, "learning_rate": 4.115196946880444e-05, "loss": 0.7618, "step": 47945 }, { "epoch": 0.53, "learning_rate": 4.115104674166593e-05, "loss": 0.7456, "step": 47950 }, { "epoch": 0.53, "learning_rate": 4.115012401452742e-05, "loss": 0.8161, "step": 47955 }, { "epoch": 0.53, "learning_rate": 4.1149201287388906e-05, "loss": 0.7452, "step": 47960 }, { "epoch": 0.53, "learning_rate": 4.1148278560250394e-05, "loss": 0.7909, "step": 47965 }, { "epoch": 0.53, "learning_rate": 4.1147355833111875e-05, "loss": 0.8033, "step": 47970 }, { "epoch": 0.53, "learning_rate": 4.114643310597337e-05, "loss": 0.7246, "step": 47975 }, { "epoch": 0.53, "learning_rate": 4.114551037883486e-05, "loss": 0.8243, "step": 47980 }, { "epoch": 0.53, "learning_rate": 4.1144587651696345e-05, "loss": 0.7103, "step": 47985 }, { "epoch": 0.53, "learning_rate": 4.1143664924557826e-05, "loss": 0.6666, "step": 47990 }, { "epoch": 0.53, "learning_rate": 4.114274219741932e-05, "loss": 0.7358, "step": 47995 }, { "epoch": 0.53, "learning_rate": 4.114181947028081e-05, "loss": 0.7169, "step": 48000 }, { "epoch": 0.53, "eval_loss": 0.7291169166564941, "eval_runtime": 69.3788, "eval_samples_per_second": 28.827, "eval_steps_per_second": 14.414, "step": 48000 }, { "epoch": 0.53, "learning_rate": 4.1140896743142296e-05, "loss": 0.7234, "step": 48005 }, { "epoch": 0.53, "learning_rate": 4.113997401600378e-05, "loss": 0.796, "step": 48010 }, { "epoch": 0.53, "learning_rate": 4.113905128886527e-05, "loss": 0.7693, "step": 48015 }, { "epoch": 0.53, "learning_rate": 4.113812856172676e-05, "loss": 0.7773, "step": 48020 }, { "epoch": 0.53, "learning_rate": 4.113720583458824e-05, "loss": 0.7772, "step": 48025 }, { "epoch": 0.53, "learning_rate": 4.113628310744973e-05, "loss": 0.7511, "step": 48030 }, { "epoch": 0.53, "learning_rate": 4.1135360380311224e-05, "loss": 0.7393, "step": 48035 }, { "epoch": 0.53, "learning_rate": 4.113443765317271e-05, "loss": 0.7507, "step": 48040 }, { "epoch": 0.53, "learning_rate": 4.113351492603419e-05, "loss": 0.7569, "step": 48045 }, { "epoch": 0.53, "learning_rate": 4.113259219889568e-05, "loss": 0.788, "step": 48050 }, { "epoch": 0.53, "learning_rate": 4.113166947175717e-05, "loss": 0.7062, "step": 48055 }, { "epoch": 0.53, "learning_rate": 4.1130746744618656e-05, "loss": 0.7132, "step": 48060 }, { "epoch": 0.53, "learning_rate": 4.1129824017480144e-05, "loss": 0.7574, "step": 48065 }, { "epoch": 0.53, "learning_rate": 4.112890129034163e-05, "loss": 0.7087, "step": 48070 }, { "epoch": 0.53, "learning_rate": 4.112797856320312e-05, "loss": 0.719, "step": 48075 }, { "epoch": 0.53, "learning_rate": 4.112705583606461e-05, "loss": 0.7949, "step": 48080 }, { "epoch": 0.53, "learning_rate": 4.1126133108926095e-05, "loss": 0.7336, "step": 48085 }, { "epoch": 0.53, "learning_rate": 4.112521038178758e-05, "loss": 0.706, "step": 48090 }, { "epoch": 0.53, "learning_rate": 4.112428765464907e-05, "loss": 0.7142, "step": 48095 }, { "epoch": 0.53, "learning_rate": 4.112336492751056e-05, "loss": 0.7838, "step": 48100 }, { "epoch": 0.53, "learning_rate": 4.112244220037205e-05, "loss": 0.7011, "step": 48105 }, { "epoch": 0.53, "learning_rate": 4.1121519473233534e-05, "loss": 0.8065, "step": 48110 }, { "epoch": 0.53, "learning_rate": 4.112059674609502e-05, "loss": 0.7516, "step": 48115 }, { "epoch": 0.53, "learning_rate": 4.111967401895651e-05, "loss": 0.727, "step": 48120 }, { "epoch": 0.53, "learning_rate": 4.1118751291818e-05, "loss": 0.7746, "step": 48125 }, { "epoch": 0.53, "learning_rate": 4.1117828564679486e-05, "loss": 0.7542, "step": 48130 }, { "epoch": 0.53, "learning_rate": 4.111690583754097e-05, "loss": 0.7546, "step": 48135 }, { "epoch": 0.53, "learning_rate": 4.1115983110402455e-05, "loss": 0.7723, "step": 48140 }, { "epoch": 0.53, "learning_rate": 4.111506038326395e-05, "loss": 0.7876, "step": 48145 }, { "epoch": 0.53, "learning_rate": 4.111413765612544e-05, "loss": 0.7405, "step": 48150 }, { "epoch": 0.53, "learning_rate": 4.111321492898692e-05, "loss": 0.7007, "step": 48155 }, { "epoch": 0.53, "learning_rate": 4.1112292201848406e-05, "loss": 0.7377, "step": 48160 }, { "epoch": 0.53, "learning_rate": 4.11113694747099e-05, "loss": 0.7406, "step": 48165 }, { "epoch": 0.53, "learning_rate": 4.111044674757138e-05, "loss": 0.7571, "step": 48170 }, { "epoch": 0.53, "learning_rate": 4.110952402043287e-05, "loss": 0.759, "step": 48175 }, { "epoch": 0.53, "learning_rate": 4.110860129329436e-05, "loss": 0.7249, "step": 48180 }, { "epoch": 0.53, "learning_rate": 4.110767856615585e-05, "loss": 0.6711, "step": 48185 }, { "epoch": 0.53, "learning_rate": 4.110675583901733e-05, "loss": 0.7512, "step": 48190 }, { "epoch": 0.53, "learning_rate": 4.110583311187882e-05, "loss": 0.7315, "step": 48195 }, { "epoch": 0.53, "learning_rate": 4.110491038474031e-05, "loss": 0.7494, "step": 48200 }, { "epoch": 0.53, "learning_rate": 4.11039876576018e-05, "loss": 0.7608, "step": 48205 }, { "epoch": 0.53, "learning_rate": 4.1103064930463285e-05, "loss": 0.7787, "step": 48210 }, { "epoch": 0.53, "learning_rate": 4.110214220332477e-05, "loss": 0.7973, "step": 48215 }, { "epoch": 0.53, "learning_rate": 4.110121947618626e-05, "loss": 0.727, "step": 48220 }, { "epoch": 0.53, "learning_rate": 4.110029674904775e-05, "loss": 0.7264, "step": 48225 }, { "epoch": 0.53, "learning_rate": 4.1099374021909236e-05, "loss": 0.7196, "step": 48230 }, { "epoch": 0.53, "learning_rate": 4.1098451294770724e-05, "loss": 0.7296, "step": 48235 }, { "epoch": 0.53, "learning_rate": 4.109752856763221e-05, "loss": 0.7536, "step": 48240 }, { "epoch": 0.53, "learning_rate": 4.109660584049369e-05, "loss": 0.7113, "step": 48245 }, { "epoch": 0.53, "learning_rate": 4.109568311335519e-05, "loss": 0.7452, "step": 48250 }, { "epoch": 0.53, "learning_rate": 4.1094760386216675e-05, "loss": 0.7806, "step": 48255 }, { "epoch": 0.53, "learning_rate": 4.109383765907816e-05, "loss": 0.6901, "step": 48260 }, { "epoch": 0.53, "learning_rate": 4.1092914931939644e-05, "loss": 0.7118, "step": 48265 }, { "epoch": 0.53, "learning_rate": 4.109199220480114e-05, "loss": 0.799, "step": 48270 }, { "epoch": 0.53, "learning_rate": 4.109106947766263e-05, "loss": 0.7443, "step": 48275 }, { "epoch": 0.53, "learning_rate": 4.109014675052411e-05, "loss": 0.7184, "step": 48280 }, { "epoch": 0.53, "learning_rate": 4.1089224023385596e-05, "loss": 0.741, "step": 48285 }, { "epoch": 0.53, "learning_rate": 4.1088301296247083e-05, "loss": 0.7748, "step": 48290 }, { "epoch": 0.53, "learning_rate": 4.108737856910858e-05, "loss": 0.7484, "step": 48295 }, { "epoch": 0.53, "learning_rate": 4.108645584197006e-05, "loss": 0.7261, "step": 48300 }, { "epoch": 0.53, "learning_rate": 4.108553311483155e-05, "loss": 0.7202, "step": 48305 }, { "epoch": 0.53, "learning_rate": 4.1084610387693035e-05, "loss": 0.728, "step": 48310 }, { "epoch": 0.53, "learning_rate": 4.108368766055453e-05, "loss": 0.7922, "step": 48315 }, { "epoch": 0.54, "learning_rate": 4.108276493341601e-05, "loss": 0.7177, "step": 48320 }, { "epoch": 0.54, "learning_rate": 4.10818422062775e-05, "loss": 0.7032, "step": 48325 }, { "epoch": 0.54, "learning_rate": 4.1080919479138986e-05, "loss": 0.7576, "step": 48330 }, { "epoch": 0.54, "learning_rate": 4.1079996752000474e-05, "loss": 0.6888, "step": 48335 }, { "epoch": 0.54, "learning_rate": 4.107907402486196e-05, "loss": 0.7308, "step": 48340 }, { "epoch": 0.54, "learning_rate": 4.107815129772345e-05, "loss": 0.7603, "step": 48345 }, { "epoch": 0.54, "learning_rate": 4.107722857058494e-05, "loss": 0.6995, "step": 48350 }, { "epoch": 0.54, "learning_rate": 4.107630584344642e-05, "loss": 0.7566, "step": 48355 }, { "epoch": 0.54, "learning_rate": 4.107538311630791e-05, "loss": 0.7272, "step": 48360 }, { "epoch": 0.54, "learning_rate": 4.10744603891694e-05, "loss": 0.7807, "step": 48365 }, { "epoch": 0.54, "learning_rate": 4.107353766203089e-05, "loss": 0.7645, "step": 48370 }, { "epoch": 0.54, "learning_rate": 4.107261493489237e-05, "loss": 0.7082, "step": 48375 }, { "epoch": 0.54, "learning_rate": 4.1071692207753865e-05, "loss": 0.812, "step": 48380 }, { "epoch": 0.54, "learning_rate": 4.107076948061535e-05, "loss": 0.7832, "step": 48385 }, { "epoch": 0.54, "learning_rate": 4.106984675347684e-05, "loss": 0.7015, "step": 48390 }, { "epoch": 0.54, "learning_rate": 4.106892402633832e-05, "loss": 0.7711, "step": 48395 }, { "epoch": 0.54, "learning_rate": 4.1068001299199816e-05, "loss": 0.7948, "step": 48400 }, { "epoch": 0.54, "learning_rate": 4.1067078572061304e-05, "loss": 0.86, "step": 48405 }, { "epoch": 0.54, "learning_rate": 4.1066155844922785e-05, "loss": 0.7546, "step": 48410 }, { "epoch": 0.54, "learning_rate": 4.106523311778427e-05, "loss": 0.7621, "step": 48415 }, { "epoch": 0.54, "learning_rate": 4.106431039064577e-05, "loss": 0.7339, "step": 48420 }, { "epoch": 0.54, "learning_rate": 4.1063387663507255e-05, "loss": 0.7936, "step": 48425 }, { "epoch": 0.54, "learning_rate": 4.1062464936368736e-05, "loss": 0.7646, "step": 48430 }, { "epoch": 0.54, "learning_rate": 4.1061542209230224e-05, "loss": 0.8023, "step": 48435 }, { "epoch": 0.54, "learning_rate": 4.106061948209171e-05, "loss": 0.7462, "step": 48440 }, { "epoch": 0.54, "learning_rate": 4.10596967549532e-05, "loss": 0.7133, "step": 48445 }, { "epoch": 0.54, "learning_rate": 4.105877402781469e-05, "loss": 0.7236, "step": 48450 }, { "epoch": 0.54, "learning_rate": 4.1057851300676176e-05, "loss": 0.7317, "step": 48455 }, { "epoch": 0.54, "learning_rate": 4.1056928573537664e-05, "loss": 0.7409, "step": 48460 }, { "epoch": 0.54, "learning_rate": 4.105600584639915e-05, "loss": 0.762, "step": 48465 }, { "epoch": 0.54, "learning_rate": 4.105508311926064e-05, "loss": 0.7116, "step": 48470 }, { "epoch": 0.54, "learning_rate": 4.105416039212213e-05, "loss": 0.7527, "step": 48475 }, { "epoch": 0.54, "learning_rate": 4.1053237664983615e-05, "loss": 0.7732, "step": 48480 }, { "epoch": 0.54, "learning_rate": 4.10523149378451e-05, "loss": 0.7425, "step": 48485 }, { "epoch": 0.54, "learning_rate": 4.105139221070659e-05, "loss": 0.7487, "step": 48490 }, { "epoch": 0.54, "learning_rate": 4.105046948356808e-05, "loss": 0.7216, "step": 48495 }, { "epoch": 0.54, "learning_rate": 4.1049546756429566e-05, "loss": 0.7739, "step": 48500 }, { "epoch": 0.54, "learning_rate": 4.104862402929105e-05, "loss": 0.7394, "step": 48505 }, { "epoch": 0.54, "learning_rate": 4.104770130215254e-05, "loss": 0.6526, "step": 48510 }, { "epoch": 0.54, "learning_rate": 4.104677857501403e-05, "loss": 0.8317, "step": 48515 }, { "epoch": 0.54, "learning_rate": 4.104585584787551e-05, "loss": 0.7727, "step": 48520 }, { "epoch": 0.54, "learning_rate": 4.1044933120737e-05, "loss": 0.7124, "step": 48525 }, { "epoch": 0.54, "learning_rate": 4.1044010393598493e-05, "loss": 0.6783, "step": 48530 }, { "epoch": 0.54, "learning_rate": 4.104308766645998e-05, "loss": 0.7884, "step": 48535 }, { "epoch": 0.54, "learning_rate": 4.104216493932146e-05, "loss": 0.7177, "step": 48540 }, { "epoch": 0.54, "learning_rate": 4.104124221218295e-05, "loss": 0.7592, "step": 48545 }, { "epoch": 0.54, "learning_rate": 4.1040319485044445e-05, "loss": 0.7406, "step": 48550 }, { "epoch": 0.54, "learning_rate": 4.1039396757905926e-05, "loss": 0.7737, "step": 48555 }, { "epoch": 0.54, "learning_rate": 4.1038474030767414e-05, "loss": 0.7971, "step": 48560 }, { "epoch": 0.54, "learning_rate": 4.10375513036289e-05, "loss": 0.7657, "step": 48565 }, { "epoch": 0.54, "learning_rate": 4.1036628576490396e-05, "loss": 0.8037, "step": 48570 }, { "epoch": 0.54, "learning_rate": 4.103570584935188e-05, "loss": 0.7437, "step": 48575 }, { "epoch": 0.54, "learning_rate": 4.1034783122213365e-05, "loss": 0.755, "step": 48580 }, { "epoch": 0.54, "learning_rate": 4.103386039507485e-05, "loss": 0.7703, "step": 48585 }, { "epoch": 0.54, "learning_rate": 4.103293766793634e-05, "loss": 0.7768, "step": 48590 }, { "epoch": 0.54, "learning_rate": 4.103201494079783e-05, "loss": 0.6874, "step": 48595 }, { "epoch": 0.54, "learning_rate": 4.1031092213659317e-05, "loss": 0.7364, "step": 48600 }, { "epoch": 0.54, "learning_rate": 4.1030169486520804e-05, "loss": 0.7665, "step": 48605 }, { "epoch": 0.54, "learning_rate": 4.102924675938229e-05, "loss": 0.7522, "step": 48610 }, { "epoch": 0.54, "learning_rate": 4.102832403224378e-05, "loss": 0.7547, "step": 48615 }, { "epoch": 0.54, "learning_rate": 4.102740130510527e-05, "loss": 0.789, "step": 48620 }, { "epoch": 0.54, "learning_rate": 4.1026478577966756e-05, "loss": 0.7432, "step": 48625 }, { "epoch": 0.54, "learning_rate": 4.102555585082824e-05, "loss": 0.7954, "step": 48630 }, { "epoch": 0.54, "learning_rate": 4.102463312368973e-05, "loss": 0.7686, "step": 48635 }, { "epoch": 0.54, "learning_rate": 4.102371039655122e-05, "loss": 0.7474, "step": 48640 }, { "epoch": 0.54, "learning_rate": 4.102278766941271e-05, "loss": 0.7084, "step": 48645 }, { "epoch": 0.54, "learning_rate": 4.102186494227419e-05, "loss": 0.7385, "step": 48650 }, { "epoch": 0.54, "learning_rate": 4.1020942215135676e-05, "loss": 0.7143, "step": 48655 }, { "epoch": 0.54, "learning_rate": 4.102001948799717e-05, "loss": 0.6944, "step": 48660 }, { "epoch": 0.54, "learning_rate": 4.101909676085865e-05, "loss": 0.7457, "step": 48665 }, { "epoch": 0.54, "learning_rate": 4.101817403372014e-05, "loss": 0.7752, "step": 48670 }, { "epoch": 0.54, "learning_rate": 4.101725130658163e-05, "loss": 0.7543, "step": 48675 }, { "epoch": 0.54, "learning_rate": 4.101632857944312e-05, "loss": 0.7911, "step": 48680 }, { "epoch": 0.54, "learning_rate": 4.10154058523046e-05, "loss": 0.7416, "step": 48685 }, { "epoch": 0.54, "learning_rate": 4.101448312516609e-05, "loss": 0.7055, "step": 48690 }, { "epoch": 0.54, "learning_rate": 4.101356039802758e-05, "loss": 0.7531, "step": 48695 }, { "epoch": 0.54, "learning_rate": 4.1012637670889074e-05, "loss": 0.7865, "step": 48700 }, { "epoch": 0.54, "learning_rate": 4.1011714943750555e-05, "loss": 0.7168, "step": 48705 }, { "epoch": 0.54, "learning_rate": 4.101079221661204e-05, "loss": 0.7036, "step": 48710 }, { "epoch": 0.54, "learning_rate": 4.100986948947353e-05, "loss": 0.7749, "step": 48715 }, { "epoch": 0.54, "learning_rate": 4.100894676233502e-05, "loss": 0.72, "step": 48720 }, { "epoch": 0.54, "learning_rate": 4.1008024035196506e-05, "loss": 0.7103, "step": 48725 }, { "epoch": 0.54, "learning_rate": 4.1007101308057994e-05, "loss": 0.7398, "step": 48730 }, { "epoch": 0.54, "learning_rate": 4.100617858091948e-05, "loss": 0.8017, "step": 48735 }, { "epoch": 0.54, "learning_rate": 4.100525585378096e-05, "loss": 0.7753, "step": 48740 }, { "epoch": 0.54, "learning_rate": 4.100433312664246e-05, "loss": 0.7424, "step": 48745 }, { "epoch": 0.54, "learning_rate": 4.1003410399503945e-05, "loss": 0.7667, "step": 48750 }, { "epoch": 0.54, "learning_rate": 4.100248767236543e-05, "loss": 0.7287, "step": 48755 }, { "epoch": 0.54, "learning_rate": 4.1001564945226914e-05, "loss": 0.7427, "step": 48760 }, { "epoch": 0.54, "learning_rate": 4.100064221808841e-05, "loss": 0.7595, "step": 48765 }, { "epoch": 0.54, "learning_rate": 4.09997194909499e-05, "loss": 0.7346, "step": 48770 }, { "epoch": 0.54, "learning_rate": 4.0998796763811384e-05, "loss": 0.7437, "step": 48775 }, { "epoch": 0.54, "learning_rate": 4.0997874036672866e-05, "loss": 0.7355, "step": 48780 }, { "epoch": 0.54, "learning_rate": 4.099695130953436e-05, "loss": 0.7681, "step": 48785 }, { "epoch": 0.54, "learning_rate": 4.099602858239585e-05, "loss": 0.7178, "step": 48790 }, { "epoch": 0.54, "learning_rate": 4.099510585525733e-05, "loss": 0.7089, "step": 48795 }, { "epoch": 0.54, "learning_rate": 4.099418312811882e-05, "loss": 0.7221, "step": 48800 }, { "epoch": 0.54, "learning_rate": 4.0993260400980305e-05, "loss": 0.727, "step": 48805 }, { "epoch": 0.54, "learning_rate": 4.09923376738418e-05, "loss": 0.6992, "step": 48810 }, { "epoch": 0.54, "learning_rate": 4.099141494670328e-05, "loss": 0.7472, "step": 48815 }, { "epoch": 0.54, "learning_rate": 4.099049221956477e-05, "loss": 0.671, "step": 48820 }, { "epoch": 0.54, "learning_rate": 4.0989569492426256e-05, "loss": 0.7643, "step": 48825 }, { "epoch": 0.54, "learning_rate": 4.0988646765287744e-05, "loss": 0.8135, "step": 48830 }, { "epoch": 0.54, "learning_rate": 4.098772403814923e-05, "loss": 0.7555, "step": 48835 }, { "epoch": 0.54, "learning_rate": 4.098680131101072e-05, "loss": 0.7538, "step": 48840 }, { "epoch": 0.54, "learning_rate": 4.098587858387221e-05, "loss": 0.7721, "step": 48845 }, { "epoch": 0.54, "learning_rate": 4.0984955856733695e-05, "loss": 0.7591, "step": 48850 }, { "epoch": 0.54, "learning_rate": 4.098403312959518e-05, "loss": 0.7907, "step": 48855 }, { "epoch": 0.54, "learning_rate": 4.098311040245667e-05, "loss": 0.7325, "step": 48860 }, { "epoch": 0.54, "learning_rate": 4.098218767531816e-05, "loss": 0.72, "step": 48865 }, { "epoch": 0.54, "learning_rate": 4.098126494817965e-05, "loss": 0.7272, "step": 48870 }, { "epoch": 0.54, "learning_rate": 4.0980342221041135e-05, "loss": 0.7211, "step": 48875 }, { "epoch": 0.54, "learning_rate": 4.097941949390262e-05, "loss": 0.7261, "step": 48880 }, { "epoch": 0.54, "learning_rate": 4.097849676676411e-05, "loss": 0.7544, "step": 48885 }, { "epoch": 0.54, "learning_rate": 4.097757403962559e-05, "loss": 0.7769, "step": 48890 }, { "epoch": 0.54, "learning_rate": 4.0976651312487086e-05, "loss": 0.7545, "step": 48895 }, { "epoch": 0.54, "learning_rate": 4.0975728585348574e-05, "loss": 0.7252, "step": 48900 }, { "epoch": 0.54, "learning_rate": 4.0974805858210055e-05, "loss": 0.6616, "step": 48905 }, { "epoch": 0.54, "learning_rate": 4.097388313107154e-05, "loss": 0.7215, "step": 48910 }, { "epoch": 0.54, "learning_rate": 4.097296040393304e-05, "loss": 0.7089, "step": 48915 }, { "epoch": 0.54, "learning_rate": 4.0972037676794525e-05, "loss": 0.7544, "step": 48920 }, { "epoch": 0.54, "learning_rate": 4.0971114949656006e-05, "loss": 0.7219, "step": 48925 }, { "epoch": 0.54, "learning_rate": 4.0970192222517494e-05, "loss": 0.7936, "step": 48930 }, { "epoch": 0.54, "learning_rate": 4.096926949537899e-05, "loss": 0.767, "step": 48935 }, { "epoch": 0.54, "learning_rate": 4.096834676824047e-05, "loss": 0.7163, "step": 48940 }, { "epoch": 0.54, "learning_rate": 4.096742404110196e-05, "loss": 0.7257, "step": 48945 }, { "epoch": 0.54, "learning_rate": 4.0966501313963446e-05, "loss": 0.7358, "step": 48950 }, { "epoch": 0.54, "learning_rate": 4.096557858682494e-05, "loss": 0.6908, "step": 48955 }, { "epoch": 0.54, "learning_rate": 4.096465585968642e-05, "loss": 0.8135, "step": 48960 }, { "epoch": 0.54, "learning_rate": 4.096373313254791e-05, "loss": 0.6894, "step": 48965 }, { "epoch": 0.54, "learning_rate": 4.09628104054094e-05, "loss": 0.7864, "step": 48970 }, { "epoch": 0.54, "learning_rate": 4.0961887678270885e-05, "loss": 0.7529, "step": 48975 }, { "epoch": 0.54, "learning_rate": 4.096096495113237e-05, "loss": 0.7331, "step": 48980 }, { "epoch": 0.54, "learning_rate": 4.096004222399386e-05, "loss": 0.7478, "step": 48985 }, { "epoch": 0.54, "learning_rate": 4.095911949685535e-05, "loss": 0.7272, "step": 48990 }, { "epoch": 0.54, "learning_rate": 4.0958196769716836e-05, "loss": 0.7219, "step": 48995 }, { "epoch": 0.54, "learning_rate": 4.0957274042578324e-05, "loss": 0.7345, "step": 49000 }, { "epoch": 0.54, "eval_loss": 0.7194899320602417, "eval_runtime": 69.3801, "eval_samples_per_second": 28.827, "eval_steps_per_second": 14.413, "step": 49000 }, { "epoch": 0.54, "learning_rate": 4.095635131543981e-05, "loss": 0.7748, "step": 49005 }, { "epoch": 0.54, "learning_rate": 4.09554285883013e-05, "loss": 0.6607, "step": 49010 }, { "epoch": 0.54, "learning_rate": 4.095450586116278e-05, "loss": 0.724, "step": 49015 }, { "epoch": 0.54, "learning_rate": 4.0953583134024276e-05, "loss": 0.8113, "step": 49020 }, { "epoch": 0.54, "learning_rate": 4.095266040688576e-05, "loss": 0.7884, "step": 49025 }, { "epoch": 0.54, "learning_rate": 4.095173767974725e-05, "loss": 0.7267, "step": 49030 }, { "epoch": 0.54, "learning_rate": 4.095081495260873e-05, "loss": 0.7521, "step": 49035 }, { "epoch": 0.54, "learning_rate": 4.094989222547022e-05, "loss": 0.6845, "step": 49040 }, { "epoch": 0.54, "learning_rate": 4.0948969498331715e-05, "loss": 0.6836, "step": 49045 }, { "epoch": 0.54, "learning_rate": 4.0948046771193196e-05, "loss": 0.7805, "step": 49050 }, { "epoch": 0.54, "learning_rate": 4.0947124044054684e-05, "loss": 0.7762, "step": 49055 }, { "epoch": 0.54, "learning_rate": 4.094620131691617e-05, "loss": 0.7714, "step": 49060 }, { "epoch": 0.54, "learning_rate": 4.0945278589777666e-05, "loss": 0.7514, "step": 49065 }, { "epoch": 0.54, "learning_rate": 4.094435586263915e-05, "loss": 0.7649, "step": 49070 }, { "epoch": 0.54, "learning_rate": 4.0943433135500635e-05, "loss": 0.6993, "step": 49075 }, { "epoch": 0.54, "learning_rate": 4.094251040836212e-05, "loss": 0.6802, "step": 49080 }, { "epoch": 0.54, "learning_rate": 4.094158768122362e-05, "loss": 0.7053, "step": 49085 }, { "epoch": 0.54, "learning_rate": 4.09406649540851e-05, "loss": 0.6913, "step": 49090 }, { "epoch": 0.54, "learning_rate": 4.0939742226946586e-05, "loss": 0.742, "step": 49095 }, { "epoch": 0.54, "learning_rate": 4.0938819499808074e-05, "loss": 0.7275, "step": 49100 }, { "epoch": 0.54, "learning_rate": 4.093789677266956e-05, "loss": 0.8532, "step": 49105 }, { "epoch": 0.54, "learning_rate": 4.093697404553105e-05, "loss": 0.7658, "step": 49110 }, { "epoch": 0.54, "learning_rate": 4.093605131839254e-05, "loss": 0.7657, "step": 49115 }, { "epoch": 0.54, "learning_rate": 4.0935128591254026e-05, "loss": 0.7819, "step": 49120 }, { "epoch": 0.54, "learning_rate": 4.0934205864115514e-05, "loss": 0.7487, "step": 49125 }, { "epoch": 0.54, "learning_rate": 4.0933283136977e-05, "loss": 0.7076, "step": 49130 }, { "epoch": 0.54, "learning_rate": 4.093236040983849e-05, "loss": 0.7411, "step": 49135 }, { "epoch": 0.54, "learning_rate": 4.093143768269998e-05, "loss": 0.7335, "step": 49140 }, { "epoch": 0.54, "learning_rate": 4.093051495556146e-05, "loss": 0.7012, "step": 49145 }, { "epoch": 0.54, "learning_rate": 4.092959222842295e-05, "loss": 0.7253, "step": 49150 }, { "epoch": 0.54, "learning_rate": 4.092866950128444e-05, "loss": 0.7135, "step": 49155 }, { "epoch": 0.54, "learning_rate": 4.092774677414593e-05, "loss": 0.7336, "step": 49160 }, { "epoch": 0.54, "learning_rate": 4.092682404700741e-05, "loss": 0.7277, "step": 49165 }, { "epoch": 0.54, "learning_rate": 4.0925901319868904e-05, "loss": 0.741, "step": 49170 }, { "epoch": 0.54, "learning_rate": 4.092497859273039e-05, "loss": 0.7109, "step": 49175 }, { "epoch": 0.54, "learning_rate": 4.092405586559187e-05, "loss": 0.733, "step": 49180 }, { "epoch": 0.54, "learning_rate": 4.092313313845336e-05, "loss": 0.7659, "step": 49185 }, { "epoch": 0.54, "learning_rate": 4.092221041131485e-05, "loss": 0.8238, "step": 49190 }, { "epoch": 0.54, "learning_rate": 4.0921287684176343e-05, "loss": 0.7853, "step": 49195 }, { "epoch": 0.54, "learning_rate": 4.0920364957037825e-05, "loss": 0.7088, "step": 49200 }, { "epoch": 0.54, "learning_rate": 4.091944222989931e-05, "loss": 0.7267, "step": 49205 }, { "epoch": 0.54, "learning_rate": 4.09185195027608e-05, "loss": 0.7099, "step": 49210 }, { "epoch": 0.54, "learning_rate": 4.091759677562229e-05, "loss": 0.7391, "step": 49215 }, { "epoch": 0.54, "learning_rate": 4.0916674048483776e-05, "loss": 0.7732, "step": 49220 }, { "epoch": 0.55, "learning_rate": 4.0915751321345264e-05, "loss": 0.7426, "step": 49225 }, { "epoch": 0.55, "learning_rate": 4.091482859420675e-05, "loss": 0.7209, "step": 49230 }, { "epoch": 0.55, "learning_rate": 4.091390586706824e-05, "loss": 0.7732, "step": 49235 }, { "epoch": 0.55, "learning_rate": 4.091298313992973e-05, "loss": 0.7244, "step": 49240 }, { "epoch": 0.55, "learning_rate": 4.0912060412791215e-05, "loss": 0.7367, "step": 49245 }, { "epoch": 0.55, "learning_rate": 4.09111376856527e-05, "loss": 0.6904, "step": 49250 }, { "epoch": 0.55, "learning_rate": 4.091021495851419e-05, "loss": 0.7533, "step": 49255 }, { "epoch": 0.55, "learning_rate": 4.090929223137568e-05, "loss": 0.7389, "step": 49260 }, { "epoch": 0.55, "learning_rate": 4.0908369504237167e-05, "loss": 0.7578, "step": 49265 }, { "epoch": 0.55, "learning_rate": 4.0907446777098654e-05, "loss": 0.7622, "step": 49270 }, { "epoch": 0.55, "learning_rate": 4.0906524049960135e-05, "loss": 0.6883, "step": 49275 }, { "epoch": 0.55, "learning_rate": 4.090560132282163e-05, "loss": 0.7139, "step": 49280 }, { "epoch": 0.55, "learning_rate": 4.090467859568312e-05, "loss": 0.7118, "step": 49285 }, { "epoch": 0.55, "learning_rate": 4.09037558685446e-05, "loss": 0.7741, "step": 49290 }, { "epoch": 0.55, "learning_rate": 4.090283314140609e-05, "loss": 0.6979, "step": 49295 }, { "epoch": 0.55, "learning_rate": 4.090191041426758e-05, "loss": 0.7738, "step": 49300 }, { "epoch": 0.55, "learning_rate": 4.090098768712907e-05, "loss": 0.7425, "step": 49305 }, { "epoch": 0.55, "learning_rate": 4.090006495999055e-05, "loss": 0.7827, "step": 49310 }, { "epoch": 0.55, "learning_rate": 4.089914223285204e-05, "loss": 0.7277, "step": 49315 }, { "epoch": 0.55, "learning_rate": 4.089821950571353e-05, "loss": 0.7351, "step": 49320 }, { "epoch": 0.55, "learning_rate": 4.0897296778575014e-05, "loss": 0.7516, "step": 49325 }, { "epoch": 0.55, "learning_rate": 4.08963740514365e-05, "loss": 0.7193, "step": 49330 }, { "epoch": 0.55, "learning_rate": 4.089545132429799e-05, "loss": 0.7204, "step": 49335 }, { "epoch": 0.55, "learning_rate": 4.089452859715948e-05, "loss": 0.6958, "step": 49340 }, { "epoch": 0.55, "learning_rate": 4.0893605870020965e-05, "loss": 0.7394, "step": 49345 }, { "epoch": 0.55, "learning_rate": 4.089268314288245e-05, "loss": 0.7691, "step": 49350 }, { "epoch": 0.55, "learning_rate": 4.089176041574394e-05, "loss": 0.7127, "step": 49355 }, { "epoch": 0.55, "learning_rate": 4.089083768860543e-05, "loss": 0.759, "step": 49360 }, { "epoch": 0.55, "learning_rate": 4.088991496146692e-05, "loss": 0.7367, "step": 49365 }, { "epoch": 0.55, "learning_rate": 4.0888992234328405e-05, "loss": 0.7096, "step": 49370 }, { "epoch": 0.55, "learning_rate": 4.088806950718989e-05, "loss": 0.764, "step": 49375 }, { "epoch": 0.55, "learning_rate": 4.088714678005138e-05, "loss": 0.7343, "step": 49380 }, { "epoch": 0.55, "learning_rate": 4.088622405291287e-05, "loss": 0.7595, "step": 49385 }, { "epoch": 0.55, "learning_rate": 4.0885301325774356e-05, "loss": 0.7556, "step": 49390 }, { "epoch": 0.55, "learning_rate": 4.0884378598635844e-05, "loss": 0.7253, "step": 49395 }, { "epoch": 0.55, "learning_rate": 4.0883455871497325e-05, "loss": 0.7577, "step": 49400 }, { "epoch": 0.55, "learning_rate": 4.088253314435882e-05, "loss": 0.7577, "step": 49405 }, { "epoch": 0.55, "learning_rate": 4.088161041722031e-05, "loss": 0.6924, "step": 49410 }, { "epoch": 0.55, "learning_rate": 4.0880687690081795e-05, "loss": 0.7182, "step": 49415 }, { "epoch": 0.55, "learning_rate": 4.0879764962943276e-05, "loss": 0.7502, "step": 49420 }, { "epoch": 0.55, "learning_rate": 4.0878842235804764e-05, "loss": 0.743, "step": 49425 }, { "epoch": 0.55, "learning_rate": 4.087791950866626e-05, "loss": 0.7245, "step": 49430 }, { "epoch": 0.55, "learning_rate": 4.087699678152775e-05, "loss": 0.6924, "step": 49435 }, { "epoch": 0.55, "learning_rate": 4.087607405438923e-05, "loss": 0.7501, "step": 49440 }, { "epoch": 0.55, "learning_rate": 4.0875151327250716e-05, "loss": 0.7269, "step": 49445 }, { "epoch": 0.55, "learning_rate": 4.087422860011221e-05, "loss": 0.7469, "step": 49450 }, { "epoch": 0.55, "learning_rate": 4.087330587297369e-05, "loss": 0.8496, "step": 49455 }, { "epoch": 0.55, "learning_rate": 4.087238314583518e-05, "loss": 0.7505, "step": 49460 }, { "epoch": 0.55, "learning_rate": 4.087146041869667e-05, "loss": 0.6802, "step": 49465 }, { "epoch": 0.55, "learning_rate": 4.087053769155816e-05, "loss": 0.7432, "step": 49470 }, { "epoch": 0.55, "learning_rate": 4.086961496441964e-05, "loss": 0.7852, "step": 49475 }, { "epoch": 0.55, "learning_rate": 4.086869223728113e-05, "loss": 0.7559, "step": 49480 }, { "epoch": 0.55, "learning_rate": 4.086776951014262e-05, "loss": 0.7496, "step": 49485 }, { "epoch": 0.55, "learning_rate": 4.0866846783004106e-05, "loss": 0.8054, "step": 49490 }, { "epoch": 0.55, "learning_rate": 4.0865924055865594e-05, "loss": 0.7452, "step": 49495 }, { "epoch": 0.55, "learning_rate": 4.086500132872708e-05, "loss": 0.7236, "step": 49500 }, { "epoch": 0.55, "learning_rate": 4.086407860158857e-05, "loss": 0.7072, "step": 49505 }, { "epoch": 0.55, "learning_rate": 4.086315587445006e-05, "loss": 0.7285, "step": 49510 }, { "epoch": 0.55, "learning_rate": 4.0862233147311545e-05, "loss": 0.7508, "step": 49515 }, { "epoch": 0.55, "learning_rate": 4.086131042017303e-05, "loss": 0.747, "step": 49520 }, { "epoch": 0.55, "learning_rate": 4.086038769303452e-05, "loss": 0.6968, "step": 49525 }, { "epoch": 0.55, "learning_rate": 4.0859464965896e-05, "loss": 0.7942, "step": 49530 }, { "epoch": 0.55, "learning_rate": 4.08585422387575e-05, "loss": 0.7081, "step": 49535 }, { "epoch": 0.55, "learning_rate": 4.0857619511618985e-05, "loss": 0.7602, "step": 49540 }, { "epoch": 0.55, "learning_rate": 4.085669678448047e-05, "loss": 0.8081, "step": 49545 }, { "epoch": 0.55, "learning_rate": 4.0855774057341954e-05, "loss": 0.7968, "step": 49550 }, { "epoch": 0.55, "learning_rate": 4.085485133020345e-05, "loss": 0.7616, "step": 49555 }, { "epoch": 0.55, "learning_rate": 4.0853928603064936e-05, "loss": 0.7253, "step": 49560 }, { "epoch": 0.55, "learning_rate": 4.085300587592642e-05, "loss": 0.7131, "step": 49565 }, { "epoch": 0.55, "learning_rate": 4.0852083148787905e-05, "loss": 0.7372, "step": 49570 }, { "epoch": 0.55, "learning_rate": 4.085116042164939e-05, "loss": 0.7445, "step": 49575 }, { "epoch": 0.55, "learning_rate": 4.085023769451089e-05, "loss": 0.7295, "step": 49580 }, { "epoch": 0.55, "learning_rate": 4.084931496737237e-05, "loss": 0.7442, "step": 49585 }, { "epoch": 0.55, "learning_rate": 4.0848392240233856e-05, "loss": 0.6655, "step": 49590 }, { "epoch": 0.55, "learning_rate": 4.0847469513095344e-05, "loss": 0.7355, "step": 49595 }, { "epoch": 0.55, "learning_rate": 4.084654678595683e-05, "loss": 0.7662, "step": 49600 }, { "epoch": 0.55, "learning_rate": 4.084562405881832e-05, "loss": 0.6828, "step": 49605 }, { "epoch": 0.55, "learning_rate": 4.084470133167981e-05, "loss": 0.7284, "step": 49610 }, { "epoch": 0.55, "learning_rate": 4.0843778604541296e-05, "loss": 0.7718, "step": 49615 }, { "epoch": 0.55, "learning_rate": 4.0842855877402783e-05, "loss": 0.7667, "step": 49620 }, { "epoch": 0.55, "learning_rate": 4.084193315026427e-05, "loss": 0.6984, "step": 49625 }, { "epoch": 0.55, "learning_rate": 4.084101042312576e-05, "loss": 0.7316, "step": 49630 }, { "epoch": 0.55, "learning_rate": 4.084008769598725e-05, "loss": 0.7068, "step": 49635 }, { "epoch": 0.55, "learning_rate": 4.083916496884873e-05, "loss": 0.7554, "step": 49640 }, { "epoch": 0.55, "learning_rate": 4.083824224171022e-05, "loss": 0.7101, "step": 49645 }, { "epoch": 0.55, "learning_rate": 4.083731951457171e-05, "loss": 0.728, "step": 49650 }, { "epoch": 0.55, "learning_rate": 4.08363967874332e-05, "loss": 0.703, "step": 49655 }, { "epoch": 0.55, "learning_rate": 4.083547406029468e-05, "loss": 0.7033, "step": 49660 }, { "epoch": 0.55, "learning_rate": 4.0834551333156174e-05, "loss": 0.7235, "step": 49665 }, { "epoch": 0.55, "learning_rate": 4.083362860601766e-05, "loss": 0.6687, "step": 49670 }, { "epoch": 0.55, "learning_rate": 4.083270587887914e-05, "loss": 0.6951, "step": 49675 }, { "epoch": 0.55, "learning_rate": 4.083178315174063e-05, "loss": 0.6941, "step": 49680 }, { "epoch": 0.55, "learning_rate": 4.0830860424602126e-05, "loss": 0.7473, "step": 49685 }, { "epoch": 0.55, "learning_rate": 4.082993769746361e-05, "loss": 0.8031, "step": 49690 }, { "epoch": 0.55, "learning_rate": 4.0829014970325094e-05, "loss": 0.7067, "step": 49695 }, { "epoch": 0.55, "learning_rate": 4.082809224318658e-05, "loss": 0.7857, "step": 49700 }, { "epoch": 0.55, "learning_rate": 4.082716951604808e-05, "loss": 0.739, "step": 49705 }, { "epoch": 0.55, "learning_rate": 4.082624678890956e-05, "loss": 0.7619, "step": 49710 }, { "epoch": 0.55, "learning_rate": 4.0825324061771046e-05, "loss": 0.7421, "step": 49715 }, { "epoch": 0.55, "learning_rate": 4.0824401334632534e-05, "loss": 0.7931, "step": 49720 }, { "epoch": 0.55, "learning_rate": 4.082347860749402e-05, "loss": 0.7944, "step": 49725 }, { "epoch": 0.55, "learning_rate": 4.082255588035551e-05, "loss": 0.7253, "step": 49730 }, { "epoch": 0.55, "learning_rate": 4.0821633153217e-05, "loss": 0.7459, "step": 49735 }, { "epoch": 0.55, "learning_rate": 4.0820710426078485e-05, "loss": 0.7408, "step": 49740 }, { "epoch": 0.55, "learning_rate": 4.081978769893997e-05, "loss": 0.7929, "step": 49745 }, { "epoch": 0.55, "learning_rate": 4.081886497180146e-05, "loss": 0.6857, "step": 49750 }, { "epoch": 0.55, "learning_rate": 4.081794224466295e-05, "loss": 0.7424, "step": 49755 }, { "epoch": 0.55, "learning_rate": 4.0817019517524436e-05, "loss": 0.7384, "step": 49760 }, { "epoch": 0.55, "learning_rate": 4.0816096790385924e-05, "loss": 0.8055, "step": 49765 }, { "epoch": 0.55, "learning_rate": 4.081517406324741e-05, "loss": 0.7646, "step": 49770 }, { "epoch": 0.55, "learning_rate": 4.08142513361089e-05, "loss": 0.6939, "step": 49775 }, { "epoch": 0.55, "learning_rate": 4.081332860897039e-05, "loss": 0.7713, "step": 49780 }, { "epoch": 0.55, "learning_rate": 4.081240588183187e-05, "loss": 0.7925, "step": 49785 }, { "epoch": 0.55, "learning_rate": 4.0811483154693364e-05, "loss": 0.7598, "step": 49790 }, { "epoch": 0.55, "learning_rate": 4.081056042755485e-05, "loss": 0.7315, "step": 49795 }, { "epoch": 0.55, "learning_rate": 4.080963770041634e-05, "loss": 0.776, "step": 49800 }, { "epoch": 0.55, "learning_rate": 4.080871497327782e-05, "loss": 0.8318, "step": 49805 }, { "epoch": 0.55, "learning_rate": 4.080779224613931e-05, "loss": 0.7943, "step": 49810 }, { "epoch": 0.55, "learning_rate": 4.08068695190008e-05, "loss": 0.7491, "step": 49815 }, { "epoch": 0.55, "learning_rate": 4.080594679186229e-05, "loss": 0.717, "step": 49820 }, { "epoch": 0.55, "learning_rate": 4.080502406472377e-05, "loss": 0.7877, "step": 49825 }, { "epoch": 0.55, "learning_rate": 4.080410133758526e-05, "loss": 0.7693, "step": 49830 }, { "epoch": 0.55, "learning_rate": 4.0803178610446754e-05, "loss": 0.7391, "step": 49835 }, { "epoch": 0.55, "learning_rate": 4.0802255883308235e-05, "loss": 0.7077, "step": 49840 }, { "epoch": 0.55, "learning_rate": 4.080133315616972e-05, "loss": 0.75, "step": 49845 }, { "epoch": 0.55, "learning_rate": 4.080041042903121e-05, "loss": 0.7479, "step": 49850 }, { "epoch": 0.55, "learning_rate": 4.0799487701892706e-05, "loss": 0.7084, "step": 49855 }, { "epoch": 0.55, "learning_rate": 4.079856497475419e-05, "loss": 0.76, "step": 49860 }, { "epoch": 0.55, "learning_rate": 4.0797642247615675e-05, "loss": 0.6958, "step": 49865 }, { "epoch": 0.55, "learning_rate": 4.079671952047716e-05, "loss": 0.7664, "step": 49870 }, { "epoch": 0.55, "learning_rate": 4.079579679333865e-05, "loss": 0.699, "step": 49875 }, { "epoch": 0.55, "learning_rate": 4.079487406620014e-05, "loss": 0.7396, "step": 49880 }, { "epoch": 0.55, "learning_rate": 4.0793951339061626e-05, "loss": 0.6961, "step": 49885 }, { "epoch": 0.55, "learning_rate": 4.0793028611923114e-05, "loss": 0.682, "step": 49890 }, { "epoch": 0.55, "learning_rate": 4.07921058847846e-05, "loss": 0.7822, "step": 49895 }, { "epoch": 0.55, "learning_rate": 4.079118315764609e-05, "loss": 0.7942, "step": 49900 }, { "epoch": 0.55, "learning_rate": 4.079026043050758e-05, "loss": 0.716, "step": 49905 }, { "epoch": 0.55, "learning_rate": 4.0789337703369065e-05, "loss": 0.703, "step": 49910 }, { "epoch": 0.55, "learning_rate": 4.0788414976230546e-05, "loss": 0.717, "step": 49915 }, { "epoch": 0.55, "learning_rate": 4.078749224909204e-05, "loss": 0.826, "step": 49920 }, { "epoch": 0.55, "learning_rate": 4.078656952195353e-05, "loss": 0.7433, "step": 49925 }, { "epoch": 0.55, "learning_rate": 4.0785646794815017e-05, "loss": 0.7439, "step": 49930 }, { "epoch": 0.55, "learning_rate": 4.07847240676765e-05, "loss": 0.7932, "step": 49935 }, { "epoch": 0.55, "learning_rate": 4.078380134053799e-05, "loss": 0.7144, "step": 49940 }, { "epoch": 0.55, "learning_rate": 4.078287861339948e-05, "loss": 0.6972, "step": 49945 }, { "epoch": 0.55, "learning_rate": 4.078195588626096e-05, "loss": 0.8102, "step": 49950 }, { "epoch": 0.55, "learning_rate": 4.078103315912245e-05, "loss": 0.741, "step": 49955 }, { "epoch": 0.55, "learning_rate": 4.078011043198394e-05, "loss": 0.6968, "step": 49960 }, { "epoch": 0.55, "learning_rate": 4.077918770484543e-05, "loss": 0.7259, "step": 49965 }, { "epoch": 0.55, "learning_rate": 4.077826497770691e-05, "loss": 0.753, "step": 49970 }, { "epoch": 0.55, "learning_rate": 4.07773422505684e-05, "loss": 0.7333, "step": 49975 }, { "epoch": 0.55, "learning_rate": 4.077641952342989e-05, "loss": 0.7805, "step": 49980 }, { "epoch": 0.55, "learning_rate": 4.0775496796291376e-05, "loss": 0.6942, "step": 49985 }, { "epoch": 0.55, "learning_rate": 4.0774574069152864e-05, "loss": 0.7729, "step": 49990 }, { "epoch": 0.55, "learning_rate": 4.077365134201435e-05, "loss": 0.7557, "step": 49995 }, { "epoch": 0.55, "learning_rate": 4.077272861487584e-05, "loss": 0.758, "step": 50000 }, { "epoch": 0.55, "eval_loss": 0.6693544983863831, "eval_runtime": 69.2521, "eval_samples_per_second": 28.88, "eval_steps_per_second": 14.44, "step": 50000 }, { "epoch": 0.55, "learning_rate": 4.077180588773733e-05, "loss": 0.7549, "step": 50005 }, { "epoch": 0.55, "learning_rate": 4.0770883160598815e-05, "loss": 0.765, "step": 50010 }, { "epoch": 0.55, "learning_rate": 4.07699604334603e-05, "loss": 0.7219, "step": 50015 }, { "epoch": 0.55, "learning_rate": 4.076903770632179e-05, "loss": 0.7356, "step": 50020 }, { "epoch": 0.55, "learning_rate": 4.076811497918327e-05, "loss": 0.6955, "step": 50025 }, { "epoch": 0.55, "learning_rate": 4.076719225204477e-05, "loss": 0.7124, "step": 50030 }, { "epoch": 0.55, "learning_rate": 4.0766269524906255e-05, "loss": 0.7027, "step": 50035 }, { "epoch": 0.55, "learning_rate": 4.076534679776774e-05, "loss": 0.7184, "step": 50040 }, { "epoch": 0.55, "learning_rate": 4.0764424070629224e-05, "loss": 0.7567, "step": 50045 }, { "epoch": 0.55, "learning_rate": 4.076350134349072e-05, "loss": 0.768, "step": 50050 }, { "epoch": 0.55, "learning_rate": 4.0762578616352206e-05, "loss": 0.7571, "step": 50055 }, { "epoch": 0.55, "learning_rate": 4.076165588921369e-05, "loss": 0.7644, "step": 50060 }, { "epoch": 0.55, "learning_rate": 4.0760733162075175e-05, "loss": 0.8031, "step": 50065 }, { "epoch": 0.55, "learning_rate": 4.075981043493667e-05, "loss": 0.7339, "step": 50070 }, { "epoch": 0.55, "learning_rate": 4.075888770779816e-05, "loss": 0.6826, "step": 50075 }, { "epoch": 0.55, "learning_rate": 4.075796498065964e-05, "loss": 0.709, "step": 50080 }, { "epoch": 0.55, "learning_rate": 4.0757042253521126e-05, "loss": 0.7481, "step": 50085 }, { "epoch": 0.55, "learning_rate": 4.075611952638262e-05, "loss": 0.7334, "step": 50090 }, { "epoch": 0.55, "learning_rate": 4.07551967992441e-05, "loss": 0.732, "step": 50095 }, { "epoch": 0.55, "learning_rate": 4.075427407210559e-05, "loss": 0.7004, "step": 50100 }, { "epoch": 0.55, "learning_rate": 4.075335134496708e-05, "loss": 0.7657, "step": 50105 }, { "epoch": 0.55, "learning_rate": 4.0752428617828566e-05, "loss": 0.8142, "step": 50110 }, { "epoch": 0.55, "learning_rate": 4.075150589069005e-05, "loss": 0.679, "step": 50115 }, { "epoch": 0.55, "learning_rate": 4.075058316355154e-05, "loss": 0.8133, "step": 50120 }, { "epoch": 0.56, "learning_rate": 4.074966043641303e-05, "loss": 0.7658, "step": 50125 }, { "epoch": 0.56, "learning_rate": 4.074873770927452e-05, "loss": 0.7345, "step": 50130 }, { "epoch": 0.56, "learning_rate": 4.0747814982136005e-05, "loss": 0.7152, "step": 50135 }, { "epoch": 0.56, "learning_rate": 4.074689225499749e-05, "loss": 0.7264, "step": 50140 }, { "epoch": 0.56, "learning_rate": 4.074596952785898e-05, "loss": 0.755, "step": 50145 }, { "epoch": 0.56, "learning_rate": 4.074504680072047e-05, "loss": 0.7689, "step": 50150 }, { "epoch": 0.56, "learning_rate": 4.0744124073581956e-05, "loss": 0.7287, "step": 50155 }, { "epoch": 0.56, "learning_rate": 4.0743201346443444e-05, "loss": 0.7598, "step": 50160 }, { "epoch": 0.56, "learning_rate": 4.074227861930493e-05, "loss": 0.7252, "step": 50165 }, { "epoch": 0.56, "learning_rate": 4.074135589216641e-05, "loss": 0.6939, "step": 50170 }, { "epoch": 0.56, "learning_rate": 4.07404331650279e-05, "loss": 0.7404, "step": 50175 }, { "epoch": 0.56, "learning_rate": 4.0739510437889395e-05, "loss": 0.6486, "step": 50180 }, { "epoch": 0.56, "learning_rate": 4.073858771075088e-05, "loss": 0.6361, "step": 50185 }, { "epoch": 0.56, "learning_rate": 4.0737664983612364e-05, "loss": 0.7674, "step": 50190 }, { "epoch": 0.56, "learning_rate": 4.073674225647385e-05, "loss": 0.7331, "step": 50195 }, { "epoch": 0.56, "learning_rate": 4.073581952933535e-05, "loss": 0.8045, "step": 50200 }, { "epoch": 0.56, "learning_rate": 4.0734896802196835e-05, "loss": 0.7185, "step": 50205 }, { "epoch": 0.56, "learning_rate": 4.0733974075058316e-05, "loss": 0.74, "step": 50210 }, { "epoch": 0.56, "learning_rate": 4.0733051347919804e-05, "loss": 0.7984, "step": 50215 }, { "epoch": 0.56, "learning_rate": 4.07321286207813e-05, "loss": 0.7417, "step": 50220 }, { "epoch": 0.56, "learning_rate": 4.073120589364278e-05, "loss": 0.7979, "step": 50225 }, { "epoch": 0.56, "learning_rate": 4.073028316650427e-05, "loss": 0.7945, "step": 50230 }, { "epoch": 0.56, "learning_rate": 4.0729360439365755e-05, "loss": 0.777, "step": 50235 }, { "epoch": 0.56, "learning_rate": 4.072843771222725e-05, "loss": 0.7115, "step": 50240 }, { "epoch": 0.56, "learning_rate": 4.072751498508873e-05, "loss": 0.7915, "step": 50245 }, { "epoch": 0.56, "learning_rate": 4.072659225795022e-05, "loss": 0.8082, "step": 50250 }, { "epoch": 0.56, "learning_rate": 4.0725669530811706e-05, "loss": 0.737, "step": 50255 }, { "epoch": 0.56, "learning_rate": 4.0724746803673194e-05, "loss": 0.7541, "step": 50260 }, { "epoch": 0.56, "learning_rate": 4.072382407653468e-05, "loss": 0.7295, "step": 50265 }, { "epoch": 0.56, "learning_rate": 4.072290134939617e-05, "loss": 0.7872, "step": 50270 }, { "epoch": 0.56, "learning_rate": 4.072197862225766e-05, "loss": 0.6953, "step": 50275 }, { "epoch": 0.56, "learning_rate": 4.0721055895119146e-05, "loss": 0.7509, "step": 50280 }, { "epoch": 0.56, "learning_rate": 4.0720133167980633e-05, "loss": 0.69, "step": 50285 }, { "epoch": 0.56, "learning_rate": 4.071921044084212e-05, "loss": 0.7863, "step": 50290 }, { "epoch": 0.56, "learning_rate": 4.071828771370361e-05, "loss": 0.7558, "step": 50295 }, { "epoch": 0.56, "learning_rate": 4.071736498656509e-05, "loss": 0.7218, "step": 50300 }, { "epoch": 0.56, "learning_rate": 4.0716442259426585e-05, "loss": 0.726, "step": 50305 }, { "epoch": 0.56, "learning_rate": 4.071551953228807e-05, "loss": 0.7532, "step": 50310 }, { "epoch": 0.56, "learning_rate": 4.071459680514956e-05, "loss": 0.7347, "step": 50315 }, { "epoch": 0.56, "learning_rate": 4.071367407801104e-05, "loss": 0.7728, "step": 50320 }, { "epoch": 0.56, "learning_rate": 4.071275135087253e-05, "loss": 0.6835, "step": 50325 }, { "epoch": 0.56, "learning_rate": 4.0711828623734024e-05, "loss": 0.788, "step": 50330 }, { "epoch": 0.56, "learning_rate": 4.0710905896595505e-05, "loss": 0.7299, "step": 50335 }, { "epoch": 0.56, "learning_rate": 4.070998316945699e-05, "loss": 0.7213, "step": 50340 }, { "epoch": 0.56, "learning_rate": 4.070906044231848e-05, "loss": 0.7249, "step": 50345 }, { "epoch": 0.56, "learning_rate": 4.0708137715179975e-05, "loss": 0.7623, "step": 50350 }, { "epoch": 0.56, "learning_rate": 4.0707214988041457e-05, "loss": 0.7491, "step": 50355 }, { "epoch": 0.56, "learning_rate": 4.0706292260902944e-05, "loss": 0.7588, "step": 50360 }, { "epoch": 0.56, "learning_rate": 4.070536953376443e-05, "loss": 0.7061, "step": 50365 }, { "epoch": 0.56, "learning_rate": 4.070444680662592e-05, "loss": 0.7732, "step": 50370 }, { "epoch": 0.56, "learning_rate": 4.070352407948741e-05, "loss": 0.7602, "step": 50375 }, { "epoch": 0.56, "learning_rate": 4.0702601352348896e-05, "loss": 0.7407, "step": 50380 }, { "epoch": 0.56, "learning_rate": 4.0701678625210384e-05, "loss": 0.7055, "step": 50385 }, { "epoch": 0.56, "learning_rate": 4.070075589807187e-05, "loss": 0.7069, "step": 50390 }, { "epoch": 0.56, "learning_rate": 4.069983317093336e-05, "loss": 0.7583, "step": 50395 }, { "epoch": 0.56, "learning_rate": 4.069891044379485e-05, "loss": 0.7427, "step": 50400 }, { "epoch": 0.56, "learning_rate": 4.0697987716656335e-05, "loss": 0.6968, "step": 50405 }, { "epoch": 0.56, "learning_rate": 4.0697064989517816e-05, "loss": 0.6922, "step": 50410 }, { "epoch": 0.56, "learning_rate": 4.069614226237931e-05, "loss": 0.754, "step": 50415 }, { "epoch": 0.56, "learning_rate": 4.06952195352408e-05, "loss": 0.7352, "step": 50420 }, { "epoch": 0.56, "learning_rate": 4.0694296808102286e-05, "loss": 0.737, "step": 50425 }, { "epoch": 0.56, "learning_rate": 4.069337408096377e-05, "loss": 0.7829, "step": 50430 }, { "epoch": 0.56, "learning_rate": 4.069245135382526e-05, "loss": 0.6991, "step": 50435 }, { "epoch": 0.56, "learning_rate": 4.069152862668675e-05, "loss": 0.7865, "step": 50440 }, { "epoch": 0.56, "learning_rate": 4.069060589954823e-05, "loss": 0.7498, "step": 50445 }, { "epoch": 0.56, "learning_rate": 4.068968317240972e-05, "loss": 0.7144, "step": 50450 }, { "epoch": 0.56, "learning_rate": 4.0688760445271214e-05, "loss": 0.7191, "step": 50455 }, { "epoch": 0.56, "learning_rate": 4.06878377181327e-05, "loss": 0.8012, "step": 50460 }, { "epoch": 0.56, "learning_rate": 4.068691499099418e-05, "loss": 0.7225, "step": 50465 }, { "epoch": 0.56, "learning_rate": 4.068599226385567e-05, "loss": 0.691, "step": 50470 }, { "epoch": 0.56, "learning_rate": 4.068506953671716e-05, "loss": 0.7345, "step": 50475 }, { "epoch": 0.56, "learning_rate": 4.0684146809578646e-05, "loss": 0.787, "step": 50480 }, { "epoch": 0.56, "learning_rate": 4.0683224082440134e-05, "loss": 0.7412, "step": 50485 }, { "epoch": 0.56, "learning_rate": 4.068230135530162e-05, "loss": 0.717, "step": 50490 }, { "epoch": 0.56, "learning_rate": 4.068137862816311e-05, "loss": 0.7255, "step": 50495 }, { "epoch": 0.56, "learning_rate": 4.06804559010246e-05, "loss": 0.7398, "step": 50500 }, { "epoch": 0.56, "learning_rate": 4.0679533173886085e-05, "loss": 0.7257, "step": 50505 }, { "epoch": 0.56, "learning_rate": 4.067861044674757e-05, "loss": 0.7801, "step": 50510 }, { "epoch": 0.56, "learning_rate": 4.067768771960906e-05, "loss": 0.8418, "step": 50515 }, { "epoch": 0.56, "learning_rate": 4.067676499247055e-05, "loss": 0.7007, "step": 50520 }, { "epoch": 0.56, "learning_rate": 4.067584226533204e-05, "loss": 0.7292, "step": 50525 }, { "epoch": 0.56, "learning_rate": 4.0674919538193524e-05, "loss": 0.7192, "step": 50530 }, { "epoch": 0.56, "learning_rate": 4.067399681105501e-05, "loss": 0.698, "step": 50535 }, { "epoch": 0.56, "learning_rate": 4.06730740839165e-05, "loss": 0.7559, "step": 50540 }, { "epoch": 0.56, "learning_rate": 4.067215135677799e-05, "loss": 0.7878, "step": 50545 }, { "epoch": 0.56, "learning_rate": 4.0671228629639476e-05, "loss": 0.7429, "step": 50550 }, { "epoch": 0.56, "learning_rate": 4.067030590250096e-05, "loss": 0.7854, "step": 50555 }, { "epoch": 0.56, "learning_rate": 4.0669383175362445e-05, "loss": 0.7124, "step": 50560 }, { "epoch": 0.56, "learning_rate": 4.066846044822394e-05, "loss": 0.7364, "step": 50565 }, { "epoch": 0.56, "learning_rate": 4.066753772108543e-05, "loss": 0.7091, "step": 50570 }, { "epoch": 0.56, "learning_rate": 4.066661499394691e-05, "loss": 0.7516, "step": 50575 }, { "epoch": 0.56, "learning_rate": 4.0665692266808396e-05, "loss": 0.7457, "step": 50580 }, { "epoch": 0.56, "learning_rate": 4.066476953966989e-05, "loss": 0.6768, "step": 50585 }, { "epoch": 0.56, "learning_rate": 4.066384681253138e-05, "loss": 0.7398, "step": 50590 }, { "epoch": 0.56, "learning_rate": 4.066292408539286e-05, "loss": 0.7223, "step": 50595 }, { "epoch": 0.56, "learning_rate": 4.066200135825435e-05, "loss": 0.7066, "step": 50600 }, { "epoch": 0.56, "learning_rate": 4.066107863111584e-05, "loss": 0.7206, "step": 50605 }, { "epoch": 0.56, "learning_rate": 4.066015590397732e-05, "loss": 0.7129, "step": 50610 }, { "epoch": 0.56, "learning_rate": 4.065923317683881e-05, "loss": 0.7364, "step": 50615 }, { "epoch": 0.56, "learning_rate": 4.06583104497003e-05, "loss": 0.7043, "step": 50620 }, { "epoch": 0.56, "learning_rate": 4.0657387722561794e-05, "loss": 0.7458, "step": 50625 }, { "epoch": 0.56, "learning_rate": 4.0656464995423275e-05, "loss": 0.7381, "step": 50630 }, { "epoch": 0.56, "learning_rate": 4.065554226828476e-05, "loss": 0.7421, "step": 50635 }, { "epoch": 0.56, "learning_rate": 4.065461954114625e-05, "loss": 0.7254, "step": 50640 }, { "epoch": 0.56, "learning_rate": 4.065369681400774e-05, "loss": 0.7923, "step": 50645 }, { "epoch": 0.56, "learning_rate": 4.0652774086869226e-05, "loss": 0.7428, "step": 50650 }, { "epoch": 0.56, "learning_rate": 4.0651851359730714e-05, "loss": 0.8208, "step": 50655 }, { "epoch": 0.56, "learning_rate": 4.06509286325922e-05, "loss": 0.8575, "step": 50660 }, { "epoch": 0.56, "learning_rate": 4.065000590545369e-05, "loss": 0.907, "step": 50665 }, { "epoch": 0.56, "learning_rate": 4.064908317831518e-05, "loss": 0.8121, "step": 50670 }, { "epoch": 0.56, "learning_rate": 4.0648160451176665e-05, "loss": 0.7799, "step": 50675 }, { "epoch": 0.56, "learning_rate": 4.064723772403815e-05, "loss": 0.684, "step": 50680 }, { "epoch": 0.56, "learning_rate": 4.0646314996899634e-05, "loss": 0.739, "step": 50685 }, { "epoch": 0.56, "learning_rate": 4.064539226976113e-05, "loss": 0.6965, "step": 50690 }, { "epoch": 0.56, "learning_rate": 4.064446954262262e-05, "loss": 0.7572, "step": 50695 }, { "epoch": 0.56, "learning_rate": 4.0643546815484105e-05, "loss": 0.7813, "step": 50700 }, { "epoch": 0.56, "learning_rate": 4.0642624088345586e-05, "loss": 0.77, "step": 50705 }, { "epoch": 0.56, "learning_rate": 4.0641701361207073e-05, "loss": 0.7193, "step": 50710 }, { "epoch": 0.56, "learning_rate": 4.064077863406857e-05, "loss": 0.7885, "step": 50715 }, { "epoch": 0.56, "learning_rate": 4.063985590693005e-05, "loss": 0.8334, "step": 50720 }, { "epoch": 0.56, "learning_rate": 4.063893317979154e-05, "loss": 0.7841, "step": 50725 }, { "epoch": 0.56, "learning_rate": 4.0638010452653025e-05, "loss": 0.7442, "step": 50730 }, { "epoch": 0.56, "learning_rate": 4.063708772551452e-05, "loss": 0.7292, "step": 50735 }, { "epoch": 0.56, "learning_rate": 4.0636164998376e-05, "loss": 0.7598, "step": 50740 }, { "epoch": 0.56, "learning_rate": 4.063524227123749e-05, "loss": 0.7296, "step": 50745 }, { "epoch": 0.56, "learning_rate": 4.0634319544098976e-05, "loss": 0.738, "step": 50750 }, { "epoch": 0.56, "learning_rate": 4.0633396816960464e-05, "loss": 0.8018, "step": 50755 }, { "epoch": 0.56, "learning_rate": 4.063247408982195e-05, "loss": 0.7547, "step": 50760 }, { "epoch": 0.56, "learning_rate": 4.063155136268344e-05, "loss": 0.7672, "step": 50765 }, { "epoch": 0.56, "learning_rate": 4.063062863554493e-05, "loss": 0.7561, "step": 50770 }, { "epoch": 0.56, "learning_rate": 4.0629705908406416e-05, "loss": 0.7078, "step": 50775 }, { "epoch": 0.56, "learning_rate": 4.06287831812679e-05, "loss": 0.7022, "step": 50780 }, { "epoch": 0.56, "learning_rate": 4.062786045412939e-05, "loss": 0.7306, "step": 50785 }, { "epoch": 0.56, "learning_rate": 4.062693772699088e-05, "loss": 0.7884, "step": 50790 }, { "epoch": 0.56, "learning_rate": 4.062601499985236e-05, "loss": 0.7117, "step": 50795 }, { "epoch": 0.56, "learning_rate": 4.0625092272713855e-05, "loss": 0.7482, "step": 50800 }, { "epoch": 0.56, "learning_rate": 4.062416954557534e-05, "loss": 0.7175, "step": 50805 }, { "epoch": 0.56, "learning_rate": 4.062324681843683e-05, "loss": 0.7179, "step": 50810 }, { "epoch": 0.56, "learning_rate": 4.062232409129831e-05, "loss": 0.7808, "step": 50815 }, { "epoch": 0.56, "learning_rate": 4.0621401364159806e-05, "loss": 0.686, "step": 50820 }, { "epoch": 0.56, "learning_rate": 4.0620478637021294e-05, "loss": 0.733, "step": 50825 }, { "epoch": 0.56, "learning_rate": 4.0619555909882775e-05, "loss": 0.7767, "step": 50830 }, { "epoch": 0.56, "learning_rate": 4.061863318274426e-05, "loss": 0.7415, "step": 50835 }, { "epoch": 0.56, "learning_rate": 4.061771045560576e-05, "loss": 0.698, "step": 50840 }, { "epoch": 0.56, "learning_rate": 4.0616787728467245e-05, "loss": 0.7348, "step": 50845 }, { "epoch": 0.56, "learning_rate": 4.0615865001328726e-05, "loss": 0.6902, "step": 50850 }, { "epoch": 0.56, "learning_rate": 4.0614942274190214e-05, "loss": 0.7418, "step": 50855 }, { "epoch": 0.56, "learning_rate": 4.06140195470517e-05, "loss": 0.6893, "step": 50860 }, { "epoch": 0.56, "learning_rate": 4.061309681991319e-05, "loss": 0.6954, "step": 50865 }, { "epoch": 0.56, "learning_rate": 4.061217409277468e-05, "loss": 0.7487, "step": 50870 }, { "epoch": 0.56, "learning_rate": 4.0611251365636166e-05, "loss": 0.6994, "step": 50875 }, { "epoch": 0.56, "learning_rate": 4.0610328638497654e-05, "loss": 0.7087, "step": 50880 }, { "epoch": 0.56, "learning_rate": 4.060940591135914e-05, "loss": 0.7285, "step": 50885 }, { "epoch": 0.56, "learning_rate": 4.060848318422063e-05, "loss": 0.728, "step": 50890 }, { "epoch": 0.56, "learning_rate": 4.060756045708212e-05, "loss": 0.72, "step": 50895 }, { "epoch": 0.56, "learning_rate": 4.0606637729943605e-05, "loss": 0.8109, "step": 50900 }, { "epoch": 0.56, "learning_rate": 4.060571500280509e-05, "loss": 0.7522, "step": 50905 }, { "epoch": 0.56, "learning_rate": 4.060479227566658e-05, "loss": 0.7694, "step": 50910 }, { "epoch": 0.56, "learning_rate": 4.060386954852807e-05, "loss": 0.7781, "step": 50915 }, { "epoch": 0.56, "learning_rate": 4.0602946821389556e-05, "loss": 0.728, "step": 50920 }, { "epoch": 0.56, "learning_rate": 4.0602024094251044e-05, "loss": 0.7792, "step": 50925 }, { "epoch": 0.56, "learning_rate": 4.060110136711253e-05, "loss": 0.7532, "step": 50930 }, { "epoch": 0.56, "learning_rate": 4.060017863997402e-05, "loss": 0.7812, "step": 50935 }, { "epoch": 0.56, "learning_rate": 4.05992559128355e-05, "loss": 0.7861, "step": 50940 }, { "epoch": 0.56, "learning_rate": 4.059833318569699e-05, "loss": 0.7548, "step": 50945 }, { "epoch": 0.56, "learning_rate": 4.0597410458558483e-05, "loss": 0.7401, "step": 50950 }, { "epoch": 0.56, "learning_rate": 4.059648773141997e-05, "loss": 0.7592, "step": 50955 }, { "epoch": 0.56, "learning_rate": 4.059556500428145e-05, "loss": 0.7328, "step": 50960 }, { "epoch": 0.56, "learning_rate": 4.059464227714294e-05, "loss": 0.7493, "step": 50965 }, { "epoch": 0.56, "learning_rate": 4.0593719550004435e-05, "loss": 0.7569, "step": 50970 }, { "epoch": 0.56, "learning_rate": 4.059279682286592e-05, "loss": 0.7258, "step": 50975 }, { "epoch": 0.56, "learning_rate": 4.0591874095727404e-05, "loss": 0.7862, "step": 50980 }, { "epoch": 0.56, "learning_rate": 4.059095136858889e-05, "loss": 0.742, "step": 50985 }, { "epoch": 0.56, "learning_rate": 4.0590028641450386e-05, "loss": 0.7055, "step": 50990 }, { "epoch": 0.56, "learning_rate": 4.058910591431187e-05, "loss": 0.8295, "step": 50995 }, { "epoch": 0.56, "learning_rate": 4.0588183187173355e-05, "loss": 0.7868, "step": 51000 }, { "epoch": 0.56, "eval_loss": 0.693796694278717, "eval_runtime": 69.3212, "eval_samples_per_second": 28.851, "eval_steps_per_second": 14.426, "step": 51000 }, { "epoch": 0.56, "learning_rate": 4.058726046003484e-05, "loss": 0.7354, "step": 51005 }, { "epoch": 0.56, "learning_rate": 4.058633773289633e-05, "loss": 0.747, "step": 51010 }, { "epoch": 0.56, "learning_rate": 4.058541500575782e-05, "loss": 0.6814, "step": 51015 }, { "epoch": 0.56, "learning_rate": 4.0584492278619307e-05, "loss": 0.7071, "step": 51020 }, { "epoch": 0.56, "learning_rate": 4.0583569551480794e-05, "loss": 0.6737, "step": 51025 }, { "epoch": 0.57, "learning_rate": 4.058264682434228e-05, "loss": 0.7045, "step": 51030 }, { "epoch": 0.57, "learning_rate": 4.058172409720377e-05, "loss": 0.6921, "step": 51035 }, { "epoch": 0.57, "learning_rate": 4.058080137006526e-05, "loss": 0.7985, "step": 51040 }, { "epoch": 0.57, "learning_rate": 4.0579878642926746e-05, "loss": 0.7867, "step": 51045 }, { "epoch": 0.57, "learning_rate": 4.0578955915788234e-05, "loss": 0.7516, "step": 51050 }, { "epoch": 0.57, "learning_rate": 4.057803318864972e-05, "loss": 0.7434, "step": 51055 }, { "epoch": 0.57, "learning_rate": 4.057711046151121e-05, "loss": 0.7791, "step": 51060 }, { "epoch": 0.57, "learning_rate": 4.05761877343727e-05, "loss": 0.7393, "step": 51065 }, { "epoch": 0.57, "learning_rate": 4.057526500723418e-05, "loss": 0.7868, "step": 51070 }, { "epoch": 0.57, "learning_rate": 4.057434228009567e-05, "loss": 0.7554, "step": 51075 }, { "epoch": 0.57, "learning_rate": 4.057341955295716e-05, "loss": 0.7018, "step": 51080 }, { "epoch": 0.57, "learning_rate": 4.057249682581865e-05, "loss": 0.7674, "step": 51085 }, { "epoch": 0.57, "learning_rate": 4.057157409868013e-05, "loss": 0.7304, "step": 51090 }, { "epoch": 0.57, "learning_rate": 4.057065137154162e-05, "loss": 0.7641, "step": 51095 }, { "epoch": 0.57, "learning_rate": 4.056972864440311e-05, "loss": 0.7903, "step": 51100 }, { "epoch": 0.57, "learning_rate": 4.056880591726459e-05, "loss": 0.6918, "step": 51105 }, { "epoch": 0.57, "learning_rate": 4.056788319012608e-05, "loss": 0.7781, "step": 51110 }, { "epoch": 0.57, "learning_rate": 4.056696046298757e-05, "loss": 0.7428, "step": 51115 }, { "epoch": 0.57, "learning_rate": 4.0566037735849064e-05, "loss": 0.706, "step": 51120 }, { "epoch": 0.57, "learning_rate": 4.0565115008710545e-05, "loss": 0.7263, "step": 51125 }, { "epoch": 0.57, "learning_rate": 4.056419228157203e-05, "loss": 0.6829, "step": 51130 }, { "epoch": 0.57, "learning_rate": 4.056326955443352e-05, "loss": 0.7188, "step": 51135 }, { "epoch": 0.57, "learning_rate": 4.056234682729501e-05, "loss": 0.6937, "step": 51140 }, { "epoch": 0.57, "learning_rate": 4.0561424100156496e-05, "loss": 0.7455, "step": 51145 }, { "epoch": 0.57, "learning_rate": 4.0560501373017984e-05, "loss": 0.7635, "step": 51150 }, { "epoch": 0.57, "learning_rate": 4.055957864587947e-05, "loss": 0.7585, "step": 51155 }, { "epoch": 0.57, "learning_rate": 4.055865591874096e-05, "loss": 0.703, "step": 51160 }, { "epoch": 0.57, "learning_rate": 4.055773319160245e-05, "loss": 0.7416, "step": 51165 }, { "epoch": 0.57, "learning_rate": 4.0556810464463935e-05, "loss": 0.7806, "step": 51170 }, { "epoch": 0.57, "learning_rate": 4.055588773732542e-05, "loss": 0.7102, "step": 51175 }, { "epoch": 0.57, "learning_rate": 4.0554965010186904e-05, "loss": 0.7097, "step": 51180 }, { "epoch": 0.57, "learning_rate": 4.05540422830484e-05, "loss": 0.7775, "step": 51185 }, { "epoch": 0.57, "learning_rate": 4.055311955590989e-05, "loss": 0.731, "step": 51190 }, { "epoch": 0.57, "learning_rate": 4.0552196828771374e-05, "loss": 0.7512, "step": 51195 }, { "epoch": 0.57, "learning_rate": 4.0551274101632856e-05, "loss": 0.7573, "step": 51200 }, { "epoch": 0.57, "learning_rate": 4.055035137449435e-05, "loss": 0.6911, "step": 51205 }, { "epoch": 0.57, "learning_rate": 4.054942864735584e-05, "loss": 0.7431, "step": 51210 }, { "epoch": 0.57, "learning_rate": 4.054850592021732e-05, "loss": 0.727, "step": 51215 }, { "epoch": 0.57, "learning_rate": 4.054758319307881e-05, "loss": 0.746, "step": 51220 }, { "epoch": 0.57, "learning_rate": 4.05466604659403e-05, "loss": 0.7792, "step": 51225 }, { "epoch": 0.57, "learning_rate": 4.054573773880179e-05, "loss": 0.7095, "step": 51230 }, { "epoch": 0.57, "learning_rate": 4.054481501166327e-05, "loss": 0.7359, "step": 51235 }, { "epoch": 0.57, "learning_rate": 4.054389228452476e-05, "loss": 0.7473, "step": 51240 }, { "epoch": 0.57, "learning_rate": 4.0542969557386246e-05, "loss": 0.7697, "step": 51245 }, { "epoch": 0.57, "learning_rate": 4.0542046830247734e-05, "loss": 0.7228, "step": 51250 }, { "epoch": 0.57, "learning_rate": 4.054112410310922e-05, "loss": 0.7653, "step": 51255 }, { "epoch": 0.57, "learning_rate": 4.054020137597071e-05, "loss": 0.6807, "step": 51260 }, { "epoch": 0.57, "learning_rate": 4.05392786488322e-05, "loss": 0.7479, "step": 51265 }, { "epoch": 0.57, "learning_rate": 4.0538355921693685e-05, "loss": 0.7258, "step": 51270 }, { "epoch": 0.57, "learning_rate": 4.053743319455517e-05, "loss": 0.7308, "step": 51275 }, { "epoch": 0.57, "learning_rate": 4.053651046741666e-05, "loss": 0.7376, "step": 51280 }, { "epoch": 0.57, "learning_rate": 4.053558774027815e-05, "loss": 0.7739, "step": 51285 }, { "epoch": 0.57, "learning_rate": 4.053466501313964e-05, "loss": 0.7639, "step": 51290 }, { "epoch": 0.57, "learning_rate": 4.0533742286001125e-05, "loss": 0.7079, "step": 51295 }, { "epoch": 0.57, "learning_rate": 4.053281955886261e-05, "loss": 0.8226, "step": 51300 }, { "epoch": 0.57, "learning_rate": 4.05318968317241e-05, "loss": 0.7432, "step": 51305 }, { "epoch": 0.57, "learning_rate": 4.053097410458558e-05, "loss": 0.7259, "step": 51310 }, { "epoch": 0.57, "learning_rate": 4.0530051377447076e-05, "loss": 0.7741, "step": 51315 }, { "epoch": 0.57, "learning_rate": 4.0529128650308564e-05, "loss": 0.7611, "step": 51320 }, { "epoch": 0.57, "learning_rate": 4.0528205923170045e-05, "loss": 0.7104, "step": 51325 }, { "epoch": 0.57, "learning_rate": 4.052728319603153e-05, "loss": 0.6856, "step": 51330 }, { "epoch": 0.57, "learning_rate": 4.052636046889303e-05, "loss": 0.6943, "step": 51335 }, { "epoch": 0.57, "learning_rate": 4.0525437741754515e-05, "loss": 0.6638, "step": 51340 }, { "epoch": 0.57, "learning_rate": 4.0524515014615996e-05, "loss": 0.712, "step": 51345 }, { "epoch": 0.57, "learning_rate": 4.0523592287477484e-05, "loss": 0.7066, "step": 51350 }, { "epoch": 0.57, "learning_rate": 4.052266956033898e-05, "loss": 0.7032, "step": 51355 }, { "epoch": 0.57, "learning_rate": 4.052174683320047e-05, "loss": 0.7141, "step": 51360 }, { "epoch": 0.57, "learning_rate": 4.052082410606195e-05, "loss": 0.7235, "step": 51365 }, { "epoch": 0.57, "learning_rate": 4.0519901378923436e-05, "loss": 0.7757, "step": 51370 }, { "epoch": 0.57, "learning_rate": 4.051897865178493e-05, "loss": 0.7182, "step": 51375 }, { "epoch": 0.57, "learning_rate": 4.051805592464641e-05, "loss": 0.7016, "step": 51380 }, { "epoch": 0.57, "learning_rate": 4.05171331975079e-05, "loss": 0.751, "step": 51385 }, { "epoch": 0.57, "learning_rate": 4.051621047036939e-05, "loss": 0.7554, "step": 51390 }, { "epoch": 0.57, "learning_rate": 4.0515287743230875e-05, "loss": 0.7631, "step": 51395 }, { "epoch": 0.57, "learning_rate": 4.051436501609236e-05, "loss": 0.7576, "step": 51400 }, { "epoch": 0.57, "learning_rate": 4.051344228895385e-05, "loss": 0.7353, "step": 51405 }, { "epoch": 0.57, "learning_rate": 4.051251956181534e-05, "loss": 0.7786, "step": 51410 }, { "epoch": 0.57, "learning_rate": 4.0511596834676826e-05, "loss": 0.6695, "step": 51415 }, { "epoch": 0.57, "learning_rate": 4.0510674107538314e-05, "loss": 0.7685, "step": 51420 }, { "epoch": 0.57, "learning_rate": 4.05097513803998e-05, "loss": 0.7306, "step": 51425 }, { "epoch": 0.57, "learning_rate": 4.050882865326129e-05, "loss": 0.7505, "step": 51430 }, { "epoch": 0.57, "learning_rate": 4.050790592612278e-05, "loss": 0.7497, "step": 51435 }, { "epoch": 0.57, "learning_rate": 4.0506983198984266e-05, "loss": 0.788, "step": 51440 }, { "epoch": 0.57, "learning_rate": 4.050606047184575e-05, "loss": 0.7603, "step": 51445 }, { "epoch": 0.57, "learning_rate": 4.050513774470724e-05, "loss": 0.7185, "step": 51450 }, { "epoch": 0.57, "learning_rate": 4.050421501756872e-05, "loss": 0.7984, "step": 51455 }, { "epoch": 0.57, "learning_rate": 4.050329229043022e-05, "loss": 0.6899, "step": 51460 }, { "epoch": 0.57, "learning_rate": 4.0502369563291705e-05, "loss": 0.7351, "step": 51465 }, { "epoch": 0.57, "learning_rate": 4.050144683615319e-05, "loss": 0.7591, "step": 51470 }, { "epoch": 0.57, "learning_rate": 4.0500524109014674e-05, "loss": 0.7104, "step": 51475 }, { "epoch": 0.57, "learning_rate": 4.049960138187616e-05, "loss": 0.7636, "step": 51480 }, { "epoch": 0.57, "learning_rate": 4.0498678654737656e-05, "loss": 0.7945, "step": 51485 }, { "epoch": 0.57, "learning_rate": 4.049775592759914e-05, "loss": 0.6997, "step": 51490 }, { "epoch": 0.57, "learning_rate": 4.0496833200460625e-05, "loss": 0.7831, "step": 51495 }, { "epoch": 0.57, "learning_rate": 4.049591047332211e-05, "loss": 0.6904, "step": 51500 }, { "epoch": 0.57, "learning_rate": 4.049498774618361e-05, "loss": 0.7093, "step": 51505 }, { "epoch": 0.57, "learning_rate": 4.049406501904509e-05, "loss": 0.7659, "step": 51510 }, { "epoch": 0.57, "learning_rate": 4.0493142291906576e-05, "loss": 0.7109, "step": 51515 }, { "epoch": 0.57, "learning_rate": 4.0492219564768064e-05, "loss": 0.774, "step": 51520 }, { "epoch": 0.57, "learning_rate": 4.049129683762955e-05, "loss": 0.699, "step": 51525 }, { "epoch": 0.57, "learning_rate": 4.049037411049104e-05, "loss": 0.7627, "step": 51530 }, { "epoch": 0.57, "learning_rate": 4.048945138335253e-05, "loss": 0.7131, "step": 51535 }, { "epoch": 0.57, "learning_rate": 4.0488528656214016e-05, "loss": 0.7506, "step": 51540 }, { "epoch": 0.57, "learning_rate": 4.0487605929075504e-05, "loss": 0.7697, "step": 51545 }, { "epoch": 0.57, "learning_rate": 4.048668320193699e-05, "loss": 0.6783, "step": 51550 }, { "epoch": 0.57, "learning_rate": 4.048576047479848e-05, "loss": 0.6823, "step": 51555 }, { "epoch": 0.57, "learning_rate": 4.048483774765997e-05, "loss": 0.7142, "step": 51560 }, { "epoch": 0.57, "learning_rate": 4.048391502052145e-05, "loss": 0.6882, "step": 51565 }, { "epoch": 0.57, "learning_rate": 4.048299229338294e-05, "loss": 0.7659, "step": 51570 }, { "epoch": 0.57, "learning_rate": 4.048206956624443e-05, "loss": 0.6794, "step": 51575 }, { "epoch": 0.57, "learning_rate": 4.048114683910592e-05, "loss": 0.7524, "step": 51580 }, { "epoch": 0.57, "learning_rate": 4.04802241119674e-05, "loss": 0.7128, "step": 51585 }, { "epoch": 0.57, "learning_rate": 4.0479301384828894e-05, "loss": 0.7589, "step": 51590 }, { "epoch": 0.57, "learning_rate": 4.047837865769038e-05, "loss": 0.6797, "step": 51595 }, { "epoch": 0.57, "learning_rate": 4.047745593055186e-05, "loss": 0.6977, "step": 51600 }, { "epoch": 0.57, "learning_rate": 4.047653320341335e-05, "loss": 0.7657, "step": 51605 }, { "epoch": 0.57, "learning_rate": 4.0475610476274846e-05, "loss": 0.7605, "step": 51610 }, { "epoch": 0.57, "learning_rate": 4.0474687749136333e-05, "loss": 0.763, "step": 51615 }, { "epoch": 0.57, "learning_rate": 4.0473765021997815e-05, "loss": 0.7098, "step": 51620 }, { "epoch": 0.57, "learning_rate": 4.04728422948593e-05, "loss": 0.7031, "step": 51625 }, { "epoch": 0.57, "learning_rate": 4.047191956772079e-05, "loss": 0.7317, "step": 51630 }, { "epoch": 0.57, "learning_rate": 4.0470996840582285e-05, "loss": 0.7512, "step": 51635 }, { "epoch": 0.57, "learning_rate": 4.0470074113443766e-05, "loss": 0.6903, "step": 51640 }, { "epoch": 0.57, "learning_rate": 4.0469151386305254e-05, "loss": 0.6915, "step": 51645 }, { "epoch": 0.57, "learning_rate": 4.046822865916674e-05, "loss": 0.7416, "step": 51650 }, { "epoch": 0.57, "learning_rate": 4.046730593202823e-05, "loss": 0.7038, "step": 51655 }, { "epoch": 0.57, "learning_rate": 4.046638320488972e-05, "loss": 0.7821, "step": 51660 }, { "epoch": 0.57, "learning_rate": 4.0465460477751205e-05, "loss": 0.7326, "step": 51665 }, { "epoch": 0.57, "learning_rate": 4.046453775061269e-05, "loss": 0.718, "step": 51670 }, { "epoch": 0.57, "learning_rate": 4.046361502347418e-05, "loss": 0.7421, "step": 51675 }, { "epoch": 0.57, "learning_rate": 4.046269229633567e-05, "loss": 0.7352, "step": 51680 }, { "epoch": 0.57, "learning_rate": 4.0461769569197157e-05, "loss": 0.7238, "step": 51685 }, { "epoch": 0.57, "learning_rate": 4.0460846842058644e-05, "loss": 0.7392, "step": 51690 }, { "epoch": 0.57, "learning_rate": 4.0459924114920125e-05, "loss": 0.7299, "step": 51695 }, { "epoch": 0.57, "learning_rate": 4.045900138778162e-05, "loss": 0.7468, "step": 51700 }, { "epoch": 0.57, "learning_rate": 4.045807866064311e-05, "loss": 0.742, "step": 51705 }, { "epoch": 0.57, "learning_rate": 4.0457155933504596e-05, "loss": 0.7393, "step": 51710 }, { "epoch": 0.57, "learning_rate": 4.045623320636608e-05, "loss": 0.7359, "step": 51715 }, { "epoch": 0.57, "learning_rate": 4.045531047922757e-05, "loss": 0.7256, "step": 51720 }, { "epoch": 0.57, "learning_rate": 4.045438775208906e-05, "loss": 0.7668, "step": 51725 }, { "epoch": 0.57, "learning_rate": 4.045346502495054e-05, "loss": 0.7168, "step": 51730 }, { "epoch": 0.57, "learning_rate": 4.045254229781203e-05, "loss": 0.8085, "step": 51735 }, { "epoch": 0.57, "learning_rate": 4.045161957067352e-05, "loss": 0.7738, "step": 51740 }, { "epoch": 0.57, "learning_rate": 4.045069684353501e-05, "loss": 0.7016, "step": 51745 }, { "epoch": 0.57, "learning_rate": 4.044977411639649e-05, "loss": 0.7013, "step": 51750 }, { "epoch": 0.57, "learning_rate": 4.044885138925798e-05, "loss": 0.717, "step": 51755 }, { "epoch": 0.57, "learning_rate": 4.0447928662119474e-05, "loss": 0.8428, "step": 51760 }, { "epoch": 0.57, "learning_rate": 4.0447005934980955e-05, "loss": 0.7143, "step": 51765 }, { "epoch": 0.57, "learning_rate": 4.044608320784244e-05, "loss": 0.7342, "step": 51770 }, { "epoch": 0.57, "learning_rate": 4.044516048070393e-05, "loss": 0.8163, "step": 51775 }, { "epoch": 0.57, "learning_rate": 4.044423775356542e-05, "loss": 0.7478, "step": 51780 }, { "epoch": 0.57, "learning_rate": 4.044331502642691e-05, "loss": 0.7443, "step": 51785 }, { "epoch": 0.57, "learning_rate": 4.0442392299288395e-05, "loss": 0.7526, "step": 51790 }, { "epoch": 0.57, "learning_rate": 4.044146957214988e-05, "loss": 0.7395, "step": 51795 }, { "epoch": 0.57, "learning_rate": 4.044054684501137e-05, "loss": 0.7009, "step": 51800 }, { "epoch": 0.57, "learning_rate": 4.043962411787286e-05, "loss": 0.7166, "step": 51805 }, { "epoch": 0.57, "learning_rate": 4.0438701390734346e-05, "loss": 0.734, "step": 51810 }, { "epoch": 0.57, "learning_rate": 4.0437778663595834e-05, "loss": 0.7247, "step": 51815 }, { "epoch": 0.57, "learning_rate": 4.043685593645732e-05, "loss": 0.6723, "step": 51820 }, { "epoch": 0.57, "learning_rate": 4.043593320931881e-05, "loss": 0.7401, "step": 51825 }, { "epoch": 0.57, "learning_rate": 4.04350104821803e-05, "loss": 0.7784, "step": 51830 }, { "epoch": 0.57, "learning_rate": 4.0434087755041785e-05, "loss": 0.6797, "step": 51835 }, { "epoch": 0.57, "learning_rate": 4.0433165027903266e-05, "loss": 0.8218, "step": 51840 }, { "epoch": 0.57, "learning_rate": 4.0432242300764754e-05, "loss": 0.7358, "step": 51845 }, { "epoch": 0.57, "learning_rate": 4.043131957362625e-05, "loss": 0.7107, "step": 51850 }, { "epoch": 0.57, "learning_rate": 4.043039684648774e-05, "loss": 0.7555, "step": 51855 }, { "epoch": 0.57, "learning_rate": 4.042947411934922e-05, "loss": 0.7466, "step": 51860 }, { "epoch": 0.57, "learning_rate": 4.0428551392210706e-05, "loss": 0.701, "step": 51865 }, { "epoch": 0.57, "learning_rate": 4.04276286650722e-05, "loss": 0.7739, "step": 51870 }, { "epoch": 0.57, "learning_rate": 4.042670593793368e-05, "loss": 0.7483, "step": 51875 }, { "epoch": 0.57, "learning_rate": 4.042578321079517e-05, "loss": 0.6999, "step": 51880 }, { "epoch": 0.57, "learning_rate": 4.042486048365666e-05, "loss": 0.7634, "step": 51885 }, { "epoch": 0.57, "learning_rate": 4.042393775651815e-05, "loss": 0.7438, "step": 51890 }, { "epoch": 0.57, "learning_rate": 4.042301502937963e-05, "loss": 0.7318, "step": 51895 }, { "epoch": 0.57, "learning_rate": 4.042209230224112e-05, "loss": 0.6837, "step": 51900 }, { "epoch": 0.57, "learning_rate": 4.042116957510261e-05, "loss": 0.7456, "step": 51905 }, { "epoch": 0.57, "learning_rate": 4.0420246847964096e-05, "loss": 0.7657, "step": 51910 }, { "epoch": 0.57, "learning_rate": 4.0419324120825584e-05, "loss": 0.7145, "step": 51915 }, { "epoch": 0.57, "learning_rate": 4.041840139368707e-05, "loss": 0.777, "step": 51920 }, { "epoch": 0.57, "learning_rate": 4.041747866654856e-05, "loss": 0.7132, "step": 51925 }, { "epoch": 0.58, "learning_rate": 4.041655593941005e-05, "loss": 0.7619, "step": 51930 }, { "epoch": 0.58, "learning_rate": 4.0415633212271535e-05, "loss": 0.7098, "step": 51935 }, { "epoch": 0.58, "learning_rate": 4.041471048513302e-05, "loss": 0.7121, "step": 51940 }, { "epoch": 0.58, "learning_rate": 4.041378775799451e-05, "loss": 0.7155, "step": 51945 }, { "epoch": 0.58, "learning_rate": 4.041286503085599e-05, "loss": 0.7128, "step": 51950 }, { "epoch": 0.58, "learning_rate": 4.041194230371749e-05, "loss": 0.7407, "step": 51955 }, { "epoch": 0.58, "learning_rate": 4.0411019576578975e-05, "loss": 0.7082, "step": 51960 }, { "epoch": 0.58, "learning_rate": 4.041009684944046e-05, "loss": 0.7424, "step": 51965 }, { "epoch": 0.58, "learning_rate": 4.0409174122301944e-05, "loss": 0.7223, "step": 51970 }, { "epoch": 0.58, "learning_rate": 4.040825139516344e-05, "loss": 0.6943, "step": 51975 }, { "epoch": 0.58, "learning_rate": 4.0407328668024926e-05, "loss": 0.7629, "step": 51980 }, { "epoch": 0.58, "learning_rate": 4.040640594088641e-05, "loss": 0.7492, "step": 51985 }, { "epoch": 0.58, "learning_rate": 4.0405483213747895e-05, "loss": 0.7591, "step": 51990 }, { "epoch": 0.58, "learning_rate": 4.040456048660938e-05, "loss": 0.7981, "step": 51995 }, { "epoch": 0.58, "learning_rate": 4.040363775947088e-05, "loss": 0.6966, "step": 52000 }, { "epoch": 0.58, "eval_loss": 0.6866953372955322, "eval_runtime": 69.3106, "eval_samples_per_second": 28.856, "eval_steps_per_second": 14.428, "step": 52000 }, { "epoch": 0.58, "learning_rate": 4.040271503233236e-05, "loss": 0.73, "step": 52005 }, { "epoch": 0.58, "learning_rate": 4.0401792305193846e-05, "loss": 0.7764, "step": 52010 }, { "epoch": 0.58, "learning_rate": 4.0400869578055334e-05, "loss": 0.746, "step": 52015 }, { "epoch": 0.58, "learning_rate": 4.039994685091683e-05, "loss": 0.754, "step": 52020 }, { "epoch": 0.58, "learning_rate": 4.039902412377831e-05, "loss": 0.7668, "step": 52025 }, { "epoch": 0.58, "learning_rate": 4.03981013966398e-05, "loss": 0.7455, "step": 52030 }, { "epoch": 0.58, "learning_rate": 4.0397178669501286e-05, "loss": 0.7428, "step": 52035 }, { "epoch": 0.58, "learning_rate": 4.0396255942362773e-05, "loss": 0.6864, "step": 52040 }, { "epoch": 0.58, "learning_rate": 4.039533321522426e-05, "loss": 0.7516, "step": 52045 }, { "epoch": 0.58, "learning_rate": 4.039441048808575e-05, "loss": 0.7349, "step": 52050 }, { "epoch": 0.58, "learning_rate": 4.039348776094724e-05, "loss": 0.8189, "step": 52055 }, { "epoch": 0.58, "learning_rate": 4.0392565033808725e-05, "loss": 0.7751, "step": 52060 }, { "epoch": 0.58, "learning_rate": 4.039164230667021e-05, "loss": 0.7062, "step": 52065 }, { "epoch": 0.58, "learning_rate": 4.03907195795317e-05, "loss": 0.7879, "step": 52070 }, { "epoch": 0.58, "learning_rate": 4.038979685239319e-05, "loss": 0.7478, "step": 52075 }, { "epoch": 0.58, "learning_rate": 4.038887412525467e-05, "loss": 0.7396, "step": 52080 }, { "epoch": 0.58, "learning_rate": 4.0387951398116164e-05, "loss": 0.7021, "step": 52085 }, { "epoch": 0.58, "learning_rate": 4.038702867097765e-05, "loss": 0.7424, "step": 52090 }, { "epoch": 0.58, "learning_rate": 4.038610594383914e-05, "loss": 0.7402, "step": 52095 }, { "epoch": 0.58, "learning_rate": 4.038518321670062e-05, "loss": 0.7952, "step": 52100 }, { "epoch": 0.58, "learning_rate": 4.0384260489562116e-05, "loss": 0.7478, "step": 52105 }, { "epoch": 0.58, "learning_rate": 4.03833377624236e-05, "loss": 0.7385, "step": 52110 }, { "epoch": 0.58, "learning_rate": 4.0382415035285084e-05, "loss": 0.7531, "step": 52115 }, { "epoch": 0.58, "learning_rate": 4.038149230814657e-05, "loss": 0.7702, "step": 52120 }, { "epoch": 0.58, "learning_rate": 4.038056958100807e-05, "loss": 0.7057, "step": 52125 }, { "epoch": 0.58, "learning_rate": 4.0379646853869555e-05, "loss": 0.7572, "step": 52130 }, { "epoch": 0.58, "learning_rate": 4.0378724126731036e-05, "loss": 0.735, "step": 52135 }, { "epoch": 0.58, "learning_rate": 4.0377801399592524e-05, "loss": 0.7172, "step": 52140 }, { "epoch": 0.58, "learning_rate": 4.037687867245401e-05, "loss": 0.7423, "step": 52145 }, { "epoch": 0.58, "learning_rate": 4.03759559453155e-05, "loss": 0.7344, "step": 52150 }, { "epoch": 0.58, "learning_rate": 4.037503321817699e-05, "loss": 0.7459, "step": 52155 }, { "epoch": 0.58, "learning_rate": 4.0374110491038475e-05, "loss": 0.684, "step": 52160 }, { "epoch": 0.58, "learning_rate": 4.037318776389996e-05, "loss": 0.7125, "step": 52165 }, { "epoch": 0.58, "learning_rate": 4.037226503676145e-05, "loss": 0.7493, "step": 52170 }, { "epoch": 0.58, "learning_rate": 4.037134230962294e-05, "loss": 0.7328, "step": 52175 }, { "epoch": 0.58, "learning_rate": 4.0370419582484426e-05, "loss": 0.727, "step": 52180 }, { "epoch": 0.58, "learning_rate": 4.0369496855345914e-05, "loss": 0.7722, "step": 52185 }, { "epoch": 0.58, "learning_rate": 4.03685741282074e-05, "loss": 0.7466, "step": 52190 }, { "epoch": 0.58, "learning_rate": 4.036765140106889e-05, "loss": 0.7028, "step": 52195 }, { "epoch": 0.58, "learning_rate": 4.036672867393038e-05, "loss": 0.7653, "step": 52200 }, { "epoch": 0.58, "learning_rate": 4.0365805946791866e-05, "loss": 0.684, "step": 52205 }, { "epoch": 0.58, "learning_rate": 4.0364883219653354e-05, "loss": 0.7739, "step": 52210 }, { "epoch": 0.58, "learning_rate": 4.036396049251484e-05, "loss": 0.7591, "step": 52215 }, { "epoch": 0.58, "learning_rate": 4.036303776537633e-05, "loss": 0.7797, "step": 52220 }, { "epoch": 0.58, "learning_rate": 4.036211503823781e-05, "loss": 0.6915, "step": 52225 }, { "epoch": 0.58, "learning_rate": 4.03611923110993e-05, "loss": 0.719, "step": 52230 }, { "epoch": 0.58, "learning_rate": 4.036026958396079e-05, "loss": 0.7656, "step": 52235 }, { "epoch": 0.58, "learning_rate": 4.035934685682228e-05, "loss": 0.7031, "step": 52240 }, { "epoch": 0.58, "learning_rate": 4.035842412968376e-05, "loss": 0.713, "step": 52245 }, { "epoch": 0.58, "learning_rate": 4.035750140254525e-05, "loss": 0.7887, "step": 52250 }, { "epoch": 0.58, "learning_rate": 4.0356578675406744e-05, "loss": 0.7515, "step": 52255 }, { "epoch": 0.58, "learning_rate": 4.0355655948268225e-05, "loss": 0.717, "step": 52260 }, { "epoch": 0.58, "learning_rate": 4.035473322112971e-05, "loss": 0.7047, "step": 52265 }, { "epoch": 0.58, "learning_rate": 4.03538104939912e-05, "loss": 0.7059, "step": 52270 }, { "epoch": 0.58, "learning_rate": 4.0352887766852696e-05, "loss": 0.7672, "step": 52275 }, { "epoch": 0.58, "learning_rate": 4.035196503971418e-05, "loss": 0.6965, "step": 52280 }, { "epoch": 0.58, "learning_rate": 4.0351042312575665e-05, "loss": 0.7141, "step": 52285 }, { "epoch": 0.58, "learning_rate": 4.035011958543715e-05, "loss": 0.7242, "step": 52290 }, { "epoch": 0.58, "learning_rate": 4.034919685829864e-05, "loss": 0.7461, "step": 52295 }, { "epoch": 0.58, "learning_rate": 4.034827413116013e-05, "loss": 0.713, "step": 52300 }, { "epoch": 0.58, "learning_rate": 4.0347351404021616e-05, "loss": 0.7916, "step": 52305 }, { "epoch": 0.58, "learning_rate": 4.0346428676883104e-05, "loss": 0.7401, "step": 52310 }, { "epoch": 0.58, "learning_rate": 4.034550594974459e-05, "loss": 0.7946, "step": 52315 }, { "epoch": 0.58, "learning_rate": 4.034458322260608e-05, "loss": 0.7986, "step": 52320 }, { "epoch": 0.58, "learning_rate": 4.034366049546757e-05, "loss": 0.6877, "step": 52325 }, { "epoch": 0.58, "learning_rate": 4.0342737768329055e-05, "loss": 0.7531, "step": 52330 }, { "epoch": 0.58, "learning_rate": 4.0341815041190536e-05, "loss": 0.7366, "step": 52335 }, { "epoch": 0.58, "learning_rate": 4.034089231405203e-05, "loss": 0.7996, "step": 52340 }, { "epoch": 0.58, "learning_rate": 4.033996958691352e-05, "loss": 0.7438, "step": 52345 }, { "epoch": 0.58, "learning_rate": 4.0339046859775007e-05, "loss": 0.7001, "step": 52350 }, { "epoch": 0.58, "learning_rate": 4.033812413263649e-05, "loss": 0.7796, "step": 52355 }, { "epoch": 0.58, "learning_rate": 4.033720140549798e-05, "loss": 0.6853, "step": 52360 }, { "epoch": 0.58, "learning_rate": 4.033627867835947e-05, "loss": 0.7025, "step": 52365 }, { "epoch": 0.58, "learning_rate": 4.033535595122095e-05, "loss": 0.7755, "step": 52370 }, { "epoch": 0.58, "learning_rate": 4.033443322408244e-05, "loss": 0.7542, "step": 52375 }, { "epoch": 0.58, "learning_rate": 4.033351049694393e-05, "loss": 0.7378, "step": 52380 }, { "epoch": 0.58, "learning_rate": 4.033258776980542e-05, "loss": 0.7759, "step": 52385 }, { "epoch": 0.58, "learning_rate": 4.03316650426669e-05, "loss": 0.6852, "step": 52390 }, { "epoch": 0.58, "learning_rate": 4.033074231552839e-05, "loss": 0.7475, "step": 52395 }, { "epoch": 0.58, "learning_rate": 4.032981958838988e-05, "loss": 0.735, "step": 52400 }, { "epoch": 0.58, "learning_rate": 4.032889686125137e-05, "loss": 0.7368, "step": 52405 }, { "epoch": 0.58, "learning_rate": 4.0327974134112854e-05, "loss": 0.775, "step": 52410 }, { "epoch": 0.58, "learning_rate": 4.032705140697434e-05, "loss": 0.668, "step": 52415 }, { "epoch": 0.58, "learning_rate": 4.032612867983583e-05, "loss": 0.7885, "step": 52420 }, { "epoch": 0.58, "learning_rate": 4.032520595269732e-05, "loss": 0.7786, "step": 52425 }, { "epoch": 0.58, "learning_rate": 4.0324283225558805e-05, "loss": 0.7675, "step": 52430 }, { "epoch": 0.58, "learning_rate": 4.032336049842029e-05, "loss": 0.7956, "step": 52435 }, { "epoch": 0.58, "learning_rate": 4.032243777128178e-05, "loss": 0.6697, "step": 52440 }, { "epoch": 0.58, "learning_rate": 4.032151504414327e-05, "loss": 0.7568, "step": 52445 }, { "epoch": 0.58, "learning_rate": 4.032059231700476e-05, "loss": 0.7598, "step": 52450 }, { "epoch": 0.58, "learning_rate": 4.0319669589866245e-05, "loss": 0.7468, "step": 52455 }, { "epoch": 0.58, "learning_rate": 4.031874686272773e-05, "loss": 0.7352, "step": 52460 }, { "epoch": 0.58, "learning_rate": 4.0317824135589214e-05, "loss": 0.7789, "step": 52465 }, { "epoch": 0.58, "learning_rate": 4.031690140845071e-05, "loss": 0.7077, "step": 52470 }, { "epoch": 0.58, "learning_rate": 4.0315978681312196e-05, "loss": 0.71, "step": 52475 }, { "epoch": 0.58, "learning_rate": 4.0315055954173684e-05, "loss": 0.8306, "step": 52480 }, { "epoch": 0.58, "learning_rate": 4.0314133227035165e-05, "loss": 0.7083, "step": 52485 }, { "epoch": 0.58, "learning_rate": 4.031321049989666e-05, "loss": 0.7828, "step": 52490 }, { "epoch": 0.58, "learning_rate": 4.031228777275815e-05, "loss": 0.7205, "step": 52495 }, { "epoch": 0.58, "learning_rate": 4.031136504561963e-05, "loss": 0.672, "step": 52500 }, { "epoch": 0.58, "learning_rate": 4.0310442318481116e-05, "loss": 0.7981, "step": 52505 }, { "epoch": 0.58, "learning_rate": 4.030951959134261e-05, "loss": 0.739, "step": 52510 }, { "epoch": 0.58, "learning_rate": 4.03085968642041e-05, "loss": 0.7356, "step": 52515 }, { "epoch": 0.58, "learning_rate": 4.030767413706558e-05, "loss": 0.73, "step": 52520 }, { "epoch": 0.58, "learning_rate": 4.030675140992707e-05, "loss": 0.7528, "step": 52525 }, { "epoch": 0.58, "learning_rate": 4.0305828682788556e-05, "loss": 0.7573, "step": 52530 }, { "epoch": 0.58, "learning_rate": 4.0304905955650043e-05, "loss": 0.6609, "step": 52535 }, { "epoch": 0.58, "learning_rate": 4.030398322851153e-05, "loss": 0.7286, "step": 52540 }, { "epoch": 0.58, "learning_rate": 4.030306050137302e-05, "loss": 0.6886, "step": 52545 }, { "epoch": 0.58, "learning_rate": 4.030213777423451e-05, "loss": 0.6848, "step": 52550 }, { "epoch": 0.58, "learning_rate": 4.0301215047095995e-05, "loss": 0.7277, "step": 52555 }, { "epoch": 0.58, "learning_rate": 4.030029231995748e-05, "loss": 0.6907, "step": 52560 }, { "epoch": 0.58, "learning_rate": 4.029936959281897e-05, "loss": 0.7693, "step": 52565 }, { "epoch": 0.58, "learning_rate": 4.029844686568046e-05, "loss": 0.7604, "step": 52570 }, { "epoch": 0.58, "learning_rate": 4.0297524138541946e-05, "loss": 0.7109, "step": 52575 }, { "epoch": 0.58, "learning_rate": 4.0296601411403434e-05, "loss": 0.7252, "step": 52580 }, { "epoch": 0.58, "learning_rate": 4.029567868426492e-05, "loss": 0.7878, "step": 52585 }, { "epoch": 0.58, "learning_rate": 4.029475595712641e-05, "loss": 0.7708, "step": 52590 }, { "epoch": 0.58, "learning_rate": 4.02938332299879e-05, "loss": 0.7763, "step": 52595 }, { "epoch": 0.58, "learning_rate": 4.0292910502849385e-05, "loss": 0.7051, "step": 52600 }, { "epoch": 0.58, "learning_rate": 4.029198777571087e-05, "loss": 0.8202, "step": 52605 }, { "epoch": 0.58, "learning_rate": 4.0291065048572354e-05, "loss": 0.7285, "step": 52610 }, { "epoch": 0.58, "learning_rate": 4.029014232143384e-05, "loss": 0.7587, "step": 52615 }, { "epoch": 0.58, "learning_rate": 4.028921959429534e-05, "loss": 0.734, "step": 52620 }, { "epoch": 0.58, "learning_rate": 4.0288296867156825e-05, "loss": 0.719, "step": 52625 }, { "epoch": 0.58, "learning_rate": 4.0287374140018306e-05, "loss": 0.8215, "step": 52630 }, { "epoch": 0.58, "learning_rate": 4.0286451412879794e-05, "loss": 0.7337, "step": 52635 }, { "epoch": 0.58, "learning_rate": 4.028552868574129e-05, "loss": 0.7099, "step": 52640 }, { "epoch": 0.58, "learning_rate": 4.028460595860277e-05, "loss": 0.7216, "step": 52645 }, { "epoch": 0.58, "learning_rate": 4.028368323146426e-05, "loss": 0.7768, "step": 52650 }, { "epoch": 0.58, "learning_rate": 4.0282760504325745e-05, "loss": 0.741, "step": 52655 }, { "epoch": 0.58, "learning_rate": 4.028183777718724e-05, "loss": 0.666, "step": 52660 }, { "epoch": 0.58, "learning_rate": 4.028091505004872e-05, "loss": 0.6957, "step": 52665 }, { "epoch": 0.58, "learning_rate": 4.027999232291021e-05, "loss": 0.6652, "step": 52670 }, { "epoch": 0.58, "learning_rate": 4.0279069595771696e-05, "loss": 0.7181, "step": 52675 }, { "epoch": 0.58, "learning_rate": 4.0278146868633184e-05, "loss": 0.7223, "step": 52680 }, { "epoch": 0.58, "learning_rate": 4.027722414149467e-05, "loss": 0.7092, "step": 52685 }, { "epoch": 0.58, "learning_rate": 4.027630141435616e-05, "loss": 0.7543, "step": 52690 }, { "epoch": 0.58, "learning_rate": 4.027537868721765e-05, "loss": 0.7061, "step": 52695 }, { "epoch": 0.58, "learning_rate": 4.0274455960079136e-05, "loss": 0.7102, "step": 52700 }, { "epoch": 0.58, "learning_rate": 4.0273533232940623e-05, "loss": 0.7208, "step": 52705 }, { "epoch": 0.58, "learning_rate": 4.027261050580211e-05, "loss": 0.7361, "step": 52710 }, { "epoch": 0.58, "learning_rate": 4.02716877786636e-05, "loss": 0.7124, "step": 52715 }, { "epoch": 0.58, "learning_rate": 4.027076505152508e-05, "loss": 0.7241, "step": 52720 }, { "epoch": 0.58, "learning_rate": 4.0269842324386575e-05, "loss": 0.7576, "step": 52725 }, { "epoch": 0.58, "learning_rate": 4.026891959724806e-05, "loss": 0.7501, "step": 52730 }, { "epoch": 0.58, "learning_rate": 4.026799687010955e-05, "loss": 0.7144, "step": 52735 }, { "epoch": 0.58, "learning_rate": 4.026707414297103e-05, "loss": 0.7109, "step": 52740 }, { "epoch": 0.58, "learning_rate": 4.0266151415832526e-05, "loss": 0.7492, "step": 52745 }, { "epoch": 0.58, "learning_rate": 4.0265228688694014e-05, "loss": 0.7344, "step": 52750 }, { "epoch": 0.58, "learning_rate": 4.0264305961555495e-05, "loss": 0.7164, "step": 52755 }, { "epoch": 0.58, "learning_rate": 4.026338323441698e-05, "loss": 0.7383, "step": 52760 }, { "epoch": 0.58, "learning_rate": 4.026246050727847e-05, "loss": 0.692, "step": 52765 }, { "epoch": 0.58, "learning_rate": 4.0261537780139966e-05, "loss": 0.7173, "step": 52770 }, { "epoch": 0.58, "learning_rate": 4.0260615053001447e-05, "loss": 0.6871, "step": 52775 }, { "epoch": 0.58, "learning_rate": 4.0259692325862934e-05, "loss": 0.7234, "step": 52780 }, { "epoch": 0.58, "learning_rate": 4.025876959872442e-05, "loss": 0.6915, "step": 52785 }, { "epoch": 0.58, "learning_rate": 4.025784687158592e-05, "loss": 0.7868, "step": 52790 }, { "epoch": 0.58, "learning_rate": 4.02569241444474e-05, "loss": 0.6908, "step": 52795 }, { "epoch": 0.58, "learning_rate": 4.0256001417308886e-05, "loss": 0.759, "step": 52800 }, { "epoch": 0.58, "learning_rate": 4.0255078690170374e-05, "loss": 0.731, "step": 52805 }, { "epoch": 0.58, "learning_rate": 4.025415596303186e-05, "loss": 0.7561, "step": 52810 }, { "epoch": 0.58, "learning_rate": 4.025323323589335e-05, "loss": 0.7556, "step": 52815 }, { "epoch": 0.58, "learning_rate": 4.025231050875484e-05, "loss": 0.6953, "step": 52820 }, { "epoch": 0.58, "learning_rate": 4.0251387781616325e-05, "loss": 0.7629, "step": 52825 }, { "epoch": 0.58, "learning_rate": 4.0250465054477806e-05, "loss": 0.7271, "step": 52830 }, { "epoch": 0.59, "learning_rate": 4.02495423273393e-05, "loss": 0.7318, "step": 52835 }, { "epoch": 0.59, "learning_rate": 4.024861960020079e-05, "loss": 0.761, "step": 52840 }, { "epoch": 0.59, "learning_rate": 4.0247696873062276e-05, "loss": 0.6702, "step": 52845 }, { "epoch": 0.59, "learning_rate": 4.024677414592376e-05, "loss": 0.7306, "step": 52850 }, { "epoch": 0.59, "learning_rate": 4.024585141878525e-05, "loss": 0.732, "step": 52855 }, { "epoch": 0.59, "learning_rate": 4.024492869164674e-05, "loss": 0.6635, "step": 52860 }, { "epoch": 0.59, "learning_rate": 4.024400596450823e-05, "loss": 0.7998, "step": 52865 }, { "epoch": 0.59, "learning_rate": 4.024308323736971e-05, "loss": 0.7463, "step": 52870 }, { "epoch": 0.59, "learning_rate": 4.0242160510231204e-05, "loss": 0.7197, "step": 52875 }, { "epoch": 0.59, "learning_rate": 4.024123778309269e-05, "loss": 0.7351, "step": 52880 }, { "epoch": 0.59, "learning_rate": 4.024031505595417e-05, "loss": 0.7662, "step": 52885 }, { "epoch": 0.59, "learning_rate": 4.023939232881566e-05, "loss": 0.7776, "step": 52890 }, { "epoch": 0.59, "learning_rate": 4.0238469601677155e-05, "loss": 0.7452, "step": 52895 }, { "epoch": 0.59, "learning_rate": 4.023754687453864e-05, "loss": 0.7574, "step": 52900 }, { "epoch": 0.59, "learning_rate": 4.0236624147400124e-05, "loss": 0.7351, "step": 52905 }, { "epoch": 0.59, "learning_rate": 4.023570142026161e-05, "loss": 0.7257, "step": 52910 }, { "epoch": 0.59, "learning_rate": 4.02347786931231e-05, "loss": 0.7469, "step": 52915 }, { "epoch": 0.59, "learning_rate": 4.023385596598459e-05, "loss": 0.6787, "step": 52920 }, { "epoch": 0.59, "learning_rate": 4.0232933238846075e-05, "loss": 0.716, "step": 52925 }, { "epoch": 0.59, "learning_rate": 4.023201051170756e-05, "loss": 0.8233, "step": 52930 }, { "epoch": 0.59, "learning_rate": 4.023108778456905e-05, "loss": 0.7285, "step": 52935 }, { "epoch": 0.59, "learning_rate": 4.023016505743054e-05, "loss": 0.6783, "step": 52940 }, { "epoch": 0.59, "learning_rate": 4.022924233029203e-05, "loss": 0.7733, "step": 52945 }, { "epoch": 0.59, "learning_rate": 4.0228319603153515e-05, "loss": 0.7672, "step": 52950 }, { "epoch": 0.59, "learning_rate": 4.0227396876015e-05, "loss": 0.7758, "step": 52955 }, { "epoch": 0.59, "learning_rate": 4.022647414887649e-05, "loss": 0.7189, "step": 52960 }, { "epoch": 0.59, "learning_rate": 4.022555142173798e-05, "loss": 0.7731, "step": 52965 }, { "epoch": 0.59, "learning_rate": 4.0224628694599466e-05, "loss": 0.7841, "step": 52970 }, { "epoch": 0.59, "learning_rate": 4.0223705967460954e-05, "loss": 0.7395, "step": 52975 }, { "epoch": 0.59, "learning_rate": 4.0222783240322435e-05, "loss": 0.7164, "step": 52980 }, { "epoch": 0.59, "learning_rate": 4.022186051318393e-05, "loss": 0.7505, "step": 52985 }, { "epoch": 0.59, "learning_rate": 4.022093778604542e-05, "loss": 0.738, "step": 52990 }, { "epoch": 0.59, "learning_rate": 4.02200150589069e-05, "loss": 0.7108, "step": 52995 }, { "epoch": 0.59, "learning_rate": 4.0219092331768386e-05, "loss": 0.7389, "step": 53000 }, { "epoch": 0.59, "eval_loss": 0.6861677169799805, "eval_runtime": 110.3454, "eval_samples_per_second": 18.125, "eval_steps_per_second": 9.062, "step": 53000 }, { "epoch": 0.59, "learning_rate": 4.021816960462988e-05, "loss": 0.7626, "step": 53005 }, { "epoch": 0.59, "learning_rate": 4.021724687749137e-05, "loss": 0.7471, "step": 53010 }, { "epoch": 0.59, "learning_rate": 4.021632415035285e-05, "loss": 0.7369, "step": 53015 }, { "epoch": 0.59, "learning_rate": 4.021540142321434e-05, "loss": 0.7419, "step": 53020 }, { "epoch": 0.59, "learning_rate": 4.021447869607583e-05, "loss": 0.6816, "step": 53025 }, { "epoch": 0.59, "learning_rate": 4.021355596893731e-05, "loss": 0.7355, "step": 53030 }, { "epoch": 0.59, "learning_rate": 4.02126332417988e-05, "loss": 0.7464, "step": 53035 }, { "epoch": 0.59, "learning_rate": 4.021171051466029e-05, "loss": 0.7327, "step": 53040 }, { "epoch": 0.59, "learning_rate": 4.0210787787521784e-05, "loss": 0.7363, "step": 53045 }, { "epoch": 0.59, "learning_rate": 4.0209865060383265e-05, "loss": 0.6999, "step": 53050 }, { "epoch": 0.59, "learning_rate": 4.020894233324475e-05, "loss": 0.7011, "step": 53055 }, { "epoch": 0.59, "learning_rate": 4.020801960610624e-05, "loss": 0.7025, "step": 53060 }, { "epoch": 0.59, "learning_rate": 4.020709687896773e-05, "loss": 0.7085, "step": 53065 }, { "epoch": 0.59, "learning_rate": 4.0206174151829216e-05, "loss": 0.6854, "step": 53070 }, { "epoch": 0.59, "learning_rate": 4.0205251424690704e-05, "loss": 0.7253, "step": 53075 }, { "epoch": 0.59, "learning_rate": 4.020432869755219e-05, "loss": 0.7376, "step": 53080 }, { "epoch": 0.59, "learning_rate": 4.020340597041368e-05, "loss": 0.6935, "step": 53085 }, { "epoch": 0.59, "learning_rate": 4.020248324327517e-05, "loss": 0.6854, "step": 53090 }, { "epoch": 0.59, "learning_rate": 4.0201560516136655e-05, "loss": 0.757, "step": 53095 }, { "epoch": 0.59, "learning_rate": 4.020063778899814e-05, "loss": 0.7638, "step": 53100 }, { "epoch": 0.59, "learning_rate": 4.0199715061859624e-05, "loss": 0.7878, "step": 53105 }, { "epoch": 0.59, "learning_rate": 4.019879233472112e-05, "loss": 0.7101, "step": 53110 }, { "epoch": 0.59, "learning_rate": 4.019786960758261e-05, "loss": 0.6938, "step": 53115 }, { "epoch": 0.59, "learning_rate": 4.0196946880444095e-05, "loss": 0.7072, "step": 53120 }, { "epoch": 0.59, "learning_rate": 4.0196024153305576e-05, "loss": 0.7202, "step": 53125 }, { "epoch": 0.59, "learning_rate": 4.019510142616707e-05, "loss": 0.7935, "step": 53130 }, { "epoch": 0.59, "learning_rate": 4.019417869902856e-05, "loss": 0.7725, "step": 53135 }, { "epoch": 0.59, "learning_rate": 4.019325597189004e-05, "loss": 0.7578, "step": 53140 }, { "epoch": 0.59, "learning_rate": 4.019233324475153e-05, "loss": 0.704, "step": 53145 }, { "epoch": 0.59, "learning_rate": 4.0191410517613015e-05, "loss": 0.7016, "step": 53150 }, { "epoch": 0.59, "learning_rate": 4.019048779047451e-05, "loss": 0.7652, "step": 53155 }, { "epoch": 0.59, "learning_rate": 4.018956506333599e-05, "loss": 0.7404, "step": 53160 }, { "epoch": 0.59, "learning_rate": 4.018864233619748e-05, "loss": 0.7194, "step": 53165 }, { "epoch": 0.59, "learning_rate": 4.0187719609058966e-05, "loss": 0.6897, "step": 53170 }, { "epoch": 0.59, "learning_rate": 4.018679688192046e-05, "loss": 0.8001, "step": 53175 }, { "epoch": 0.59, "learning_rate": 4.018587415478194e-05, "loss": 0.7692, "step": 53180 }, { "epoch": 0.59, "learning_rate": 4.018495142764343e-05, "loss": 0.7088, "step": 53185 }, { "epoch": 0.59, "learning_rate": 4.018402870050492e-05, "loss": 0.7415, "step": 53190 }, { "epoch": 0.59, "learning_rate": 4.0183105973366406e-05, "loss": 0.6941, "step": 53195 }, { "epoch": 0.59, "learning_rate": 4.018218324622789e-05, "loss": 0.6858, "step": 53200 }, { "epoch": 0.59, "learning_rate": 4.018126051908938e-05, "loss": 0.7726, "step": 53205 }, { "epoch": 0.59, "learning_rate": 4.018033779195087e-05, "loss": 0.7093, "step": 53210 }, { "epoch": 0.59, "learning_rate": 4.017941506481235e-05, "loss": 0.7269, "step": 53215 }, { "epoch": 0.59, "learning_rate": 4.0178492337673845e-05, "loss": 0.797, "step": 53220 }, { "epoch": 0.59, "learning_rate": 4.017756961053533e-05, "loss": 0.7105, "step": 53225 }, { "epoch": 0.59, "learning_rate": 4.017664688339682e-05, "loss": 0.6817, "step": 53230 }, { "epoch": 0.59, "learning_rate": 4.01757241562583e-05, "loss": 0.7289, "step": 53235 }, { "epoch": 0.59, "learning_rate": 4.0174801429119796e-05, "loss": 0.7745, "step": 53240 }, { "epoch": 0.59, "learning_rate": 4.0173878701981284e-05, "loss": 0.7273, "step": 53245 }, { "epoch": 0.59, "learning_rate": 4.017295597484277e-05, "loss": 0.752, "step": 53250 }, { "epoch": 0.59, "learning_rate": 4.017203324770425e-05, "loss": 0.8102, "step": 53255 }, { "epoch": 0.59, "learning_rate": 4.017111052056575e-05, "loss": 0.7165, "step": 53260 }, { "epoch": 0.59, "learning_rate": 4.0170187793427235e-05, "loss": 0.6866, "step": 53265 }, { "epoch": 0.59, "learning_rate": 4.0169265066288716e-05, "loss": 0.7169, "step": 53270 }, { "epoch": 0.59, "learning_rate": 4.0168342339150204e-05, "loss": 0.7117, "step": 53275 }, { "epoch": 0.59, "learning_rate": 4.01674196120117e-05, "loss": 0.7256, "step": 53280 }, { "epoch": 0.59, "learning_rate": 4.016649688487319e-05, "loss": 0.7155, "step": 53285 }, { "epoch": 0.59, "learning_rate": 4.016557415773467e-05, "loss": 0.7326, "step": 53290 }, { "epoch": 0.59, "learning_rate": 4.0164651430596156e-05, "loss": 0.7716, "step": 53295 }, { "epoch": 0.59, "learning_rate": 4.0163728703457644e-05, "loss": 0.7177, "step": 53300 }, { "epoch": 0.59, "learning_rate": 4.016280597631913e-05, "loss": 0.7812, "step": 53305 }, { "epoch": 0.59, "learning_rate": 4.016188324918062e-05, "loss": 0.7045, "step": 53310 }, { "epoch": 0.59, "learning_rate": 4.016096052204211e-05, "loss": 0.7714, "step": 53315 }, { "epoch": 0.59, "learning_rate": 4.0160037794903595e-05, "loss": 0.7438, "step": 53320 }, { "epoch": 0.59, "learning_rate": 4.015911506776508e-05, "loss": 0.7069, "step": 53325 }, { "epoch": 0.59, "learning_rate": 4.015819234062657e-05, "loss": 0.732, "step": 53330 }, { "epoch": 0.59, "learning_rate": 4.015726961348806e-05, "loss": 0.7003, "step": 53335 }, { "epoch": 0.59, "learning_rate": 4.0156346886349546e-05, "loss": 0.7444, "step": 53340 }, { "epoch": 0.59, "learning_rate": 4.0155424159211034e-05, "loss": 0.7263, "step": 53345 }, { "epoch": 0.59, "learning_rate": 4.015450143207252e-05, "loss": 0.803, "step": 53350 }, { "epoch": 0.59, "learning_rate": 4.015357870493401e-05, "loss": 0.7537, "step": 53355 }, { "epoch": 0.59, "learning_rate": 4.01526559777955e-05, "loss": 0.7438, "step": 53360 }, { "epoch": 0.59, "learning_rate": 4.015173325065698e-05, "loss": 0.6983, "step": 53365 }, { "epoch": 0.59, "learning_rate": 4.0150810523518473e-05, "loss": 0.757, "step": 53370 }, { "epoch": 0.59, "learning_rate": 4.014988779637996e-05, "loss": 0.617, "step": 53375 }, { "epoch": 0.59, "learning_rate": 4.014896506924144e-05, "loss": 0.7366, "step": 53380 }, { "epoch": 0.59, "learning_rate": 4.014804234210293e-05, "loss": 0.7155, "step": 53385 }, { "epoch": 0.59, "learning_rate": 4.0147119614964425e-05, "loss": 0.7221, "step": 53390 }, { "epoch": 0.59, "learning_rate": 4.014619688782591e-05, "loss": 0.7003, "step": 53395 }, { "epoch": 0.59, "learning_rate": 4.0145274160687394e-05, "loss": 0.7521, "step": 53400 }, { "epoch": 0.59, "learning_rate": 4.014435143354888e-05, "loss": 0.7011, "step": 53405 }, { "epoch": 0.59, "learning_rate": 4.0143428706410376e-05, "loss": 0.6983, "step": 53410 }, { "epoch": 0.59, "learning_rate": 4.014250597927186e-05, "loss": 0.7082, "step": 53415 }, { "epoch": 0.59, "learning_rate": 4.0141583252133345e-05, "loss": 0.7386, "step": 53420 }, { "epoch": 0.59, "learning_rate": 4.014066052499483e-05, "loss": 0.7502, "step": 53425 }, { "epoch": 0.59, "learning_rate": 4.013973779785633e-05, "loss": 0.7475, "step": 53430 }, { "epoch": 0.59, "learning_rate": 4.013881507071781e-05, "loss": 0.7437, "step": 53435 }, { "epoch": 0.59, "learning_rate": 4.0137892343579297e-05, "loss": 0.7448, "step": 53440 }, { "epoch": 0.59, "learning_rate": 4.0136969616440784e-05, "loss": 0.7512, "step": 53445 }, { "epoch": 0.59, "learning_rate": 4.013604688930227e-05, "loss": 0.7545, "step": 53450 }, { "epoch": 0.59, "learning_rate": 4.013512416216376e-05, "loss": 0.7058, "step": 53455 }, { "epoch": 0.59, "learning_rate": 4.013420143502525e-05, "loss": 0.6725, "step": 53460 }, { "epoch": 0.59, "learning_rate": 4.0133278707886736e-05, "loss": 0.7202, "step": 53465 }, { "epoch": 0.59, "learning_rate": 4.0132355980748224e-05, "loss": 0.7437, "step": 53470 }, { "epoch": 0.59, "learning_rate": 4.013143325360971e-05, "loss": 0.7513, "step": 53475 }, { "epoch": 0.59, "learning_rate": 4.01305105264712e-05, "loss": 0.7306, "step": 53480 }, { "epoch": 0.59, "learning_rate": 4.012958779933269e-05, "loss": 0.7638, "step": 53485 }, { "epoch": 0.59, "learning_rate": 4.012866507219417e-05, "loss": 0.7168, "step": 53490 }, { "epoch": 0.59, "learning_rate": 4.012774234505566e-05, "loss": 0.7156, "step": 53495 }, { "epoch": 0.59, "learning_rate": 4.012681961791715e-05, "loss": 0.7828, "step": 53500 }, { "epoch": 0.59, "learning_rate": 4.012589689077864e-05, "loss": 0.7518, "step": 53505 }, { "epoch": 0.59, "learning_rate": 4.012497416364012e-05, "loss": 0.7282, "step": 53510 }, { "epoch": 0.59, "learning_rate": 4.012405143650161e-05, "loss": 0.7344, "step": 53515 }, { "epoch": 0.59, "learning_rate": 4.01231287093631e-05, "loss": 0.7809, "step": 53520 }, { "epoch": 0.59, "learning_rate": 4.012220598222458e-05, "loss": 0.7009, "step": 53525 }, { "epoch": 0.59, "learning_rate": 4.012128325508607e-05, "loss": 0.6485, "step": 53530 }, { "epoch": 0.59, "learning_rate": 4.012036052794756e-05, "loss": 0.6968, "step": 53535 }, { "epoch": 0.59, "learning_rate": 4.0119437800809054e-05, "loss": 0.8128, "step": 53540 }, { "epoch": 0.59, "learning_rate": 4.0118515073670535e-05, "loss": 0.745, "step": 53545 }, { "epoch": 0.59, "learning_rate": 4.011759234653202e-05, "loss": 0.7717, "step": 53550 }, { "epoch": 0.59, "learning_rate": 4.011666961939351e-05, "loss": 0.7405, "step": 53555 }, { "epoch": 0.59, "learning_rate": 4.0115746892255005e-05, "loss": 0.7248, "step": 53560 }, { "epoch": 0.59, "learning_rate": 4.0114824165116486e-05, "loss": 0.75, "step": 53565 }, { "epoch": 0.59, "learning_rate": 4.0113901437977974e-05, "loss": 0.6866, "step": 53570 }, { "epoch": 0.59, "learning_rate": 4.011297871083946e-05, "loss": 0.7179, "step": 53575 }, { "epoch": 0.59, "learning_rate": 4.011205598370095e-05, "loss": 0.7571, "step": 53580 }, { "epoch": 0.59, "learning_rate": 4.011113325656244e-05, "loss": 0.7069, "step": 53585 }, { "epoch": 0.59, "learning_rate": 4.0110210529423925e-05, "loss": 0.7138, "step": 53590 }, { "epoch": 0.59, "learning_rate": 4.010928780228541e-05, "loss": 0.7844, "step": 53595 }, { "epoch": 0.59, "learning_rate": 4.0108365075146894e-05, "loss": 0.7625, "step": 53600 }, { "epoch": 0.59, "learning_rate": 4.010744234800839e-05, "loss": 0.6588, "step": 53605 }, { "epoch": 0.59, "learning_rate": 4.010651962086988e-05, "loss": 0.7062, "step": 53610 }, { "epoch": 0.59, "learning_rate": 4.0105596893731364e-05, "loss": 0.7395, "step": 53615 }, { "epoch": 0.59, "learning_rate": 4.0104674166592846e-05, "loss": 0.7082, "step": 53620 }, { "epoch": 0.59, "learning_rate": 4.010375143945434e-05, "loss": 0.7431, "step": 53625 }, { "epoch": 0.59, "learning_rate": 4.010282871231583e-05, "loss": 0.7286, "step": 53630 }, { "epoch": 0.59, "learning_rate": 4.0101905985177316e-05, "loss": 0.7264, "step": 53635 }, { "epoch": 0.59, "learning_rate": 4.01009832580388e-05, "loss": 0.7096, "step": 53640 }, { "epoch": 0.59, "learning_rate": 4.010006053090029e-05, "loss": 0.7285, "step": 53645 }, { "epoch": 0.59, "learning_rate": 4.009913780376178e-05, "loss": 0.7594, "step": 53650 }, { "epoch": 0.59, "learning_rate": 4.009821507662326e-05, "loss": 0.7648, "step": 53655 }, { "epoch": 0.59, "learning_rate": 4.009729234948475e-05, "loss": 0.7344, "step": 53660 }, { "epoch": 0.59, "learning_rate": 4.0096369622346236e-05, "loss": 0.7009, "step": 53665 }, { "epoch": 0.59, "learning_rate": 4.009544689520773e-05, "loss": 0.7852, "step": 53670 }, { "epoch": 0.59, "learning_rate": 4.009452416806921e-05, "loss": 0.7248, "step": 53675 }, { "epoch": 0.59, "learning_rate": 4.00936014409307e-05, "loss": 0.7679, "step": 53680 }, { "epoch": 0.59, "learning_rate": 4.009267871379219e-05, "loss": 0.7558, "step": 53685 }, { "epoch": 0.59, "learning_rate": 4.0091755986653675e-05, "loss": 0.7184, "step": 53690 }, { "epoch": 0.59, "learning_rate": 4.009083325951516e-05, "loss": 0.7202, "step": 53695 }, { "epoch": 0.59, "learning_rate": 4.008991053237665e-05, "loss": 0.7375, "step": 53700 }, { "epoch": 0.59, "learning_rate": 4.008898780523814e-05, "loss": 0.7702, "step": 53705 }, { "epoch": 0.59, "learning_rate": 4.008806507809963e-05, "loss": 0.751, "step": 53710 }, { "epoch": 0.59, "learning_rate": 4.0087142350961115e-05, "loss": 0.7839, "step": 53715 }, { "epoch": 0.59, "learning_rate": 4.00862196238226e-05, "loss": 0.7704, "step": 53720 }, { "epoch": 0.59, "learning_rate": 4.008529689668409e-05, "loss": 0.7027, "step": 53725 }, { "epoch": 0.59, "learning_rate": 4.008437416954558e-05, "loss": 0.7122, "step": 53730 }, { "epoch": 0.59, "learning_rate": 4.0083451442407066e-05, "loss": 0.7495, "step": 53735 }, { "epoch": 0.6, "learning_rate": 4.0082528715268554e-05, "loss": 0.8281, "step": 53740 }, { "epoch": 0.6, "learning_rate": 4.008160598813004e-05, "loss": 0.7406, "step": 53745 }, { "epoch": 0.6, "learning_rate": 4.008068326099152e-05, "loss": 0.7037, "step": 53750 }, { "epoch": 0.6, "learning_rate": 4.007976053385302e-05, "loss": 0.741, "step": 53755 }, { "epoch": 0.6, "learning_rate": 4.0078837806714505e-05, "loss": 0.7305, "step": 53760 }, { "epoch": 0.6, "learning_rate": 4.0077915079575986e-05, "loss": 0.7898, "step": 53765 }, { "epoch": 0.6, "learning_rate": 4.0076992352437474e-05, "loss": 0.738, "step": 53770 }, { "epoch": 0.6, "learning_rate": 4.007606962529897e-05, "loss": 0.7599, "step": 53775 }, { "epoch": 0.6, "learning_rate": 4.007514689816046e-05, "loss": 0.7366, "step": 53780 }, { "epoch": 0.6, "learning_rate": 4.007422417102194e-05, "loss": 0.7496, "step": 53785 }, { "epoch": 0.6, "learning_rate": 4.0073301443883426e-05, "loss": 0.7725, "step": 53790 }, { "epoch": 0.6, "learning_rate": 4.007237871674492e-05, "loss": 0.7663, "step": 53795 }, { "epoch": 0.6, "learning_rate": 4.00714559896064e-05, "loss": 0.7588, "step": 53800 }, { "epoch": 0.6, "learning_rate": 4.007053326246789e-05, "loss": 0.6599, "step": 53805 }, { "epoch": 0.6, "learning_rate": 4.006961053532938e-05, "loss": 0.7262, "step": 53810 }, { "epoch": 0.6, "learning_rate": 4.0068687808190865e-05, "loss": 0.7172, "step": 53815 }, { "epoch": 0.6, "learning_rate": 4.006776508105235e-05, "loss": 0.7099, "step": 53820 }, { "epoch": 0.6, "learning_rate": 4.006684235391384e-05, "loss": 0.7326, "step": 53825 }, { "epoch": 0.6, "learning_rate": 4.006591962677533e-05, "loss": 0.6922, "step": 53830 }, { "epoch": 0.6, "learning_rate": 4.0064996899636816e-05, "loss": 0.7686, "step": 53835 }, { "epoch": 0.6, "learning_rate": 4.0064074172498304e-05, "loss": 0.7114, "step": 53840 }, { "epoch": 0.6, "learning_rate": 4.006315144535979e-05, "loss": 0.7384, "step": 53845 }, { "epoch": 0.6, "learning_rate": 4.006222871822128e-05, "loss": 0.7353, "step": 53850 }, { "epoch": 0.6, "learning_rate": 4.006130599108277e-05, "loss": 0.7265, "step": 53855 }, { "epoch": 0.6, "learning_rate": 4.0060383263944256e-05, "loss": 0.7865, "step": 53860 }, { "epoch": 0.6, "learning_rate": 4.005946053680574e-05, "loss": 0.7412, "step": 53865 }, { "epoch": 0.6, "learning_rate": 4.005853780966723e-05, "loss": 0.7414, "step": 53870 }, { "epoch": 0.6, "learning_rate": 4.005761508252871e-05, "loss": 0.7636, "step": 53875 }, { "epoch": 0.6, "learning_rate": 4.005669235539021e-05, "loss": 0.7288, "step": 53880 }, { "epoch": 0.6, "learning_rate": 4.0055769628251695e-05, "loss": 0.6915, "step": 53885 }, { "epoch": 0.6, "learning_rate": 4.005484690111318e-05, "loss": 0.7213, "step": 53890 }, { "epoch": 0.6, "learning_rate": 4.0053924173974664e-05, "loss": 0.722, "step": 53895 }, { "epoch": 0.6, "learning_rate": 4.005300144683615e-05, "loss": 0.7502, "step": 53900 }, { "epoch": 0.6, "learning_rate": 4.0052078719697646e-05, "loss": 0.7315, "step": 53905 }, { "epoch": 0.6, "learning_rate": 4.0051155992559134e-05, "loss": 0.7523, "step": 53910 }, { "epoch": 0.6, "learning_rate": 4.0050233265420615e-05, "loss": 0.657, "step": 53915 }, { "epoch": 0.6, "learning_rate": 4.00493105382821e-05, "loss": 0.6933, "step": 53920 }, { "epoch": 0.6, "learning_rate": 4.00483878111436e-05, "loss": 0.7801, "step": 53925 }, { "epoch": 0.6, "learning_rate": 4.004746508400508e-05, "loss": 0.7127, "step": 53930 }, { "epoch": 0.6, "learning_rate": 4.0046542356866566e-05, "loss": 0.7755, "step": 53935 }, { "epoch": 0.6, "learning_rate": 4.0045619629728054e-05, "loss": 0.7139, "step": 53940 }, { "epoch": 0.6, "learning_rate": 4.004469690258955e-05, "loss": 0.6739, "step": 53945 }, { "epoch": 0.6, "learning_rate": 4.004377417545103e-05, "loss": 0.7205, "step": 53950 }, { "epoch": 0.6, "learning_rate": 4.004285144831252e-05, "loss": 0.7624, "step": 53955 }, { "epoch": 0.6, "learning_rate": 4.0041928721174006e-05, "loss": 0.7652, "step": 53960 }, { "epoch": 0.6, "learning_rate": 4.0041005994035494e-05, "loss": 0.7585, "step": 53965 }, { "epoch": 0.6, "learning_rate": 4.004008326689698e-05, "loss": 0.7131, "step": 53970 }, { "epoch": 0.6, "learning_rate": 4.003916053975847e-05, "loss": 0.7874, "step": 53975 }, { "epoch": 0.6, "learning_rate": 4.003823781261996e-05, "loss": 0.7922, "step": 53980 }, { "epoch": 0.6, "learning_rate": 4.0037315085481445e-05, "loss": 0.7079, "step": 53985 }, { "epoch": 0.6, "learning_rate": 4.003639235834293e-05, "loss": 0.7469, "step": 53990 }, { "epoch": 0.6, "learning_rate": 4.003546963120442e-05, "loss": 0.7781, "step": 53995 }, { "epoch": 0.6, "learning_rate": 4.003454690406591e-05, "loss": 0.7529, "step": 54000 }, { "epoch": 0.6, "eval_loss": 0.7174919247627258, "eval_runtime": 108.8297, "eval_samples_per_second": 18.377, "eval_steps_per_second": 9.189, "step": 54000 }, { "epoch": 0.6, "learning_rate": 4.003362417692739e-05, "loss": 0.7404, "step": 54005 }, { "epoch": 0.6, "learning_rate": 4.0032701449788884e-05, "loss": 0.7045, "step": 54010 }, { "epoch": 0.6, "learning_rate": 4.003177872265037e-05, "loss": 0.7854, "step": 54015 }, { "epoch": 0.6, "learning_rate": 4.003085599551186e-05, "loss": 0.7384, "step": 54020 }, { "epoch": 0.6, "learning_rate": 4.002993326837334e-05, "loss": 0.7817, "step": 54025 }, { "epoch": 0.6, "learning_rate": 4.0029010541234836e-05, "loss": 0.7338, "step": 54030 }, { "epoch": 0.6, "learning_rate": 4.0028087814096323e-05, "loss": 0.7242, "step": 54035 }, { "epoch": 0.6, "learning_rate": 4.0027165086957805e-05, "loss": 0.7357, "step": 54040 }, { "epoch": 0.6, "learning_rate": 4.002624235981929e-05, "loss": 0.773, "step": 54045 }, { "epoch": 0.6, "learning_rate": 4.002531963268078e-05, "loss": 0.7668, "step": 54050 }, { "epoch": 0.6, "learning_rate": 4.0024396905542275e-05, "loss": 0.7166, "step": 54055 }, { "epoch": 0.6, "learning_rate": 4.0023474178403756e-05, "loss": 0.6688, "step": 54060 }, { "epoch": 0.6, "learning_rate": 4.0022551451265244e-05, "loss": 0.7542, "step": 54065 }, { "epoch": 0.6, "learning_rate": 4.002162872412673e-05, "loss": 0.738, "step": 54070 }, { "epoch": 0.6, "learning_rate": 4.002070599698822e-05, "loss": 0.7605, "step": 54075 }, { "epoch": 0.6, "learning_rate": 4.001978326984971e-05, "loss": 0.7775, "step": 54080 }, { "epoch": 0.6, "learning_rate": 4.0018860542711195e-05, "loss": 0.73, "step": 54085 }, { "epoch": 0.6, "learning_rate": 4.001793781557268e-05, "loss": 0.77, "step": 54090 }, { "epoch": 0.6, "learning_rate": 4.001701508843417e-05, "loss": 0.7537, "step": 54095 }, { "epoch": 0.6, "learning_rate": 4.001609236129566e-05, "loss": 0.7355, "step": 54100 }, { "epoch": 0.6, "learning_rate": 4.0015169634157147e-05, "loss": 0.7277, "step": 54105 }, { "epoch": 0.6, "learning_rate": 4.0014246907018634e-05, "loss": 0.7141, "step": 54110 }, { "epoch": 0.6, "learning_rate": 4.001332417988012e-05, "loss": 0.7253, "step": 54115 }, { "epoch": 0.6, "learning_rate": 4.001240145274161e-05, "loss": 0.6989, "step": 54120 }, { "epoch": 0.6, "learning_rate": 4.00114787256031e-05, "loss": 0.7635, "step": 54125 }, { "epoch": 0.6, "learning_rate": 4.0010555998464586e-05, "loss": 0.7189, "step": 54130 }, { "epoch": 0.6, "learning_rate": 4.000963327132607e-05, "loss": 0.716, "step": 54135 }, { "epoch": 0.6, "learning_rate": 4.000871054418756e-05, "loss": 0.7808, "step": 54140 }, { "epoch": 0.6, "learning_rate": 4.000778781704905e-05, "loss": 0.7441, "step": 54145 }, { "epoch": 0.6, "learning_rate": 4.000686508991053e-05, "loss": 0.7546, "step": 54150 }, { "epoch": 0.6, "learning_rate": 4.000594236277202e-05, "loss": 0.7729, "step": 54155 }, { "epoch": 0.6, "learning_rate": 4.000501963563351e-05, "loss": 0.6683, "step": 54160 }, { "epoch": 0.6, "learning_rate": 4.0004096908495e-05, "loss": 0.7136, "step": 54165 }, { "epoch": 0.6, "learning_rate": 4.000317418135648e-05, "loss": 0.7767, "step": 54170 }, { "epoch": 0.6, "learning_rate": 4.000225145421797e-05, "loss": 0.747, "step": 54175 }, { "epoch": 0.6, "learning_rate": 4.0001328727079464e-05, "loss": 0.7128, "step": 54180 }, { "epoch": 0.6, "learning_rate": 4.0000405999940945e-05, "loss": 0.7074, "step": 54185 }, { "epoch": 0.6, "learning_rate": 3.999948327280243e-05, "loss": 0.7464, "step": 54190 }, { "epoch": 0.6, "learning_rate": 3.999856054566392e-05, "loss": 0.7606, "step": 54195 }, { "epoch": 0.6, "learning_rate": 3.999763781852541e-05, "loss": 0.7615, "step": 54200 }, { "epoch": 0.6, "learning_rate": 3.99967150913869e-05, "loss": 0.7114, "step": 54205 }, { "epoch": 0.6, "learning_rate": 3.9995792364248385e-05, "loss": 0.7248, "step": 54210 }, { "epoch": 0.6, "learning_rate": 3.999486963710987e-05, "loss": 0.7682, "step": 54215 }, { "epoch": 0.6, "learning_rate": 3.999394690997136e-05, "loss": 0.7674, "step": 54220 }, { "epoch": 0.6, "learning_rate": 3.999302418283285e-05, "loss": 0.7258, "step": 54225 }, { "epoch": 0.6, "learning_rate": 3.9992101455694336e-05, "loss": 0.7386, "step": 54230 }, { "epoch": 0.6, "learning_rate": 3.9991178728555824e-05, "loss": 0.7813, "step": 54235 }, { "epoch": 0.6, "learning_rate": 3.999025600141731e-05, "loss": 0.7525, "step": 54240 }, { "epoch": 0.6, "learning_rate": 3.99893332742788e-05, "loss": 0.7467, "step": 54245 }, { "epoch": 0.6, "learning_rate": 3.998841054714029e-05, "loss": 0.7118, "step": 54250 }, { "epoch": 0.6, "learning_rate": 3.9987487820001775e-05, "loss": 0.7107, "step": 54255 }, { "epoch": 0.6, "learning_rate": 3.9986565092863256e-05, "loss": 0.724, "step": 54260 }, { "epoch": 0.6, "learning_rate": 3.998564236572475e-05, "loss": 0.6964, "step": 54265 }, { "epoch": 0.6, "learning_rate": 3.998471963858624e-05, "loss": 0.7503, "step": 54270 }, { "epoch": 0.6, "learning_rate": 3.998379691144773e-05, "loss": 0.7233, "step": 54275 }, { "epoch": 0.6, "learning_rate": 3.998287418430921e-05, "loss": 0.6885, "step": 54280 }, { "epoch": 0.6, "learning_rate": 3.9981951457170696e-05, "loss": 0.7336, "step": 54285 }, { "epoch": 0.6, "learning_rate": 3.998102873003219e-05, "loss": 0.741, "step": 54290 }, { "epoch": 0.6, "learning_rate": 3.998010600289368e-05, "loss": 0.7714, "step": 54295 }, { "epoch": 0.6, "learning_rate": 3.997918327575516e-05, "loss": 0.6839, "step": 54300 }, { "epoch": 0.6, "learning_rate": 3.997826054861665e-05, "loss": 0.7592, "step": 54305 }, { "epoch": 0.6, "learning_rate": 3.997733782147814e-05, "loss": 0.7291, "step": 54310 }, { "epoch": 0.6, "learning_rate": 3.997641509433962e-05, "loss": 0.8349, "step": 54315 }, { "epoch": 0.6, "learning_rate": 3.997549236720111e-05, "loss": 0.7214, "step": 54320 }, { "epoch": 0.6, "learning_rate": 3.99745696400626e-05, "loss": 0.7574, "step": 54325 }, { "epoch": 0.6, "learning_rate": 3.997364691292409e-05, "loss": 0.7238, "step": 54330 }, { "epoch": 0.6, "learning_rate": 3.9972724185785574e-05, "loss": 0.7792, "step": 54335 }, { "epoch": 0.6, "learning_rate": 3.997180145864706e-05, "loss": 0.7103, "step": 54340 }, { "epoch": 0.6, "learning_rate": 3.997087873150855e-05, "loss": 0.7358, "step": 54345 }, { "epoch": 0.6, "learning_rate": 3.996995600437004e-05, "loss": 0.6863, "step": 54350 }, { "epoch": 0.6, "learning_rate": 3.9969033277231525e-05, "loss": 0.7172, "step": 54355 }, { "epoch": 0.6, "learning_rate": 3.996811055009301e-05, "loss": 0.7266, "step": 54360 }, { "epoch": 0.6, "learning_rate": 3.99671878229545e-05, "loss": 0.8032, "step": 54365 }, { "epoch": 0.6, "learning_rate": 3.996626509581599e-05, "loss": 0.7361, "step": 54370 }, { "epoch": 0.6, "learning_rate": 3.996534236867748e-05, "loss": 0.7474, "step": 54375 }, { "epoch": 0.6, "learning_rate": 3.9964419641538965e-05, "loss": 0.6702, "step": 54380 }, { "epoch": 0.6, "learning_rate": 3.996349691440045e-05, "loss": 0.698, "step": 54385 }, { "epoch": 0.6, "learning_rate": 3.9962574187261934e-05, "loss": 0.7741, "step": 54390 }, { "epoch": 0.6, "learning_rate": 3.996165146012343e-05, "loss": 0.7232, "step": 54395 }, { "epoch": 0.6, "learning_rate": 3.9960728732984916e-05, "loss": 0.7119, "step": 54400 }, { "epoch": 0.6, "learning_rate": 3.9959806005846404e-05, "loss": 0.6901, "step": 54405 }, { "epoch": 0.6, "learning_rate": 3.9958883278707885e-05, "loss": 0.758, "step": 54410 }, { "epoch": 0.6, "learning_rate": 3.995796055156938e-05, "loss": 0.7727, "step": 54415 }, { "epoch": 0.6, "learning_rate": 3.995703782443087e-05, "loss": 0.7544, "step": 54420 }, { "epoch": 0.6, "learning_rate": 3.995611509729235e-05, "loss": 0.7419, "step": 54425 }, { "epoch": 0.6, "learning_rate": 3.9955192370153836e-05, "loss": 0.7506, "step": 54430 }, { "epoch": 0.6, "learning_rate": 3.9954269643015324e-05, "loss": 0.7463, "step": 54435 }, { "epoch": 0.6, "learning_rate": 3.995334691587682e-05, "loss": 0.7715, "step": 54440 }, { "epoch": 0.6, "learning_rate": 3.99524241887383e-05, "loss": 0.7211, "step": 54445 }, { "epoch": 0.6, "learning_rate": 3.995150146159979e-05, "loss": 0.7312, "step": 54450 }, { "epoch": 0.6, "learning_rate": 3.9950578734461276e-05, "loss": 0.8261, "step": 54455 }, { "epoch": 0.6, "learning_rate": 3.9949656007322763e-05, "loss": 0.7616, "step": 54460 }, { "epoch": 0.6, "learning_rate": 3.994873328018425e-05, "loss": 0.7112, "step": 54465 }, { "epoch": 0.6, "learning_rate": 3.994781055304574e-05, "loss": 0.7703, "step": 54470 }, { "epoch": 0.6, "learning_rate": 3.994688782590723e-05, "loss": 0.7616, "step": 54475 }, { "epoch": 0.6, "learning_rate": 3.9945965098768715e-05, "loss": 0.741, "step": 54480 }, { "epoch": 0.6, "learning_rate": 3.99450423716302e-05, "loss": 0.76, "step": 54485 }, { "epoch": 0.6, "learning_rate": 3.994411964449169e-05, "loss": 0.7475, "step": 54490 }, { "epoch": 0.6, "learning_rate": 3.994319691735318e-05, "loss": 0.8051, "step": 54495 }, { "epoch": 0.6, "learning_rate": 3.994227419021466e-05, "loss": 0.7758, "step": 54500 }, { "epoch": 0.6, "learning_rate": 3.9941351463076154e-05, "loss": 0.7707, "step": 54505 }, { "epoch": 0.6, "learning_rate": 3.994042873593764e-05, "loss": 0.7383, "step": 54510 }, { "epoch": 0.6, "learning_rate": 3.993950600879913e-05, "loss": 0.7137, "step": 54515 }, { "epoch": 0.6, "learning_rate": 3.993858328166061e-05, "loss": 0.7426, "step": 54520 }, { "epoch": 0.6, "learning_rate": 3.9937660554522106e-05, "loss": 0.8067, "step": 54525 }, { "epoch": 0.6, "learning_rate": 3.993673782738359e-05, "loss": 0.7722, "step": 54530 }, { "epoch": 0.6, "learning_rate": 3.9935815100245074e-05, "loss": 0.7759, "step": 54535 }, { "epoch": 0.6, "learning_rate": 3.993489237310656e-05, "loss": 0.7149, "step": 54540 }, { "epoch": 0.6, "learning_rate": 3.993396964596806e-05, "loss": 0.7402, "step": 54545 }, { "epoch": 0.6, "learning_rate": 3.9933046918829545e-05, "loss": 0.773, "step": 54550 }, { "epoch": 0.6, "learning_rate": 3.9932124191691026e-05, "loss": 0.7188, "step": 54555 }, { "epoch": 0.6, "learning_rate": 3.9931201464552514e-05, "loss": 0.79, "step": 54560 }, { "epoch": 0.6, "learning_rate": 3.993027873741401e-05, "loss": 0.7642, "step": 54565 }, { "epoch": 0.6, "learning_rate": 3.992935601027549e-05, "loss": 0.742, "step": 54570 }, { "epoch": 0.6, "learning_rate": 3.992843328313698e-05, "loss": 0.7228, "step": 54575 }, { "epoch": 0.6, "learning_rate": 3.9927510555998465e-05, "loss": 0.773, "step": 54580 }, { "epoch": 0.6, "learning_rate": 3.992658782885995e-05, "loss": 0.7541, "step": 54585 }, { "epoch": 0.6, "learning_rate": 3.992566510172144e-05, "loss": 0.7703, "step": 54590 }, { "epoch": 0.6, "learning_rate": 3.992474237458293e-05, "loss": 0.7975, "step": 54595 }, { "epoch": 0.6, "learning_rate": 3.9923819647444416e-05, "loss": 0.708, "step": 54600 }, { "epoch": 0.6, "learning_rate": 3.9922896920305904e-05, "loss": 0.7275, "step": 54605 }, { "epoch": 0.6, "learning_rate": 3.992197419316739e-05, "loss": 0.7259, "step": 54610 }, { "epoch": 0.6, "learning_rate": 3.992105146602888e-05, "loss": 0.7134, "step": 54615 }, { "epoch": 0.6, "learning_rate": 3.992012873889037e-05, "loss": 0.7334, "step": 54620 }, { "epoch": 0.6, "learning_rate": 3.9919206011751856e-05, "loss": 0.7817, "step": 54625 }, { "epoch": 0.6, "learning_rate": 3.9918283284613344e-05, "loss": 0.6867, "step": 54630 }, { "epoch": 0.6, "learning_rate": 3.991736055747483e-05, "loss": 0.7431, "step": 54635 }, { "epoch": 0.61, "learning_rate": 3.991643783033632e-05, "loss": 0.761, "step": 54640 }, { "epoch": 0.61, "learning_rate": 3.99155151031978e-05, "loss": 0.734, "step": 54645 }, { "epoch": 0.61, "learning_rate": 3.991459237605929e-05, "loss": 0.6775, "step": 54650 }, { "epoch": 0.61, "learning_rate": 3.991366964892078e-05, "loss": 0.7319, "step": 54655 }, { "epoch": 0.61, "learning_rate": 3.991274692178227e-05, "loss": 0.7639, "step": 54660 }, { "epoch": 0.61, "learning_rate": 3.991182419464375e-05, "loss": 0.7194, "step": 54665 }, { "epoch": 0.61, "learning_rate": 3.991090146750524e-05, "loss": 0.6901, "step": 54670 }, { "epoch": 0.61, "learning_rate": 3.9909978740366734e-05, "loss": 0.7611, "step": 54675 }, { "epoch": 0.61, "learning_rate": 3.990905601322822e-05, "loss": 0.7477, "step": 54680 }, { "epoch": 0.61, "learning_rate": 3.99081332860897e-05, "loss": 0.7746, "step": 54685 }, { "epoch": 0.61, "learning_rate": 3.990721055895119e-05, "loss": 0.7371, "step": 54690 }, { "epoch": 0.61, "learning_rate": 3.9906287831812686e-05, "loss": 0.7279, "step": 54695 }, { "epoch": 0.61, "learning_rate": 3.990536510467417e-05, "loss": 0.7271, "step": 54700 }, { "epoch": 0.61, "learning_rate": 3.9904442377535655e-05, "loss": 0.7622, "step": 54705 }, { "epoch": 0.61, "learning_rate": 3.990351965039714e-05, "loss": 0.7166, "step": 54710 }, { "epoch": 0.61, "learning_rate": 3.990259692325864e-05, "loss": 0.6817, "step": 54715 }, { "epoch": 0.61, "learning_rate": 3.990167419612012e-05, "loss": 0.6696, "step": 54720 }, { "epoch": 0.61, "learning_rate": 3.9900751468981606e-05, "loss": 0.7481, "step": 54725 }, { "epoch": 0.61, "learning_rate": 3.9899828741843094e-05, "loss": 0.7481, "step": 54730 }, { "epoch": 0.61, "learning_rate": 3.989890601470458e-05, "loss": 0.7174, "step": 54735 }, { "epoch": 0.61, "learning_rate": 3.989798328756607e-05, "loss": 0.7098, "step": 54740 }, { "epoch": 0.61, "learning_rate": 3.989706056042756e-05, "loss": 0.7446, "step": 54745 }, { "epoch": 0.61, "learning_rate": 3.9896137833289045e-05, "loss": 0.7133, "step": 54750 }, { "epoch": 0.61, "learning_rate": 3.989521510615053e-05, "loss": 0.76, "step": 54755 }, { "epoch": 0.61, "learning_rate": 3.989429237901202e-05, "loss": 0.7454, "step": 54760 }, { "epoch": 0.61, "learning_rate": 3.989336965187351e-05, "loss": 0.6904, "step": 54765 }, { "epoch": 0.61, "learning_rate": 3.9892446924734997e-05, "loss": 0.6674, "step": 54770 }, { "epoch": 0.61, "learning_rate": 3.989152419759648e-05, "loss": 0.729, "step": 54775 }, { "epoch": 0.61, "learning_rate": 3.989060147045797e-05, "loss": 0.7364, "step": 54780 }, { "epoch": 0.61, "learning_rate": 3.988967874331946e-05, "loss": 0.741, "step": 54785 }, { "epoch": 0.61, "learning_rate": 3.988875601618095e-05, "loss": 0.6585, "step": 54790 }, { "epoch": 0.61, "learning_rate": 3.988783328904243e-05, "loss": 0.7497, "step": 54795 }, { "epoch": 0.61, "learning_rate": 3.9886910561903924e-05, "loss": 0.6858, "step": 54800 }, { "epoch": 0.61, "learning_rate": 3.988598783476541e-05, "loss": 0.6975, "step": 54805 }, { "epoch": 0.61, "learning_rate": 3.988506510762689e-05, "loss": 0.7372, "step": 54810 }, { "epoch": 0.61, "learning_rate": 3.988414238048838e-05, "loss": 0.7374, "step": 54815 }, { "epoch": 0.61, "learning_rate": 3.988321965334987e-05, "loss": 0.6697, "step": 54820 }, { "epoch": 0.61, "learning_rate": 3.988229692621136e-05, "loss": 0.771, "step": 54825 }, { "epoch": 0.61, "learning_rate": 3.9881374199072844e-05, "loss": 0.7105, "step": 54830 }, { "epoch": 0.61, "learning_rate": 3.988045147193433e-05, "loss": 0.741, "step": 54835 }, { "epoch": 0.61, "learning_rate": 3.987952874479582e-05, "loss": 0.7165, "step": 54840 }, { "epoch": 0.61, "learning_rate": 3.987860601765731e-05, "loss": 0.7395, "step": 54845 }, { "epoch": 0.61, "learning_rate": 3.9877683290518795e-05, "loss": 0.702, "step": 54850 }, { "epoch": 0.61, "learning_rate": 3.987676056338028e-05, "loss": 0.766, "step": 54855 }, { "epoch": 0.61, "learning_rate": 3.987583783624177e-05, "loss": 0.7108, "step": 54860 }, { "epoch": 0.61, "learning_rate": 3.987491510910326e-05, "loss": 0.7334, "step": 54865 }, { "epoch": 0.61, "learning_rate": 3.987399238196475e-05, "loss": 0.8003, "step": 54870 }, { "epoch": 0.61, "learning_rate": 3.9873069654826235e-05, "loss": 0.6687, "step": 54875 }, { "epoch": 0.61, "learning_rate": 3.987214692768772e-05, "loss": 0.7206, "step": 54880 }, { "epoch": 0.61, "learning_rate": 3.9871224200549204e-05, "loss": 0.7204, "step": 54885 }, { "epoch": 0.61, "learning_rate": 3.98703014734107e-05, "loss": 0.7931, "step": 54890 }, { "epoch": 0.61, "learning_rate": 3.9869378746272186e-05, "loss": 0.7136, "step": 54895 }, { "epoch": 0.61, "learning_rate": 3.9868456019133674e-05, "loss": 0.7352, "step": 54900 }, { "epoch": 0.61, "learning_rate": 3.9867533291995155e-05, "loss": 0.7214, "step": 54905 }, { "epoch": 0.61, "learning_rate": 3.986661056485665e-05, "loss": 0.6794, "step": 54910 }, { "epoch": 0.61, "learning_rate": 3.986568783771814e-05, "loss": 0.7124, "step": 54915 }, { "epoch": 0.61, "learning_rate": 3.986476511057962e-05, "loss": 0.8161, "step": 54920 }, { "epoch": 0.61, "learning_rate": 3.9863842383441106e-05, "loss": 0.6844, "step": 54925 }, { "epoch": 0.61, "learning_rate": 3.98629196563026e-05, "loss": 0.7219, "step": 54930 }, { "epoch": 0.61, "learning_rate": 3.986199692916409e-05, "loss": 0.6779, "step": 54935 }, { "epoch": 0.61, "learning_rate": 3.986107420202557e-05, "loss": 0.7131, "step": 54940 }, { "epoch": 0.61, "learning_rate": 3.986015147488706e-05, "loss": 0.7053, "step": 54945 }, { "epoch": 0.61, "learning_rate": 3.985922874774855e-05, "loss": 0.803, "step": 54950 }, { "epoch": 0.61, "learning_rate": 3.9858306020610033e-05, "loss": 0.6669, "step": 54955 }, { "epoch": 0.61, "learning_rate": 3.985738329347152e-05, "loss": 0.6732, "step": 54960 }, { "epoch": 0.61, "learning_rate": 3.985646056633301e-05, "loss": 0.7157, "step": 54965 }, { "epoch": 0.61, "learning_rate": 3.98555378391945e-05, "loss": 0.7356, "step": 54970 }, { "epoch": 0.61, "learning_rate": 3.9854615112055985e-05, "loss": 0.7995, "step": 54975 }, { "epoch": 0.61, "learning_rate": 3.985369238491747e-05, "loss": 0.7395, "step": 54980 }, { "epoch": 0.61, "learning_rate": 3.985276965777896e-05, "loss": 0.7497, "step": 54985 }, { "epoch": 0.61, "learning_rate": 3.985184693064045e-05, "loss": 0.7584, "step": 54990 }, { "epoch": 0.61, "learning_rate": 3.9850924203501936e-05, "loss": 0.7315, "step": 54995 }, { "epoch": 0.61, "learning_rate": 3.9850001476363424e-05, "loss": 0.7345, "step": 55000 }, { "epoch": 0.61, "eval_loss": 0.6969789862632751, "eval_runtime": 69.5787, "eval_samples_per_second": 28.744, "eval_steps_per_second": 14.372, "step": 55000 }, { "epoch": 0.61, "learning_rate": 3.984907874922491e-05, "loss": 0.8117, "step": 55005 }, { "epoch": 0.61, "learning_rate": 3.98481560220864e-05, "loss": 0.7208, "step": 55010 }, { "epoch": 0.61, "learning_rate": 3.984723329494789e-05, "loss": 0.7481, "step": 55015 }, { "epoch": 0.61, "learning_rate": 3.9846310567809375e-05, "loss": 0.7548, "step": 55020 }, { "epoch": 0.61, "learning_rate": 3.984538784067086e-05, "loss": 0.6793, "step": 55025 }, { "epoch": 0.61, "learning_rate": 3.9844465113532344e-05, "loss": 0.736, "step": 55030 }, { "epoch": 0.61, "learning_rate": 3.984354238639383e-05, "loss": 0.7417, "step": 55035 }, { "epoch": 0.61, "learning_rate": 3.984261965925533e-05, "loss": 0.7143, "step": 55040 }, { "epoch": 0.61, "learning_rate": 3.9841696932116815e-05, "loss": 0.7111, "step": 55045 }, { "epoch": 0.61, "learning_rate": 3.9840774204978296e-05, "loss": 0.797, "step": 55050 }, { "epoch": 0.61, "learning_rate": 3.9839851477839784e-05, "loss": 0.7483, "step": 55055 }, { "epoch": 0.61, "learning_rate": 3.983892875070128e-05, "loss": 0.7445, "step": 55060 }, { "epoch": 0.61, "learning_rate": 3.9838006023562766e-05, "loss": 0.7693, "step": 55065 }, { "epoch": 0.61, "learning_rate": 3.983708329642425e-05, "loss": 0.814, "step": 55070 }, { "epoch": 0.61, "learning_rate": 3.9836160569285735e-05, "loss": 0.703, "step": 55075 }, { "epoch": 0.61, "learning_rate": 3.983523784214723e-05, "loss": 0.7136, "step": 55080 }, { "epoch": 0.61, "learning_rate": 3.983431511500871e-05, "loss": 0.7387, "step": 55085 }, { "epoch": 0.61, "learning_rate": 3.98333923878702e-05, "loss": 0.7227, "step": 55090 }, { "epoch": 0.61, "learning_rate": 3.9832469660731686e-05, "loss": 0.7649, "step": 55095 }, { "epoch": 0.61, "learning_rate": 3.983154693359318e-05, "loss": 0.701, "step": 55100 }, { "epoch": 0.61, "learning_rate": 3.983062420645466e-05, "loss": 0.7675, "step": 55105 }, { "epoch": 0.61, "learning_rate": 3.982970147931615e-05, "loss": 0.7174, "step": 55110 }, { "epoch": 0.61, "learning_rate": 3.982877875217764e-05, "loss": 0.7196, "step": 55115 }, { "epoch": 0.61, "learning_rate": 3.9827856025039126e-05, "loss": 0.7468, "step": 55120 }, { "epoch": 0.61, "learning_rate": 3.9826933297900613e-05, "loss": 0.7437, "step": 55125 }, { "epoch": 0.61, "learning_rate": 3.98260105707621e-05, "loss": 0.7685, "step": 55130 }, { "epoch": 0.61, "learning_rate": 3.982508784362359e-05, "loss": 0.7326, "step": 55135 }, { "epoch": 0.61, "learning_rate": 3.982416511648508e-05, "loss": 0.7326, "step": 55140 }, { "epoch": 0.61, "learning_rate": 3.9823242389346565e-05, "loss": 0.6721, "step": 55145 }, { "epoch": 0.61, "learning_rate": 3.982231966220805e-05, "loss": 0.7584, "step": 55150 }, { "epoch": 0.61, "learning_rate": 3.982139693506954e-05, "loss": 0.7704, "step": 55155 }, { "epoch": 0.61, "learning_rate": 3.982047420793102e-05, "loss": 0.7382, "step": 55160 }, { "epoch": 0.61, "learning_rate": 3.9819551480792516e-05, "loss": 0.6875, "step": 55165 }, { "epoch": 0.61, "learning_rate": 3.9818628753654004e-05, "loss": 0.6918, "step": 55170 }, { "epoch": 0.61, "learning_rate": 3.981770602651549e-05, "loss": 0.7686, "step": 55175 }, { "epoch": 0.61, "learning_rate": 3.981678329937697e-05, "loss": 0.7207, "step": 55180 }, { "epoch": 0.61, "learning_rate": 3.981586057223846e-05, "loss": 0.7567, "step": 55185 }, { "epoch": 0.61, "learning_rate": 3.9814937845099956e-05, "loss": 0.659, "step": 55190 }, { "epoch": 0.61, "learning_rate": 3.9814015117961437e-05, "loss": 0.7577, "step": 55195 }, { "epoch": 0.61, "learning_rate": 3.9813092390822924e-05, "loss": 0.7724, "step": 55200 }, { "epoch": 0.61, "learning_rate": 3.981216966368441e-05, "loss": 0.741, "step": 55205 }, { "epoch": 0.61, "learning_rate": 3.981124693654591e-05, "loss": 0.752, "step": 55210 }, { "epoch": 0.61, "learning_rate": 3.981032420940739e-05, "loss": 0.7339, "step": 55215 }, { "epoch": 0.61, "learning_rate": 3.9809401482268876e-05, "loss": 0.7454, "step": 55220 }, { "epoch": 0.61, "learning_rate": 3.9808478755130364e-05, "loss": 0.733, "step": 55225 }, { "epoch": 0.61, "learning_rate": 3.980755602799185e-05, "loss": 0.7951, "step": 55230 }, { "epoch": 0.61, "learning_rate": 3.980663330085334e-05, "loss": 0.705, "step": 55235 }, { "epoch": 0.61, "learning_rate": 3.980571057371483e-05, "loss": 0.6913, "step": 55240 }, { "epoch": 0.61, "learning_rate": 3.9804787846576315e-05, "loss": 0.7263, "step": 55245 }, { "epoch": 0.61, "learning_rate": 3.98038651194378e-05, "loss": 0.7377, "step": 55250 }, { "epoch": 0.61, "learning_rate": 3.980294239229929e-05, "loss": 0.6819, "step": 55255 }, { "epoch": 0.61, "learning_rate": 3.980201966516078e-05, "loss": 0.7019, "step": 55260 }, { "epoch": 0.61, "learning_rate": 3.9801096938022266e-05, "loss": 0.7299, "step": 55265 }, { "epoch": 0.61, "learning_rate": 3.980017421088375e-05, "loss": 0.7693, "step": 55270 }, { "epoch": 0.61, "learning_rate": 3.979925148374524e-05, "loss": 0.7744, "step": 55275 }, { "epoch": 0.61, "learning_rate": 3.979832875660673e-05, "loss": 0.7241, "step": 55280 }, { "epoch": 0.61, "learning_rate": 3.979740602946822e-05, "loss": 0.7448, "step": 55285 }, { "epoch": 0.61, "learning_rate": 3.97964833023297e-05, "loss": 0.7516, "step": 55290 }, { "epoch": 0.61, "learning_rate": 3.9795560575191194e-05, "loss": 0.7293, "step": 55295 }, { "epoch": 0.61, "learning_rate": 3.979463784805268e-05, "loss": 0.7132, "step": 55300 }, { "epoch": 0.61, "learning_rate": 3.979371512091416e-05, "loss": 0.7227, "step": 55305 }, { "epoch": 0.61, "learning_rate": 3.979279239377565e-05, "loss": 0.7405, "step": 55310 }, { "epoch": 0.61, "learning_rate": 3.9791869666637145e-05, "loss": 0.7162, "step": 55315 }, { "epoch": 0.61, "learning_rate": 3.979094693949863e-05, "loss": 0.7749, "step": 55320 }, { "epoch": 0.61, "learning_rate": 3.9790024212360114e-05, "loss": 0.727, "step": 55325 }, { "epoch": 0.61, "learning_rate": 3.97891014852216e-05, "loss": 0.7312, "step": 55330 }, { "epoch": 0.61, "learning_rate": 3.978817875808309e-05, "loss": 0.7066, "step": 55335 }, { "epoch": 0.61, "learning_rate": 3.978725603094458e-05, "loss": 0.7485, "step": 55340 }, { "epoch": 0.61, "learning_rate": 3.9786333303806065e-05, "loss": 0.7514, "step": 55345 }, { "epoch": 0.61, "learning_rate": 3.978541057666755e-05, "loss": 0.7372, "step": 55350 }, { "epoch": 0.61, "learning_rate": 3.978448784952904e-05, "loss": 0.7251, "step": 55355 }, { "epoch": 0.61, "learning_rate": 3.978356512239053e-05, "loss": 0.7241, "step": 55360 }, { "epoch": 0.61, "learning_rate": 3.978264239525202e-05, "loss": 0.7151, "step": 55365 }, { "epoch": 0.61, "learning_rate": 3.9781719668113505e-05, "loss": 0.701, "step": 55370 }, { "epoch": 0.61, "learning_rate": 3.978079694097499e-05, "loss": 0.7309, "step": 55375 }, { "epoch": 0.61, "learning_rate": 3.977987421383648e-05, "loss": 0.7065, "step": 55380 }, { "epoch": 0.61, "learning_rate": 3.977895148669797e-05, "loss": 0.6846, "step": 55385 }, { "epoch": 0.61, "learning_rate": 3.9778028759559456e-05, "loss": 0.7644, "step": 55390 }, { "epoch": 0.61, "learning_rate": 3.9777106032420944e-05, "loss": 0.7404, "step": 55395 }, { "epoch": 0.61, "learning_rate": 3.977618330528243e-05, "loss": 0.6828, "step": 55400 }, { "epoch": 0.61, "learning_rate": 3.977526057814392e-05, "loss": 0.7846, "step": 55405 }, { "epoch": 0.61, "learning_rate": 3.977433785100541e-05, "loss": 0.752, "step": 55410 }, { "epoch": 0.61, "learning_rate": 3.977341512386689e-05, "loss": 0.7672, "step": 55415 }, { "epoch": 0.61, "learning_rate": 3.9772492396728376e-05, "loss": 0.782, "step": 55420 }, { "epoch": 0.61, "learning_rate": 3.977156966958987e-05, "loss": 0.6969, "step": 55425 }, { "epoch": 0.61, "learning_rate": 3.977064694245136e-05, "loss": 0.7114, "step": 55430 }, { "epoch": 0.61, "learning_rate": 3.976972421531284e-05, "loss": 0.7448, "step": 55435 }, { "epoch": 0.61, "learning_rate": 3.976880148817433e-05, "loss": 0.7525, "step": 55440 }, { "epoch": 0.61, "learning_rate": 3.976787876103582e-05, "loss": 0.7415, "step": 55445 }, { "epoch": 0.61, "learning_rate": 3.976695603389731e-05, "loss": 0.7441, "step": 55450 }, { "epoch": 0.61, "learning_rate": 3.976603330675879e-05, "loss": 0.779, "step": 55455 }, { "epoch": 0.61, "learning_rate": 3.976511057962028e-05, "loss": 0.685, "step": 55460 }, { "epoch": 0.61, "learning_rate": 3.9764187852481774e-05, "loss": 0.7296, "step": 55465 }, { "epoch": 0.61, "learning_rate": 3.9763265125343255e-05, "loss": 0.711, "step": 55470 }, { "epoch": 0.61, "learning_rate": 3.976234239820474e-05, "loss": 0.7327, "step": 55475 }, { "epoch": 0.61, "learning_rate": 3.976141967106623e-05, "loss": 0.6954, "step": 55480 }, { "epoch": 0.61, "learning_rate": 3.976049694392772e-05, "loss": 0.7469, "step": 55485 }, { "epoch": 0.61, "learning_rate": 3.9759574216789206e-05, "loss": 0.7614, "step": 55490 }, { "epoch": 0.61, "learning_rate": 3.9758651489650694e-05, "loss": 0.7764, "step": 55495 }, { "epoch": 0.61, "learning_rate": 3.975772876251218e-05, "loss": 0.7488, "step": 55500 }, { "epoch": 0.61, "learning_rate": 3.975680603537367e-05, "loss": 0.7429, "step": 55505 }, { "epoch": 0.61, "learning_rate": 3.975588330823516e-05, "loss": 0.7269, "step": 55510 }, { "epoch": 0.61, "learning_rate": 3.9754960581096645e-05, "loss": 0.7015, "step": 55515 }, { "epoch": 0.61, "learning_rate": 3.975403785395813e-05, "loss": 0.7041, "step": 55520 }, { "epoch": 0.61, "learning_rate": 3.975311512681962e-05, "loss": 0.736, "step": 55525 }, { "epoch": 0.61, "learning_rate": 3.975219239968111e-05, "loss": 0.6632, "step": 55530 }, { "epoch": 0.61, "learning_rate": 3.97512696725426e-05, "loss": 0.7467, "step": 55535 }, { "epoch": 0.61, "learning_rate": 3.9750346945404085e-05, "loss": 0.7699, "step": 55540 }, { "epoch": 0.62, "learning_rate": 3.9749424218265566e-05, "loss": 0.7382, "step": 55545 }, { "epoch": 0.62, "learning_rate": 3.974850149112706e-05, "loss": 0.7123, "step": 55550 }, { "epoch": 0.62, "learning_rate": 3.974757876398855e-05, "loss": 0.722, "step": 55555 }, { "epoch": 0.62, "learning_rate": 3.9746656036850036e-05, "loss": 0.7417, "step": 55560 }, { "epoch": 0.62, "learning_rate": 3.974573330971152e-05, "loss": 0.716, "step": 55565 }, { "epoch": 0.62, "learning_rate": 3.9744810582573005e-05, "loss": 0.7792, "step": 55570 }, { "epoch": 0.62, "learning_rate": 3.97438878554345e-05, "loss": 0.7117, "step": 55575 }, { "epoch": 0.62, "learning_rate": 3.974296512829598e-05, "loss": 0.7042, "step": 55580 }, { "epoch": 0.62, "learning_rate": 3.974204240115747e-05, "loss": 0.7544, "step": 55585 }, { "epoch": 0.62, "learning_rate": 3.9741119674018956e-05, "loss": 0.7542, "step": 55590 }, { "epoch": 0.62, "learning_rate": 3.974019694688045e-05, "loss": 0.6811, "step": 55595 }, { "epoch": 0.62, "learning_rate": 3.973927421974193e-05, "loss": 0.7631, "step": 55600 }, { "epoch": 0.62, "learning_rate": 3.973835149260342e-05, "loss": 0.8098, "step": 55605 }, { "epoch": 0.62, "learning_rate": 3.973742876546491e-05, "loss": 0.7648, "step": 55610 }, { "epoch": 0.62, "learning_rate": 3.9736506038326396e-05, "loss": 0.7766, "step": 55615 }, { "epoch": 0.62, "learning_rate": 3.9735583311187883e-05, "loss": 0.7458, "step": 55620 }, { "epoch": 0.62, "learning_rate": 3.973466058404937e-05, "loss": 0.7234, "step": 55625 }, { "epoch": 0.62, "learning_rate": 3.973373785691086e-05, "loss": 0.7164, "step": 55630 }, { "epoch": 0.62, "learning_rate": 3.973281512977235e-05, "loss": 0.7295, "step": 55635 }, { "epoch": 0.62, "learning_rate": 3.9731892402633835e-05, "loss": 0.7275, "step": 55640 }, { "epoch": 0.62, "learning_rate": 3.973096967549532e-05, "loss": 0.7348, "step": 55645 }, { "epoch": 0.62, "learning_rate": 3.973004694835681e-05, "loss": 0.6819, "step": 55650 }, { "epoch": 0.62, "learning_rate": 3.972912422121829e-05, "loss": 0.6932, "step": 55655 }, { "epoch": 0.62, "learning_rate": 3.9728201494079786e-05, "loss": 0.7487, "step": 55660 }, { "epoch": 0.62, "learning_rate": 3.9727278766941274e-05, "loss": 0.8205, "step": 55665 }, { "epoch": 0.62, "learning_rate": 3.972635603980276e-05, "loss": 0.6889, "step": 55670 }, { "epoch": 0.62, "learning_rate": 3.972543331266424e-05, "loss": 0.7559, "step": 55675 }, { "epoch": 0.62, "learning_rate": 3.972451058552574e-05, "loss": 0.7016, "step": 55680 }, { "epoch": 0.62, "learning_rate": 3.9723587858387225e-05, "loss": 0.8089, "step": 55685 }, { "epoch": 0.62, "learning_rate": 3.9722665131248707e-05, "loss": 0.7367, "step": 55690 }, { "epoch": 0.62, "learning_rate": 3.9721742404110194e-05, "loss": 0.7501, "step": 55695 }, { "epoch": 0.62, "learning_rate": 3.972081967697169e-05, "loss": 0.7301, "step": 55700 }, { "epoch": 0.62, "learning_rate": 3.971989694983318e-05, "loss": 0.6728, "step": 55705 }, { "epoch": 0.62, "learning_rate": 3.971897422269466e-05, "loss": 0.7459, "step": 55710 }, { "epoch": 0.62, "learning_rate": 3.9718051495556146e-05, "loss": 0.7187, "step": 55715 }, { "epoch": 0.62, "learning_rate": 3.9717128768417634e-05, "loss": 0.7205, "step": 55720 }, { "epoch": 0.62, "learning_rate": 3.971620604127912e-05, "loss": 0.699, "step": 55725 }, { "epoch": 0.62, "learning_rate": 3.971528331414061e-05, "loss": 0.711, "step": 55730 }, { "epoch": 0.62, "learning_rate": 3.97143605870021e-05, "loss": 0.7072, "step": 55735 }, { "epoch": 0.62, "learning_rate": 3.9713437859863585e-05, "loss": 0.7394, "step": 55740 }, { "epoch": 0.62, "learning_rate": 3.971251513272507e-05, "loss": 0.6754, "step": 55745 }, { "epoch": 0.62, "learning_rate": 3.971159240558656e-05, "loss": 0.7396, "step": 55750 }, { "epoch": 0.62, "learning_rate": 3.971066967844805e-05, "loss": 0.6984, "step": 55755 }, { "epoch": 0.62, "learning_rate": 3.9709746951309536e-05, "loss": 0.7106, "step": 55760 }, { "epoch": 0.62, "learning_rate": 3.9708824224171024e-05, "loss": 0.7193, "step": 55765 }, { "epoch": 0.62, "learning_rate": 3.970790149703251e-05, "loss": 0.6843, "step": 55770 }, { "epoch": 0.62, "learning_rate": 3.9706978769894e-05, "loss": 0.693, "step": 55775 }, { "epoch": 0.62, "learning_rate": 3.970605604275549e-05, "loss": 0.7554, "step": 55780 }, { "epoch": 0.62, "learning_rate": 3.9705133315616976e-05, "loss": 0.7381, "step": 55785 }, { "epoch": 0.62, "learning_rate": 3.9704210588478463e-05, "loss": 0.7515, "step": 55790 }, { "epoch": 0.62, "learning_rate": 3.970328786133995e-05, "loss": 0.6986, "step": 55795 }, { "epoch": 0.62, "learning_rate": 3.970236513420143e-05, "loss": 0.7496, "step": 55800 }, { "epoch": 0.62, "learning_rate": 3.970144240706292e-05, "loss": 0.7422, "step": 55805 }, { "epoch": 0.62, "learning_rate": 3.9700519679924415e-05, "loss": 0.7309, "step": 55810 }, { "epoch": 0.62, "learning_rate": 3.96995969527859e-05, "loss": 0.7679, "step": 55815 }, { "epoch": 0.62, "learning_rate": 3.9698674225647384e-05, "loss": 0.8177, "step": 55820 }, { "epoch": 0.62, "learning_rate": 3.969775149850887e-05, "loss": 0.7363, "step": 55825 }, { "epoch": 0.62, "learning_rate": 3.9696828771370366e-05, "loss": 0.71, "step": 55830 }, { "epoch": 0.62, "learning_rate": 3.9695906044231854e-05, "loss": 0.7427, "step": 55835 }, { "epoch": 0.62, "learning_rate": 3.9694983317093335e-05, "loss": 0.7291, "step": 55840 }, { "epoch": 0.62, "learning_rate": 3.969406058995482e-05, "loss": 0.7318, "step": 55845 }, { "epoch": 0.62, "learning_rate": 3.969313786281632e-05, "loss": 0.6978, "step": 55850 }, { "epoch": 0.62, "learning_rate": 3.96922151356778e-05, "loss": 0.7355, "step": 55855 }, { "epoch": 0.62, "learning_rate": 3.9691292408539287e-05, "loss": 0.7009, "step": 55860 }, { "epoch": 0.62, "learning_rate": 3.9690369681400774e-05, "loss": 0.7527, "step": 55865 }, { "epoch": 0.62, "learning_rate": 3.968944695426226e-05, "loss": 0.7618, "step": 55870 }, { "epoch": 0.62, "learning_rate": 3.968852422712375e-05, "loss": 0.7717, "step": 55875 }, { "epoch": 0.62, "learning_rate": 3.968760149998524e-05, "loss": 0.7123, "step": 55880 }, { "epoch": 0.62, "learning_rate": 3.9686678772846726e-05, "loss": 0.7769, "step": 55885 }, { "epoch": 0.62, "learning_rate": 3.9685756045708214e-05, "loss": 0.7269, "step": 55890 }, { "epoch": 0.62, "learning_rate": 3.96848333185697e-05, "loss": 0.7455, "step": 55895 }, { "epoch": 0.62, "learning_rate": 3.968391059143119e-05, "loss": 0.7005, "step": 55900 }, { "epoch": 0.62, "learning_rate": 3.968298786429268e-05, "loss": 0.733, "step": 55905 }, { "epoch": 0.62, "learning_rate": 3.9682065137154165e-05, "loss": 0.7596, "step": 55910 }, { "epoch": 0.62, "learning_rate": 3.968114241001565e-05, "loss": 0.7533, "step": 55915 }, { "epoch": 0.62, "learning_rate": 3.968021968287714e-05, "loss": 0.6976, "step": 55920 }, { "epoch": 0.62, "learning_rate": 3.967929695573863e-05, "loss": 0.7403, "step": 55925 }, { "epoch": 0.62, "learning_rate": 3.967837422860011e-05, "loss": 0.7417, "step": 55930 }, { "epoch": 0.62, "learning_rate": 3.9677451501461604e-05, "loss": 0.7692, "step": 55935 }, { "epoch": 0.62, "learning_rate": 3.967652877432309e-05, "loss": 0.7811, "step": 55940 }, { "epoch": 0.62, "learning_rate": 3.967560604718458e-05, "loss": 0.7547, "step": 55945 }, { "epoch": 0.62, "learning_rate": 3.967468332004606e-05, "loss": 0.734, "step": 55950 }, { "epoch": 0.62, "learning_rate": 3.967376059290755e-05, "loss": 0.7075, "step": 55955 }, { "epoch": 0.62, "learning_rate": 3.9672837865769044e-05, "loss": 0.7718, "step": 55960 }, { "epoch": 0.62, "learning_rate": 3.9671915138630525e-05, "loss": 0.7401, "step": 55965 }, { "epoch": 0.62, "learning_rate": 3.967099241149201e-05, "loss": 0.6763, "step": 55970 }, { "epoch": 0.62, "learning_rate": 3.96700696843535e-05, "loss": 0.7109, "step": 55975 }, { "epoch": 0.62, "learning_rate": 3.9669146957214995e-05, "loss": 0.7196, "step": 55980 }, { "epoch": 0.62, "learning_rate": 3.9668224230076476e-05, "loss": 0.7653, "step": 55985 }, { "epoch": 0.62, "learning_rate": 3.9667301502937964e-05, "loss": 0.7908, "step": 55990 }, { "epoch": 0.62, "learning_rate": 3.966637877579945e-05, "loss": 0.7288, "step": 55995 }, { "epoch": 0.62, "learning_rate": 3.966545604866094e-05, "loss": 0.766, "step": 56000 }, { "epoch": 0.62, "eval_loss": 0.7016978859901428, "eval_runtime": 69.3933, "eval_samples_per_second": 28.821, "eval_steps_per_second": 14.411, "step": 56000 }, { "epoch": 0.62, "learning_rate": 3.966453332152243e-05, "loss": 0.7584, "step": 56005 }, { "epoch": 0.62, "learning_rate": 3.9663610594383915e-05, "loss": 0.7001, "step": 56010 }, { "epoch": 0.62, "learning_rate": 3.96626878672454e-05, "loss": 0.7745, "step": 56015 }, { "epoch": 0.62, "learning_rate": 3.966176514010689e-05, "loss": 0.7125, "step": 56020 }, { "epoch": 0.62, "learning_rate": 3.966084241296838e-05, "loss": 0.7535, "step": 56025 }, { "epoch": 0.62, "learning_rate": 3.965991968582987e-05, "loss": 0.7581, "step": 56030 }, { "epoch": 0.62, "learning_rate": 3.9658996958691355e-05, "loss": 0.7455, "step": 56035 }, { "epoch": 0.62, "learning_rate": 3.9658074231552836e-05, "loss": 0.7823, "step": 56040 }, { "epoch": 0.62, "learning_rate": 3.965715150441433e-05, "loss": 0.7251, "step": 56045 }, { "epoch": 0.62, "learning_rate": 3.965622877727582e-05, "loss": 0.7494, "step": 56050 }, { "epoch": 0.62, "learning_rate": 3.9655306050137306e-05, "loss": 0.7276, "step": 56055 }, { "epoch": 0.62, "learning_rate": 3.965438332299879e-05, "loss": 0.679, "step": 56060 }, { "epoch": 0.62, "learning_rate": 3.965346059586028e-05, "loss": 0.7511, "step": 56065 }, { "epoch": 0.62, "learning_rate": 3.965253786872177e-05, "loss": 0.815, "step": 56070 }, { "epoch": 0.62, "learning_rate": 3.965161514158325e-05, "loss": 0.7398, "step": 56075 }, { "epoch": 0.62, "learning_rate": 3.965069241444474e-05, "loss": 0.6809, "step": 56080 }, { "epoch": 0.62, "learning_rate": 3.964976968730623e-05, "loss": 0.6567, "step": 56085 }, { "epoch": 0.62, "learning_rate": 3.964884696016772e-05, "loss": 0.743, "step": 56090 }, { "epoch": 0.62, "learning_rate": 3.96479242330292e-05, "loss": 0.6662, "step": 56095 }, { "epoch": 0.62, "learning_rate": 3.964700150589069e-05, "loss": 0.7965, "step": 56100 }, { "epoch": 0.62, "learning_rate": 3.964607877875218e-05, "loss": 0.713, "step": 56105 }, { "epoch": 0.62, "learning_rate": 3.964515605161367e-05, "loss": 0.6908, "step": 56110 }, { "epoch": 0.62, "learning_rate": 3.964423332447515e-05, "loss": 0.6607, "step": 56115 }, { "epoch": 0.62, "learning_rate": 3.964331059733664e-05, "loss": 0.7701, "step": 56120 }, { "epoch": 0.62, "learning_rate": 3.964238787019813e-05, "loss": 0.8173, "step": 56125 }, { "epoch": 0.62, "learning_rate": 3.964146514305962e-05, "loss": 0.6744, "step": 56130 }, { "epoch": 0.62, "learning_rate": 3.9640542415921105e-05, "loss": 0.7318, "step": 56135 }, { "epoch": 0.62, "learning_rate": 3.963961968878259e-05, "loss": 0.7211, "step": 56140 }, { "epoch": 0.62, "learning_rate": 3.963869696164408e-05, "loss": 0.7011, "step": 56145 }, { "epoch": 0.62, "learning_rate": 3.963777423450557e-05, "loss": 0.7698, "step": 56150 }, { "epoch": 0.62, "learning_rate": 3.9636851507367056e-05, "loss": 0.7229, "step": 56155 }, { "epoch": 0.62, "learning_rate": 3.9635928780228544e-05, "loss": 0.7343, "step": 56160 }, { "epoch": 0.62, "learning_rate": 3.963500605309003e-05, "loss": 0.7018, "step": 56165 }, { "epoch": 0.62, "learning_rate": 3.963408332595151e-05, "loss": 0.6766, "step": 56170 }, { "epoch": 0.62, "learning_rate": 3.963316059881301e-05, "loss": 0.7822, "step": 56175 }, { "epoch": 0.62, "learning_rate": 3.9632237871674495e-05, "loss": 0.7522, "step": 56180 }, { "epoch": 0.62, "learning_rate": 3.963131514453598e-05, "loss": 0.7278, "step": 56185 }, { "epoch": 0.62, "learning_rate": 3.9630392417397464e-05, "loss": 0.7421, "step": 56190 }, { "epoch": 0.62, "learning_rate": 3.962946969025896e-05, "loss": 0.7742, "step": 56195 }, { "epoch": 0.62, "learning_rate": 3.962854696312045e-05, "loss": 0.7713, "step": 56200 }, { "epoch": 0.62, "learning_rate": 3.962762423598193e-05, "loss": 0.7629, "step": 56205 }, { "epoch": 0.62, "learning_rate": 3.9626701508843416e-05, "loss": 0.7331, "step": 56210 }, { "epoch": 0.62, "learning_rate": 3.962577878170491e-05, "loss": 0.7537, "step": 56215 }, { "epoch": 0.62, "learning_rate": 3.96248560545664e-05, "loss": 0.7227, "step": 56220 }, { "epoch": 0.62, "learning_rate": 3.962393332742788e-05, "loss": 0.7392, "step": 56225 }, { "epoch": 0.62, "learning_rate": 3.962301060028937e-05, "loss": 0.6752, "step": 56230 }, { "epoch": 0.62, "learning_rate": 3.962208787315086e-05, "loss": 0.6969, "step": 56235 }, { "epoch": 0.62, "learning_rate": 3.962116514601234e-05, "loss": 0.7268, "step": 56240 }, { "epoch": 0.62, "learning_rate": 3.962024241887383e-05, "loss": 0.7259, "step": 56245 }, { "epoch": 0.62, "learning_rate": 3.961931969173532e-05, "loss": 0.7248, "step": 56250 }, { "epoch": 0.62, "learning_rate": 3.9618396964596806e-05, "loss": 0.6982, "step": 56255 }, { "epoch": 0.62, "learning_rate": 3.9617474237458294e-05, "loss": 0.6775, "step": 56260 }, { "epoch": 0.62, "learning_rate": 3.961655151031978e-05, "loss": 0.7694, "step": 56265 }, { "epoch": 0.62, "learning_rate": 3.961562878318127e-05, "loss": 0.6878, "step": 56270 }, { "epoch": 0.62, "learning_rate": 3.961470605604276e-05, "loss": 0.6748, "step": 56275 }, { "epoch": 0.62, "learning_rate": 3.9613783328904246e-05, "loss": 0.7206, "step": 56280 }, { "epoch": 0.62, "learning_rate": 3.9612860601765733e-05, "loss": 0.7414, "step": 56285 }, { "epoch": 0.62, "learning_rate": 3.961193787462722e-05, "loss": 0.715, "step": 56290 }, { "epoch": 0.62, "learning_rate": 3.961101514748871e-05, "loss": 0.7094, "step": 56295 }, { "epoch": 0.62, "learning_rate": 3.96100924203502e-05, "loss": 0.7021, "step": 56300 }, { "epoch": 0.62, "learning_rate": 3.9609169693211685e-05, "loss": 0.7108, "step": 56305 }, { "epoch": 0.62, "learning_rate": 3.960824696607317e-05, "loss": 0.7294, "step": 56310 }, { "epoch": 0.62, "learning_rate": 3.9607324238934654e-05, "loss": 0.7058, "step": 56315 }, { "epoch": 0.62, "learning_rate": 3.960640151179614e-05, "loss": 0.7208, "step": 56320 }, { "epoch": 0.62, "learning_rate": 3.9605478784657636e-05, "loss": 0.673, "step": 56325 }, { "epoch": 0.62, "learning_rate": 3.9604556057519124e-05, "loss": 0.7394, "step": 56330 }, { "epoch": 0.62, "learning_rate": 3.9603633330380605e-05, "loss": 0.7646, "step": 56335 }, { "epoch": 0.62, "learning_rate": 3.960271060324209e-05, "loss": 0.7456, "step": 56340 }, { "epoch": 0.62, "learning_rate": 3.960178787610359e-05, "loss": 0.7755, "step": 56345 }, { "epoch": 0.62, "learning_rate": 3.960086514896507e-05, "loss": 0.7602, "step": 56350 }, { "epoch": 0.62, "learning_rate": 3.9599942421826556e-05, "loss": 0.8044, "step": 56355 }, { "epoch": 0.62, "learning_rate": 3.9599019694688044e-05, "loss": 0.7882, "step": 56360 }, { "epoch": 0.62, "learning_rate": 3.959809696754954e-05, "loss": 0.8017, "step": 56365 }, { "epoch": 0.62, "learning_rate": 3.959717424041102e-05, "loss": 0.7273, "step": 56370 }, { "epoch": 0.62, "learning_rate": 3.959625151327251e-05, "loss": 0.7332, "step": 56375 }, { "epoch": 0.62, "learning_rate": 3.9595328786133996e-05, "loss": 0.7211, "step": 56380 }, { "epoch": 0.62, "learning_rate": 3.9594406058995484e-05, "loss": 0.7381, "step": 56385 }, { "epoch": 0.62, "learning_rate": 3.959348333185697e-05, "loss": 0.7502, "step": 56390 }, { "epoch": 0.62, "learning_rate": 3.959256060471846e-05, "loss": 0.7283, "step": 56395 }, { "epoch": 0.62, "learning_rate": 3.959163787757995e-05, "loss": 0.7227, "step": 56400 }, { "epoch": 0.62, "learning_rate": 3.9590715150441435e-05, "loss": 0.6996, "step": 56405 }, { "epoch": 0.62, "learning_rate": 3.958979242330292e-05, "loss": 0.7527, "step": 56410 }, { "epoch": 0.62, "learning_rate": 3.958886969616441e-05, "loss": 0.7012, "step": 56415 }, { "epoch": 0.62, "learning_rate": 3.95879469690259e-05, "loss": 0.7453, "step": 56420 }, { "epoch": 0.62, "learning_rate": 3.958702424188738e-05, "loss": 0.6865, "step": 56425 }, { "epoch": 0.62, "learning_rate": 3.9586101514748874e-05, "loss": 0.7176, "step": 56430 }, { "epoch": 0.62, "learning_rate": 3.958517878761036e-05, "loss": 0.6979, "step": 56435 }, { "epoch": 0.62, "learning_rate": 3.958425606047185e-05, "loss": 0.7025, "step": 56440 }, { "epoch": 0.62, "learning_rate": 3.958333333333333e-05, "loss": 0.6988, "step": 56445 }, { "epoch": 0.63, "learning_rate": 3.9582410606194826e-05, "loss": 0.7211, "step": 56450 }, { "epoch": 0.63, "learning_rate": 3.9581487879056313e-05, "loss": 0.6762, "step": 56455 }, { "epoch": 0.63, "learning_rate": 3.9580565151917795e-05, "loss": 0.7575, "step": 56460 }, { "epoch": 0.63, "learning_rate": 3.957964242477928e-05, "loss": 0.7307, "step": 56465 }, { "epoch": 0.63, "learning_rate": 3.957871969764078e-05, "loss": 0.7298, "step": 56470 }, { "epoch": 0.63, "learning_rate": 3.9577796970502265e-05, "loss": 0.7119, "step": 56475 }, { "epoch": 0.63, "learning_rate": 3.9576874243363746e-05, "loss": 0.7514, "step": 56480 }, { "epoch": 0.63, "learning_rate": 3.9575951516225234e-05, "loss": 0.7704, "step": 56485 }, { "epoch": 0.63, "learning_rate": 3.957502878908672e-05, "loss": 0.7278, "step": 56490 }, { "epoch": 0.63, "learning_rate": 3.9574106061948216e-05, "loss": 0.7237, "step": 56495 }, { "epoch": 0.63, "learning_rate": 3.95731833348097e-05, "loss": 0.6883, "step": 56500 }, { "epoch": 0.63, "learning_rate": 3.9572260607671185e-05, "loss": 0.7942, "step": 56505 }, { "epoch": 0.63, "learning_rate": 3.957133788053267e-05, "loss": 0.7265, "step": 56510 }, { "epoch": 0.63, "learning_rate": 3.957041515339416e-05, "loss": 0.6523, "step": 56515 }, { "epoch": 0.63, "learning_rate": 3.956949242625565e-05, "loss": 0.7404, "step": 56520 }, { "epoch": 0.63, "learning_rate": 3.9568569699117137e-05, "loss": 0.7142, "step": 56525 }, { "epoch": 0.63, "learning_rate": 3.9567646971978624e-05, "loss": 0.7209, "step": 56530 }, { "epoch": 0.63, "learning_rate": 3.956672424484011e-05, "loss": 0.7012, "step": 56535 }, { "epoch": 0.63, "learning_rate": 3.95658015177016e-05, "loss": 0.7176, "step": 56540 }, { "epoch": 0.63, "learning_rate": 3.956487879056309e-05, "loss": 0.7546, "step": 56545 }, { "epoch": 0.63, "learning_rate": 3.9563956063424576e-05, "loss": 0.737, "step": 56550 }, { "epoch": 0.63, "learning_rate": 3.956303333628606e-05, "loss": 0.6939, "step": 56555 }, { "epoch": 0.63, "learning_rate": 3.956211060914755e-05, "loss": 0.7011, "step": 56560 }, { "epoch": 0.63, "learning_rate": 3.956118788200904e-05, "loss": 0.674, "step": 56565 }, { "epoch": 0.63, "learning_rate": 3.956026515487053e-05, "loss": 0.7204, "step": 56570 }, { "epoch": 0.63, "learning_rate": 3.955934242773201e-05, "loss": 0.7595, "step": 56575 }, { "epoch": 0.63, "learning_rate": 3.95584197005935e-05, "loss": 0.7015, "step": 56580 }, { "epoch": 0.63, "learning_rate": 3.955749697345499e-05, "loss": 0.7711, "step": 56585 }, { "epoch": 0.63, "learning_rate": 3.955657424631647e-05, "loss": 0.6709, "step": 56590 }, { "epoch": 0.63, "learning_rate": 3.955565151917796e-05, "loss": 0.7231, "step": 56595 }, { "epoch": 0.63, "learning_rate": 3.9554728792039454e-05, "loss": 0.6952, "step": 56600 }, { "epoch": 0.63, "learning_rate": 3.955380606490094e-05, "loss": 0.7452, "step": 56605 }, { "epoch": 0.63, "learning_rate": 3.955288333776242e-05, "loss": 0.7497, "step": 56610 }, { "epoch": 0.63, "learning_rate": 3.955196061062391e-05, "loss": 0.7187, "step": 56615 }, { "epoch": 0.63, "learning_rate": 3.9551037883485406e-05, "loss": 0.7174, "step": 56620 }, { "epoch": 0.63, "learning_rate": 3.955011515634689e-05, "loss": 0.8001, "step": 56625 }, { "epoch": 0.63, "learning_rate": 3.9549192429208375e-05, "loss": 0.6731, "step": 56630 }, { "epoch": 0.63, "learning_rate": 3.954826970206986e-05, "loss": 0.6997, "step": 56635 }, { "epoch": 0.63, "learning_rate": 3.954734697493135e-05, "loss": 0.715, "step": 56640 }, { "epoch": 0.63, "learning_rate": 3.954642424779284e-05, "loss": 0.7223, "step": 56645 }, { "epoch": 0.63, "learning_rate": 3.9545501520654326e-05, "loss": 0.7263, "step": 56650 }, { "epoch": 0.63, "learning_rate": 3.9544578793515814e-05, "loss": 0.7202, "step": 56655 }, { "epoch": 0.63, "learning_rate": 3.95436560663773e-05, "loss": 0.6591, "step": 56660 }, { "epoch": 0.63, "learning_rate": 3.954273333923879e-05, "loss": 0.7145, "step": 56665 }, { "epoch": 0.63, "learning_rate": 3.954181061210028e-05, "loss": 0.7529, "step": 56670 }, { "epoch": 0.63, "learning_rate": 3.9540887884961765e-05, "loss": 0.7408, "step": 56675 }, { "epoch": 0.63, "learning_rate": 3.953996515782325e-05, "loss": 0.7297, "step": 56680 }, { "epoch": 0.63, "learning_rate": 3.953904243068474e-05, "loss": 0.7409, "step": 56685 }, { "epoch": 0.63, "learning_rate": 3.953811970354623e-05, "loss": 0.7753, "step": 56690 }, { "epoch": 0.63, "learning_rate": 3.953719697640772e-05, "loss": 0.6792, "step": 56695 }, { "epoch": 0.63, "learning_rate": 3.95362742492692e-05, "loss": 0.7043, "step": 56700 }, { "epoch": 0.63, "learning_rate": 3.9535351522130686e-05, "loss": 0.6949, "step": 56705 }, { "epoch": 0.63, "learning_rate": 3.953442879499218e-05, "loss": 0.7585, "step": 56710 }, { "epoch": 0.63, "learning_rate": 3.953350606785367e-05, "loss": 0.7337, "step": 56715 }, { "epoch": 0.63, "learning_rate": 3.953258334071515e-05, "loss": 0.7382, "step": 56720 }, { "epoch": 0.63, "learning_rate": 3.953166061357664e-05, "loss": 0.7136, "step": 56725 }, { "epoch": 0.63, "learning_rate": 3.953073788643813e-05, "loss": 0.7439, "step": 56730 }, { "epoch": 0.63, "learning_rate": 3.952981515929961e-05, "loss": 0.7369, "step": 56735 }, { "epoch": 0.63, "learning_rate": 3.95288924321611e-05, "loss": 0.775, "step": 56740 }, { "epoch": 0.63, "learning_rate": 3.952796970502259e-05, "loss": 0.7392, "step": 56745 }, { "epoch": 0.63, "learning_rate": 3.952704697788408e-05, "loss": 0.6735, "step": 56750 }, { "epoch": 0.63, "learning_rate": 3.9526124250745564e-05, "loss": 0.7474, "step": 56755 }, { "epoch": 0.63, "learning_rate": 3.952520152360705e-05, "loss": 0.695, "step": 56760 }, { "epoch": 0.63, "learning_rate": 3.952427879646854e-05, "loss": 0.7184, "step": 56765 }, { "epoch": 0.63, "learning_rate": 3.952335606933003e-05, "loss": 0.651, "step": 56770 }, { "epoch": 0.63, "learning_rate": 3.9522433342191515e-05, "loss": 0.7379, "step": 56775 }, { "epoch": 0.63, "learning_rate": 3.9521510615053e-05, "loss": 0.7351, "step": 56780 }, { "epoch": 0.63, "learning_rate": 3.952058788791449e-05, "loss": 0.6872, "step": 56785 }, { "epoch": 0.63, "learning_rate": 3.951966516077598e-05, "loss": 0.7582, "step": 56790 }, { "epoch": 0.63, "learning_rate": 3.951874243363747e-05, "loss": 0.7114, "step": 56795 }, { "epoch": 0.63, "learning_rate": 3.9517819706498955e-05, "loss": 0.7407, "step": 56800 }, { "epoch": 0.63, "learning_rate": 3.951689697936044e-05, "loss": 0.7147, "step": 56805 }, { "epoch": 0.63, "learning_rate": 3.9515974252221924e-05, "loss": 0.6778, "step": 56810 }, { "epoch": 0.63, "learning_rate": 3.951505152508342e-05, "loss": 0.6687, "step": 56815 }, { "epoch": 0.63, "learning_rate": 3.9514128797944906e-05, "loss": 0.7477, "step": 56820 }, { "epoch": 0.63, "learning_rate": 3.9513206070806394e-05, "loss": 0.7686, "step": 56825 }, { "epoch": 0.63, "learning_rate": 3.9512283343667875e-05, "loss": 0.7351, "step": 56830 }, { "epoch": 0.63, "learning_rate": 3.951136061652937e-05, "loss": 0.7466, "step": 56835 }, { "epoch": 0.63, "learning_rate": 3.951043788939086e-05, "loss": 0.7484, "step": 56840 }, { "epoch": 0.63, "learning_rate": 3.950951516225234e-05, "loss": 0.6885, "step": 56845 }, { "epoch": 0.63, "learning_rate": 3.9508592435113826e-05, "loss": 0.7222, "step": 56850 }, { "epoch": 0.63, "learning_rate": 3.9507669707975314e-05, "loss": 0.7378, "step": 56855 }, { "epoch": 0.63, "learning_rate": 3.950674698083681e-05, "loss": 0.7602, "step": 56860 }, { "epoch": 0.63, "learning_rate": 3.950582425369829e-05, "loss": 0.746, "step": 56865 }, { "epoch": 0.63, "learning_rate": 3.950490152655978e-05, "loss": 0.7612, "step": 56870 }, { "epoch": 0.63, "learning_rate": 3.9503978799421266e-05, "loss": 0.7416, "step": 56875 }, { "epoch": 0.63, "learning_rate": 3.950305607228276e-05, "loss": 0.7199, "step": 56880 }, { "epoch": 0.63, "learning_rate": 3.950213334514424e-05, "loss": 0.7142, "step": 56885 }, { "epoch": 0.63, "learning_rate": 3.950121061800573e-05, "loss": 0.7213, "step": 56890 }, { "epoch": 0.63, "learning_rate": 3.950028789086722e-05, "loss": 0.7097, "step": 56895 }, { "epoch": 0.63, "learning_rate": 3.9499365163728705e-05, "loss": 0.7161, "step": 56900 }, { "epoch": 0.63, "learning_rate": 3.949844243659019e-05, "loss": 0.743, "step": 56905 }, { "epoch": 0.63, "learning_rate": 3.949751970945168e-05, "loss": 0.7149, "step": 56910 }, { "epoch": 0.63, "learning_rate": 3.949659698231317e-05, "loss": 0.7563, "step": 56915 }, { "epoch": 0.63, "learning_rate": 3.9495674255174656e-05, "loss": 0.739, "step": 56920 }, { "epoch": 0.63, "learning_rate": 3.9494751528036144e-05, "loss": 0.744, "step": 56925 }, { "epoch": 0.63, "learning_rate": 3.949382880089763e-05, "loss": 0.7582, "step": 56930 }, { "epoch": 0.63, "learning_rate": 3.949290607375912e-05, "loss": 0.7516, "step": 56935 }, { "epoch": 0.63, "learning_rate": 3.94919833466206e-05, "loss": 0.719, "step": 56940 }, { "epoch": 0.63, "learning_rate": 3.9491060619482096e-05, "loss": 0.7439, "step": 56945 }, { "epoch": 0.63, "learning_rate": 3.949013789234358e-05, "loss": 0.7387, "step": 56950 }, { "epoch": 0.63, "learning_rate": 3.948921516520507e-05, "loss": 0.7319, "step": 56955 }, { "epoch": 0.63, "learning_rate": 3.948829243806655e-05, "loss": 0.7144, "step": 56960 }, { "epoch": 0.63, "learning_rate": 3.948736971092805e-05, "loss": 0.8213, "step": 56965 }, { "epoch": 0.63, "learning_rate": 3.9486446983789535e-05, "loss": 0.7183, "step": 56970 }, { "epoch": 0.63, "learning_rate": 3.9485524256651016e-05, "loss": 0.7558, "step": 56975 }, { "epoch": 0.63, "learning_rate": 3.9484601529512504e-05, "loss": 0.717, "step": 56980 }, { "epoch": 0.63, "learning_rate": 3.9483678802374e-05, "loss": 0.7356, "step": 56985 }, { "epoch": 0.63, "learning_rate": 3.9482756075235486e-05, "loss": 0.716, "step": 56990 }, { "epoch": 0.63, "learning_rate": 3.948183334809697e-05, "loss": 0.6884, "step": 56995 }, { "epoch": 0.63, "learning_rate": 3.9480910620958455e-05, "loss": 0.7043, "step": 57000 }, { "epoch": 0.63, "eval_loss": 0.6915789246559143, "eval_runtime": 69.4216, "eval_samples_per_second": 28.809, "eval_steps_per_second": 14.405, "step": 57000 }, { "epoch": 0.63, "learning_rate": 3.947998789381994e-05, "loss": 0.7815, "step": 57005 }, { "epoch": 0.63, "learning_rate": 3.947906516668143e-05, "loss": 0.7549, "step": 57010 }, { "epoch": 0.63, "learning_rate": 3.947814243954292e-05, "loss": 0.6664, "step": 57015 }, { "epoch": 0.63, "learning_rate": 3.9477219712404406e-05, "loss": 0.66, "step": 57020 }, { "epoch": 0.63, "learning_rate": 3.9476296985265894e-05, "loss": 0.8052, "step": 57025 }, { "epoch": 0.63, "learning_rate": 3.947537425812738e-05, "loss": 0.7567, "step": 57030 }, { "epoch": 0.63, "learning_rate": 3.947445153098887e-05, "loss": 0.7916, "step": 57035 }, { "epoch": 0.63, "learning_rate": 3.947352880385036e-05, "loss": 0.6653, "step": 57040 }, { "epoch": 0.63, "learning_rate": 3.9472606076711846e-05, "loss": 0.769, "step": 57045 }, { "epoch": 0.63, "learning_rate": 3.9471683349573334e-05, "loss": 0.7885, "step": 57050 }, { "epoch": 0.63, "learning_rate": 3.947076062243482e-05, "loss": 0.7342, "step": 57055 }, { "epoch": 0.63, "learning_rate": 3.946983789529631e-05, "loss": 0.7167, "step": 57060 }, { "epoch": 0.63, "learning_rate": 3.94689151681578e-05, "loss": 0.7684, "step": 57065 }, { "epoch": 0.63, "learning_rate": 3.9467992441019285e-05, "loss": 0.7622, "step": 57070 }, { "epoch": 0.63, "learning_rate": 3.946706971388077e-05, "loss": 0.7138, "step": 57075 }, { "epoch": 0.63, "learning_rate": 3.946614698674226e-05, "loss": 0.8057, "step": 57080 }, { "epoch": 0.63, "learning_rate": 3.946522425960374e-05, "loss": 0.7343, "step": 57085 }, { "epoch": 0.63, "learning_rate": 3.946430153246523e-05, "loss": 0.7455, "step": 57090 }, { "epoch": 0.63, "learning_rate": 3.9463378805326724e-05, "loss": 0.7389, "step": 57095 }, { "epoch": 0.63, "learning_rate": 3.946245607818821e-05, "loss": 0.7268, "step": 57100 }, { "epoch": 0.63, "learning_rate": 3.946153335104969e-05, "loss": 0.7809, "step": 57105 }, { "epoch": 0.63, "learning_rate": 3.946061062391118e-05, "loss": 0.8038, "step": 57110 }, { "epoch": 0.63, "learning_rate": 3.9459687896772676e-05, "loss": 0.6658, "step": 57115 }, { "epoch": 0.63, "learning_rate": 3.945876516963416e-05, "loss": 0.7043, "step": 57120 }, { "epoch": 0.63, "learning_rate": 3.9457842442495645e-05, "loss": 0.7322, "step": 57125 }, { "epoch": 0.63, "learning_rate": 3.945691971535713e-05, "loss": 0.7629, "step": 57130 }, { "epoch": 0.63, "learning_rate": 3.945599698821863e-05, "loss": 0.7102, "step": 57135 }, { "epoch": 0.63, "learning_rate": 3.945507426108011e-05, "loss": 0.7558, "step": 57140 }, { "epoch": 0.63, "learning_rate": 3.9454151533941596e-05, "loss": 0.7563, "step": 57145 }, { "epoch": 0.63, "learning_rate": 3.9453228806803084e-05, "loss": 0.7761, "step": 57150 }, { "epoch": 0.63, "learning_rate": 3.945230607966457e-05, "loss": 0.7443, "step": 57155 }, { "epoch": 0.63, "learning_rate": 3.945138335252606e-05, "loss": 0.7131, "step": 57160 }, { "epoch": 0.63, "learning_rate": 3.945046062538755e-05, "loss": 0.7713, "step": 57165 }, { "epoch": 0.63, "learning_rate": 3.9449537898249035e-05, "loss": 0.7037, "step": 57170 }, { "epoch": 0.63, "learning_rate": 3.944861517111052e-05, "loss": 0.6698, "step": 57175 }, { "epoch": 0.63, "learning_rate": 3.944769244397201e-05, "loss": 0.7721, "step": 57180 }, { "epoch": 0.63, "learning_rate": 3.94467697168335e-05, "loss": 0.6909, "step": 57185 }, { "epoch": 0.63, "learning_rate": 3.9445846989694987e-05, "loss": 0.743, "step": 57190 }, { "epoch": 0.63, "learning_rate": 3.944492426255647e-05, "loss": 0.7329, "step": 57195 }, { "epoch": 0.63, "learning_rate": 3.944400153541796e-05, "loss": 0.6701, "step": 57200 }, { "epoch": 0.63, "learning_rate": 3.944307880827945e-05, "loss": 0.7536, "step": 57205 }, { "epoch": 0.63, "learning_rate": 3.944215608114094e-05, "loss": 0.6976, "step": 57210 }, { "epoch": 0.63, "learning_rate": 3.944123335400242e-05, "loss": 0.7524, "step": 57215 }, { "epoch": 0.63, "learning_rate": 3.9440310626863914e-05, "loss": 0.6981, "step": 57220 }, { "epoch": 0.63, "learning_rate": 3.94393878997254e-05, "loss": 0.7187, "step": 57225 }, { "epoch": 0.63, "learning_rate": 3.943846517258688e-05, "loss": 0.7713, "step": 57230 }, { "epoch": 0.63, "learning_rate": 3.943754244544837e-05, "loss": 0.6809, "step": 57235 }, { "epoch": 0.63, "learning_rate": 3.943661971830986e-05, "loss": 0.7628, "step": 57240 }, { "epoch": 0.63, "learning_rate": 3.943569699117135e-05, "loss": 0.6953, "step": 57245 }, { "epoch": 0.63, "learning_rate": 3.9434774264032834e-05, "loss": 0.7445, "step": 57250 }, { "epoch": 0.63, "learning_rate": 3.943385153689432e-05, "loss": 0.6812, "step": 57255 }, { "epoch": 0.63, "learning_rate": 3.943292880975581e-05, "loss": 0.7926, "step": 57260 }, { "epoch": 0.63, "learning_rate": 3.9432006082617304e-05, "loss": 0.8125, "step": 57265 }, { "epoch": 0.63, "learning_rate": 3.9431083355478785e-05, "loss": 0.7814, "step": 57270 }, { "epoch": 0.63, "learning_rate": 3.943016062834027e-05, "loss": 0.7531, "step": 57275 }, { "epoch": 0.63, "learning_rate": 3.942923790120176e-05, "loss": 0.7676, "step": 57280 }, { "epoch": 0.63, "learning_rate": 3.942831517406325e-05, "loss": 0.7355, "step": 57285 }, { "epoch": 0.63, "learning_rate": 3.942739244692474e-05, "loss": 0.7073, "step": 57290 }, { "epoch": 0.63, "learning_rate": 3.9426469719786225e-05, "loss": 0.7183, "step": 57295 }, { "epoch": 0.63, "learning_rate": 3.942554699264771e-05, "loss": 0.7053, "step": 57300 }, { "epoch": 0.63, "learning_rate": 3.94246242655092e-05, "loss": 0.7047, "step": 57305 }, { "epoch": 0.63, "learning_rate": 3.942370153837069e-05, "loss": 0.7316, "step": 57310 }, { "epoch": 0.63, "learning_rate": 3.9422778811232176e-05, "loss": 0.699, "step": 57315 }, { "epoch": 0.63, "learning_rate": 3.9421856084093664e-05, "loss": 0.7229, "step": 57320 }, { "epoch": 0.63, "learning_rate": 3.9420933356955145e-05, "loss": 0.679, "step": 57325 }, { "epoch": 0.63, "learning_rate": 3.942001062981664e-05, "loss": 0.7452, "step": 57330 }, { "epoch": 0.63, "learning_rate": 3.941908790267813e-05, "loss": 0.6831, "step": 57335 }, { "epoch": 0.63, "learning_rate": 3.9418165175539615e-05, "loss": 0.7045, "step": 57340 }, { "epoch": 0.63, "learning_rate": 3.9417242448401096e-05, "loss": 0.6436, "step": 57345 }, { "epoch": 0.64, "learning_rate": 3.941631972126259e-05, "loss": 0.6955, "step": 57350 }, { "epoch": 0.64, "learning_rate": 3.941539699412408e-05, "loss": 0.7279, "step": 57355 }, { "epoch": 0.64, "learning_rate": 3.941447426698556e-05, "loss": 0.7963, "step": 57360 }, { "epoch": 0.64, "learning_rate": 3.941355153984705e-05, "loss": 0.7281, "step": 57365 }, { "epoch": 0.64, "learning_rate": 3.941262881270854e-05, "loss": 0.6975, "step": 57370 }, { "epoch": 0.64, "learning_rate": 3.941170608557003e-05, "loss": 0.7501, "step": 57375 }, { "epoch": 0.64, "learning_rate": 3.941078335843151e-05, "loss": 0.715, "step": 57380 }, { "epoch": 0.64, "learning_rate": 3.9409860631293e-05, "loss": 0.7055, "step": 57385 }, { "epoch": 0.64, "learning_rate": 3.940893790415449e-05, "loss": 0.7041, "step": 57390 }, { "epoch": 0.64, "learning_rate": 3.9408015177015975e-05, "loss": 0.739, "step": 57395 }, { "epoch": 0.64, "learning_rate": 3.940709244987746e-05, "loss": 0.7246, "step": 57400 }, { "epoch": 0.64, "learning_rate": 3.940616972273895e-05, "loss": 0.7019, "step": 57405 }, { "epoch": 0.64, "learning_rate": 3.940524699560044e-05, "loss": 0.7467, "step": 57410 }, { "epoch": 0.64, "learning_rate": 3.9404324268461926e-05, "loss": 0.7516, "step": 57415 }, { "epoch": 0.64, "learning_rate": 3.9403401541323414e-05, "loss": 0.7934, "step": 57420 }, { "epoch": 0.64, "learning_rate": 3.94024788141849e-05, "loss": 0.6732, "step": 57425 }, { "epoch": 0.64, "learning_rate": 3.940155608704639e-05, "loss": 0.7726, "step": 57430 }, { "epoch": 0.64, "learning_rate": 3.940063335990788e-05, "loss": 0.7011, "step": 57435 }, { "epoch": 0.64, "learning_rate": 3.9399710632769365e-05, "loss": 0.7578, "step": 57440 }, { "epoch": 0.64, "learning_rate": 3.939878790563085e-05, "loss": 0.7813, "step": 57445 }, { "epoch": 0.64, "learning_rate": 3.939786517849234e-05, "loss": 0.7383, "step": 57450 }, { "epoch": 0.64, "learning_rate": 3.939694245135383e-05, "loss": 0.7083, "step": 57455 }, { "epoch": 0.64, "learning_rate": 3.939601972421532e-05, "loss": 0.7643, "step": 57460 }, { "epoch": 0.64, "learning_rate": 3.9395096997076805e-05, "loss": 0.7433, "step": 57465 }, { "epoch": 0.64, "learning_rate": 3.9394174269938286e-05, "loss": 0.79, "step": 57470 }, { "epoch": 0.64, "learning_rate": 3.9393251542799774e-05, "loss": 0.7766, "step": 57475 }, { "epoch": 0.64, "learning_rate": 3.939232881566127e-05, "loss": 0.723, "step": 57480 }, { "epoch": 0.64, "learning_rate": 3.9391406088522756e-05, "loss": 0.7919, "step": 57485 }, { "epoch": 0.64, "learning_rate": 3.939048336138424e-05, "loss": 0.7535, "step": 57490 }, { "epoch": 0.64, "learning_rate": 3.9389560634245725e-05, "loss": 0.7149, "step": 57495 }, { "epoch": 0.64, "learning_rate": 3.938863790710722e-05, "loss": 0.7121, "step": 57500 }, { "epoch": 0.64, "learning_rate": 3.93877151799687e-05, "loss": 0.7405, "step": 57505 }, { "epoch": 0.64, "learning_rate": 3.938679245283019e-05, "loss": 0.6964, "step": 57510 }, { "epoch": 0.64, "learning_rate": 3.9385869725691676e-05, "loss": 0.7352, "step": 57515 }, { "epoch": 0.64, "learning_rate": 3.938494699855317e-05, "loss": 0.7473, "step": 57520 }, { "epoch": 0.64, "learning_rate": 3.938402427141465e-05, "loss": 0.7485, "step": 57525 }, { "epoch": 0.64, "learning_rate": 3.938310154427614e-05, "loss": 0.7409, "step": 57530 }, { "epoch": 0.64, "learning_rate": 3.938217881713763e-05, "loss": 0.7512, "step": 57535 }, { "epoch": 0.64, "learning_rate": 3.9381256089999116e-05, "loss": 0.7441, "step": 57540 }, { "epoch": 0.64, "learning_rate": 3.9380333362860603e-05, "loss": 0.771, "step": 57545 }, { "epoch": 0.64, "learning_rate": 3.937941063572209e-05, "loss": 0.7408, "step": 57550 }, { "epoch": 0.64, "learning_rate": 3.937848790858358e-05, "loss": 0.7187, "step": 57555 }, { "epoch": 0.64, "learning_rate": 3.937756518144507e-05, "loss": 0.7424, "step": 57560 }, { "epoch": 0.64, "learning_rate": 3.9376642454306555e-05, "loss": 0.7337, "step": 57565 }, { "epoch": 0.64, "learning_rate": 3.937571972716804e-05, "loss": 0.7058, "step": 57570 }, { "epoch": 0.64, "learning_rate": 3.937479700002953e-05, "loss": 0.7157, "step": 57575 }, { "epoch": 0.64, "learning_rate": 3.937387427289101e-05, "loss": 0.7218, "step": 57580 }, { "epoch": 0.64, "learning_rate": 3.9372951545752506e-05, "loss": 0.8024, "step": 57585 }, { "epoch": 0.64, "learning_rate": 3.9372028818613994e-05, "loss": 0.6623, "step": 57590 }, { "epoch": 0.64, "learning_rate": 3.937110609147548e-05, "loss": 0.6968, "step": 57595 }, { "epoch": 0.64, "learning_rate": 3.937018336433696e-05, "loss": 0.7458, "step": 57600 }, { "epoch": 0.64, "learning_rate": 3.936926063719846e-05, "loss": 0.8009, "step": 57605 }, { "epoch": 0.64, "learning_rate": 3.9368337910059946e-05, "loss": 0.7296, "step": 57610 }, { "epoch": 0.64, "learning_rate": 3.9367415182921427e-05, "loss": 0.688, "step": 57615 }, { "epoch": 0.64, "learning_rate": 3.9366492455782914e-05, "loss": 0.7335, "step": 57620 }, { "epoch": 0.64, "learning_rate": 3.93655697286444e-05, "loss": 0.7367, "step": 57625 }, { "epoch": 0.64, "learning_rate": 3.93646470015059e-05, "loss": 0.7293, "step": 57630 }, { "epoch": 0.64, "learning_rate": 3.936372427436738e-05, "loss": 0.725, "step": 57635 }, { "epoch": 0.64, "learning_rate": 3.9362801547228866e-05, "loss": 0.7161, "step": 57640 }, { "epoch": 0.64, "learning_rate": 3.9361878820090354e-05, "loss": 0.777, "step": 57645 }, { "epoch": 0.64, "learning_rate": 3.936095609295185e-05, "loss": 0.7445, "step": 57650 }, { "epoch": 0.64, "learning_rate": 3.936003336581333e-05, "loss": 0.6892, "step": 57655 }, { "epoch": 0.64, "learning_rate": 3.935911063867482e-05, "loss": 0.7285, "step": 57660 }, { "epoch": 0.64, "learning_rate": 3.9358187911536305e-05, "loss": 0.6758, "step": 57665 }, { "epoch": 0.64, "learning_rate": 3.935726518439779e-05, "loss": 0.7223, "step": 57670 }, { "epoch": 0.64, "learning_rate": 3.935634245725928e-05, "loss": 0.722, "step": 57675 }, { "epoch": 0.64, "learning_rate": 3.935541973012077e-05, "loss": 0.6843, "step": 57680 }, { "epoch": 0.64, "learning_rate": 3.9354497002982256e-05, "loss": 0.7163, "step": 57685 }, { "epoch": 0.64, "learning_rate": 3.935357427584374e-05, "loss": 0.7732, "step": 57690 }, { "epoch": 0.64, "learning_rate": 3.935265154870523e-05, "loss": 0.7096, "step": 57695 }, { "epoch": 0.64, "learning_rate": 3.935172882156672e-05, "loss": 0.7205, "step": 57700 }, { "epoch": 0.64, "learning_rate": 3.935080609442821e-05, "loss": 0.7069, "step": 57705 }, { "epoch": 0.64, "learning_rate": 3.934988336728969e-05, "loss": 0.6735, "step": 57710 }, { "epoch": 0.64, "learning_rate": 3.9348960640151184e-05, "loss": 0.7229, "step": 57715 }, { "epoch": 0.64, "learning_rate": 3.934803791301267e-05, "loss": 0.728, "step": 57720 }, { "epoch": 0.64, "learning_rate": 3.934711518587416e-05, "loss": 0.7404, "step": 57725 }, { "epoch": 0.64, "learning_rate": 3.934619245873564e-05, "loss": 0.7437, "step": 57730 }, { "epoch": 0.64, "learning_rate": 3.9345269731597135e-05, "loss": 0.708, "step": 57735 }, { "epoch": 0.64, "learning_rate": 3.934434700445862e-05, "loss": 0.7446, "step": 57740 }, { "epoch": 0.64, "learning_rate": 3.9343424277320104e-05, "loss": 0.7326, "step": 57745 }, { "epoch": 0.64, "learning_rate": 3.934250155018159e-05, "loss": 0.7921, "step": 57750 }, { "epoch": 0.64, "learning_rate": 3.9341578823043086e-05, "loss": 0.6948, "step": 57755 }, { "epoch": 0.64, "learning_rate": 3.9340656095904574e-05, "loss": 0.7347, "step": 57760 }, { "epoch": 0.64, "learning_rate": 3.9339733368766055e-05, "loss": 0.7147, "step": 57765 }, { "epoch": 0.64, "learning_rate": 3.933881064162754e-05, "loss": 0.7457, "step": 57770 }, { "epoch": 0.64, "learning_rate": 3.933788791448903e-05, "loss": 0.7026, "step": 57775 }, { "epoch": 0.64, "learning_rate": 3.933696518735052e-05, "loss": 0.7236, "step": 57780 }, { "epoch": 0.64, "learning_rate": 3.933604246021201e-05, "loss": 0.6773, "step": 57785 }, { "epoch": 0.64, "learning_rate": 3.9335119733073495e-05, "loss": 0.7519, "step": 57790 }, { "epoch": 0.64, "learning_rate": 3.933419700593498e-05, "loss": 0.7286, "step": 57795 }, { "epoch": 0.64, "learning_rate": 3.933327427879647e-05, "loss": 0.7198, "step": 57800 }, { "epoch": 0.64, "learning_rate": 3.933235155165796e-05, "loss": 0.7557, "step": 57805 }, { "epoch": 0.64, "learning_rate": 3.9331428824519446e-05, "loss": 0.6793, "step": 57810 }, { "epoch": 0.64, "learning_rate": 3.9330506097380934e-05, "loss": 0.696, "step": 57815 }, { "epoch": 0.64, "learning_rate": 3.932958337024242e-05, "loss": 0.7023, "step": 57820 }, { "epoch": 0.64, "learning_rate": 3.932866064310391e-05, "loss": 0.7706, "step": 57825 }, { "epoch": 0.64, "learning_rate": 3.93277379159654e-05, "loss": 0.7176, "step": 57830 }, { "epoch": 0.64, "learning_rate": 3.9326815188826885e-05, "loss": 0.7834, "step": 57835 }, { "epoch": 0.64, "learning_rate": 3.9325892461688366e-05, "loss": 0.7704, "step": 57840 }, { "epoch": 0.64, "learning_rate": 3.932496973454986e-05, "loss": 0.7673, "step": 57845 }, { "epoch": 0.64, "learning_rate": 3.932404700741135e-05, "loss": 0.7652, "step": 57850 }, { "epoch": 0.64, "learning_rate": 3.932312428027283e-05, "loss": 0.7371, "step": 57855 }, { "epoch": 0.64, "learning_rate": 3.932220155313432e-05, "loss": 0.7475, "step": 57860 }, { "epoch": 0.64, "learning_rate": 3.932127882599581e-05, "loss": 0.7383, "step": 57865 }, { "epoch": 0.64, "learning_rate": 3.93203560988573e-05, "loss": 0.7219, "step": 57870 }, { "epoch": 0.64, "learning_rate": 3.931943337171878e-05, "loss": 0.7174, "step": 57875 }, { "epoch": 0.64, "learning_rate": 3.931851064458027e-05, "loss": 0.7448, "step": 57880 }, { "epoch": 0.64, "learning_rate": 3.9317587917441764e-05, "loss": 0.759, "step": 57885 }, { "epoch": 0.64, "learning_rate": 3.9316665190303245e-05, "loss": 0.6906, "step": 57890 }, { "epoch": 0.64, "learning_rate": 3.931574246316473e-05, "loss": 0.7433, "step": 57895 }, { "epoch": 0.64, "learning_rate": 3.931481973602622e-05, "loss": 0.6953, "step": 57900 }, { "epoch": 0.64, "learning_rate": 3.9313897008887715e-05, "loss": 0.7109, "step": 57905 }, { "epoch": 0.64, "learning_rate": 3.9312974281749196e-05, "loss": 0.678, "step": 57910 }, { "epoch": 0.64, "learning_rate": 3.9312051554610684e-05, "loss": 0.7436, "step": 57915 }, { "epoch": 0.64, "learning_rate": 3.931112882747217e-05, "loss": 0.7091, "step": 57920 }, { "epoch": 0.64, "learning_rate": 3.931020610033366e-05, "loss": 0.7111, "step": 57925 }, { "epoch": 0.64, "learning_rate": 3.930928337319515e-05, "loss": 0.7527, "step": 57930 }, { "epoch": 0.64, "learning_rate": 3.9308360646056635e-05, "loss": 0.7718, "step": 57935 }, { "epoch": 0.64, "learning_rate": 3.930743791891812e-05, "loss": 0.7301, "step": 57940 }, { "epoch": 0.64, "learning_rate": 3.930651519177961e-05, "loss": 0.7171, "step": 57945 }, { "epoch": 0.64, "learning_rate": 3.93055924646411e-05, "loss": 0.6968, "step": 57950 }, { "epoch": 0.64, "learning_rate": 3.930466973750259e-05, "loss": 0.7779, "step": 57955 }, { "epoch": 0.64, "learning_rate": 3.9303747010364075e-05, "loss": 0.7315, "step": 57960 }, { "epoch": 0.64, "learning_rate": 3.9302824283225556e-05, "loss": 0.7485, "step": 57965 }, { "epoch": 0.64, "learning_rate": 3.930190155608705e-05, "loss": 0.7662, "step": 57970 }, { "epoch": 0.64, "learning_rate": 3.930097882894854e-05, "loss": 0.7689, "step": 57975 }, { "epoch": 0.64, "learning_rate": 3.9300056101810026e-05, "loss": 0.7383, "step": 57980 }, { "epoch": 0.64, "learning_rate": 3.929913337467151e-05, "loss": 0.7102, "step": 57985 }, { "epoch": 0.64, "learning_rate": 3.9298210647532995e-05, "loss": 0.7437, "step": 57990 }, { "epoch": 0.64, "learning_rate": 3.929728792039449e-05, "loss": 0.7136, "step": 57995 }, { "epoch": 0.64, "learning_rate": 3.929636519325597e-05, "loss": 0.6474, "step": 58000 }, { "epoch": 0.64, "eval_loss": 0.7129420042037964, "eval_runtime": 69.4601, "eval_samples_per_second": 28.793, "eval_steps_per_second": 14.397, "step": 58000 }, { "epoch": 0.64, "learning_rate": 3.929544246611746e-05, "loss": 0.7565, "step": 58005 }, { "epoch": 0.64, "learning_rate": 3.9294519738978946e-05, "loss": 0.7235, "step": 58010 }, { "epoch": 0.64, "learning_rate": 3.929359701184044e-05, "loss": 0.7534, "step": 58015 }, { "epoch": 0.64, "learning_rate": 3.929267428470192e-05, "loss": 0.7333, "step": 58020 }, { "epoch": 0.64, "learning_rate": 3.929175155756341e-05, "loss": 0.7522, "step": 58025 }, { "epoch": 0.64, "learning_rate": 3.92908288304249e-05, "loss": 0.7332, "step": 58030 }, { "epoch": 0.64, "learning_rate": 3.928990610328639e-05, "loss": 0.6957, "step": 58035 }, { "epoch": 0.64, "learning_rate": 3.9288983376147873e-05, "loss": 0.7451, "step": 58040 }, { "epoch": 0.64, "learning_rate": 3.928806064900936e-05, "loss": 0.7334, "step": 58045 }, { "epoch": 0.64, "learning_rate": 3.928713792187085e-05, "loss": 0.7124, "step": 58050 }, { "epoch": 0.64, "learning_rate": 3.928621519473234e-05, "loss": 0.7373, "step": 58055 }, { "epoch": 0.64, "learning_rate": 3.9285292467593825e-05, "loss": 0.7025, "step": 58060 }, { "epoch": 0.64, "learning_rate": 3.928436974045531e-05, "loss": 0.76, "step": 58065 }, { "epoch": 0.64, "learning_rate": 3.92834470133168e-05, "loss": 0.7479, "step": 58070 }, { "epoch": 0.64, "learning_rate": 3.928252428617828e-05, "loss": 0.7062, "step": 58075 }, { "epoch": 0.64, "learning_rate": 3.9281601559039776e-05, "loss": 0.7585, "step": 58080 }, { "epoch": 0.64, "learning_rate": 3.9280678831901264e-05, "loss": 0.7561, "step": 58085 }, { "epoch": 0.64, "learning_rate": 3.927975610476275e-05, "loss": 0.724, "step": 58090 }, { "epoch": 0.64, "learning_rate": 3.927883337762423e-05, "loss": 0.7193, "step": 58095 }, { "epoch": 0.64, "learning_rate": 3.927791065048573e-05, "loss": 0.6997, "step": 58100 }, { "epoch": 0.64, "learning_rate": 3.9276987923347215e-05, "loss": 0.7502, "step": 58105 }, { "epoch": 0.64, "learning_rate": 3.92760651962087e-05, "loss": 0.6684, "step": 58110 }, { "epoch": 0.64, "learning_rate": 3.9275142469070184e-05, "loss": 0.7002, "step": 58115 }, { "epoch": 0.64, "learning_rate": 3.927421974193168e-05, "loss": 0.7339, "step": 58120 }, { "epoch": 0.64, "learning_rate": 3.927329701479317e-05, "loss": 0.7355, "step": 58125 }, { "epoch": 0.64, "learning_rate": 3.927237428765465e-05, "loss": 0.7665, "step": 58130 }, { "epoch": 0.64, "learning_rate": 3.9271451560516136e-05, "loss": 0.7311, "step": 58135 }, { "epoch": 0.64, "learning_rate": 3.927052883337763e-05, "loss": 0.7094, "step": 58140 }, { "epoch": 0.64, "learning_rate": 3.926960610623912e-05, "loss": 0.7462, "step": 58145 }, { "epoch": 0.64, "learning_rate": 3.92686833791006e-05, "loss": 0.7154, "step": 58150 }, { "epoch": 0.64, "learning_rate": 3.926776065196209e-05, "loss": 0.6883, "step": 58155 }, { "epoch": 0.64, "learning_rate": 3.9266837924823575e-05, "loss": 0.7265, "step": 58160 }, { "epoch": 0.64, "learning_rate": 3.926591519768506e-05, "loss": 0.7271, "step": 58165 }, { "epoch": 0.64, "learning_rate": 3.926499247054655e-05, "loss": 0.7142, "step": 58170 }, { "epoch": 0.64, "learning_rate": 3.926406974340804e-05, "loss": 0.7352, "step": 58175 }, { "epoch": 0.64, "learning_rate": 3.9263147016269526e-05, "loss": 0.6942, "step": 58180 }, { "epoch": 0.64, "learning_rate": 3.9262224289131014e-05, "loss": 0.6703, "step": 58185 }, { "epoch": 0.64, "learning_rate": 3.92613015619925e-05, "loss": 0.7138, "step": 58190 }, { "epoch": 0.64, "learning_rate": 3.926037883485399e-05, "loss": 0.7145, "step": 58195 }, { "epoch": 0.64, "learning_rate": 3.925945610771548e-05, "loss": 0.7039, "step": 58200 }, { "epoch": 0.64, "learning_rate": 3.9258533380576966e-05, "loss": 0.6736, "step": 58205 }, { "epoch": 0.64, "learning_rate": 3.9257610653438453e-05, "loss": 0.7374, "step": 58210 }, { "epoch": 0.64, "learning_rate": 3.925668792629994e-05, "loss": 0.7171, "step": 58215 }, { "epoch": 0.64, "learning_rate": 3.925576519916143e-05, "loss": 0.663, "step": 58220 }, { "epoch": 0.64, "learning_rate": 3.925484247202291e-05, "loss": 0.7442, "step": 58225 }, { "epoch": 0.64, "learning_rate": 3.9253919744884405e-05, "loss": 0.6577, "step": 58230 }, { "epoch": 0.64, "learning_rate": 3.925299701774589e-05, "loss": 0.7664, "step": 58235 }, { "epoch": 0.64, "learning_rate": 3.9252074290607374e-05, "loss": 0.7269, "step": 58240 }, { "epoch": 0.64, "learning_rate": 3.925115156346886e-05, "loss": 0.7425, "step": 58245 }, { "epoch": 0.64, "learning_rate": 3.9250228836330356e-05, "loss": 0.7084, "step": 58250 }, { "epoch": 0.65, "learning_rate": 3.9249306109191844e-05, "loss": 0.6928, "step": 58255 }, { "epoch": 0.65, "learning_rate": 3.9248383382053325e-05, "loss": 0.7282, "step": 58260 }, { "epoch": 0.65, "learning_rate": 3.924746065491481e-05, "loss": 0.719, "step": 58265 }, { "epoch": 0.65, "learning_rate": 3.924653792777631e-05, "loss": 0.6977, "step": 58270 }, { "epoch": 0.65, "learning_rate": 3.924561520063779e-05, "loss": 0.7213, "step": 58275 }, { "epoch": 0.65, "learning_rate": 3.9244692473499277e-05, "loss": 0.6906, "step": 58280 }, { "epoch": 0.65, "learning_rate": 3.9243769746360764e-05, "loss": 0.7311, "step": 58285 }, { "epoch": 0.65, "learning_rate": 3.924284701922226e-05, "loss": 0.7868, "step": 58290 }, { "epoch": 0.65, "learning_rate": 3.924192429208374e-05, "loss": 0.7503, "step": 58295 }, { "epoch": 0.65, "learning_rate": 3.924100156494523e-05, "loss": 0.8075, "step": 58300 }, { "epoch": 0.65, "learning_rate": 3.9240078837806716e-05, "loss": 0.725, "step": 58305 }, { "epoch": 0.65, "learning_rate": 3.9239156110668204e-05, "loss": 0.7121, "step": 58310 }, { "epoch": 0.65, "learning_rate": 3.923823338352969e-05, "loss": 0.7124, "step": 58315 }, { "epoch": 0.65, "learning_rate": 3.923731065639118e-05, "loss": 0.7393, "step": 58320 }, { "epoch": 0.65, "learning_rate": 3.923638792925267e-05, "loss": 0.7638, "step": 58325 }, { "epoch": 0.65, "learning_rate": 3.9235465202114155e-05, "loss": 0.6931, "step": 58330 }, { "epoch": 0.65, "learning_rate": 3.923454247497564e-05, "loss": 0.6764, "step": 58335 }, { "epoch": 0.65, "learning_rate": 3.923361974783713e-05, "loss": 0.731, "step": 58340 }, { "epoch": 0.65, "learning_rate": 3.923269702069862e-05, "loss": 0.8224, "step": 58345 }, { "epoch": 0.65, "learning_rate": 3.92317742935601e-05, "loss": 0.6703, "step": 58350 }, { "epoch": 0.65, "learning_rate": 3.9230851566421594e-05, "loss": 0.6978, "step": 58355 }, { "epoch": 0.65, "learning_rate": 3.922992883928308e-05, "loss": 0.7082, "step": 58360 }, { "epoch": 0.65, "learning_rate": 3.922900611214457e-05, "loss": 0.7334, "step": 58365 }, { "epoch": 0.65, "learning_rate": 3.922808338500605e-05, "loss": 0.7131, "step": 58370 }, { "epoch": 0.65, "learning_rate": 3.922716065786754e-05, "loss": 0.7481, "step": 58375 }, { "epoch": 0.65, "learning_rate": 3.9226237930729034e-05, "loss": 0.7279, "step": 58380 }, { "epoch": 0.65, "learning_rate": 3.922531520359052e-05, "loss": 0.7058, "step": 58385 }, { "epoch": 0.65, "learning_rate": 3.9224392476452e-05, "loss": 0.7377, "step": 58390 }, { "epoch": 0.65, "learning_rate": 3.922346974931349e-05, "loss": 0.7742, "step": 58395 }, { "epoch": 0.65, "learning_rate": 3.9222547022174985e-05, "loss": 0.6622, "step": 58400 }, { "epoch": 0.65, "learning_rate": 3.9221624295036466e-05, "loss": 0.7681, "step": 58405 }, { "epoch": 0.65, "learning_rate": 3.9220701567897954e-05, "loss": 0.7038, "step": 58410 }, { "epoch": 0.65, "learning_rate": 3.921977884075944e-05, "loss": 0.7233, "step": 58415 }, { "epoch": 0.65, "learning_rate": 3.9218856113620936e-05, "loss": 0.7362, "step": 58420 }, { "epoch": 0.65, "learning_rate": 3.921793338648242e-05, "loss": 0.7022, "step": 58425 }, { "epoch": 0.65, "learning_rate": 3.9217010659343905e-05, "loss": 0.7547, "step": 58430 }, { "epoch": 0.65, "learning_rate": 3.921608793220539e-05, "loss": 0.7505, "step": 58435 }, { "epoch": 0.65, "learning_rate": 3.921516520506688e-05, "loss": 0.7818, "step": 58440 }, { "epoch": 0.65, "learning_rate": 3.921424247792837e-05, "loss": 0.8081, "step": 58445 }, { "epoch": 0.65, "learning_rate": 3.921331975078986e-05, "loss": 0.7262, "step": 58450 }, { "epoch": 0.65, "learning_rate": 3.9212397023651345e-05, "loss": 0.771, "step": 58455 }, { "epoch": 0.65, "learning_rate": 3.921147429651283e-05, "loss": 0.714, "step": 58460 }, { "epoch": 0.65, "learning_rate": 3.921055156937432e-05, "loss": 0.682, "step": 58465 }, { "epoch": 0.65, "learning_rate": 3.920962884223581e-05, "loss": 0.6879, "step": 58470 }, { "epoch": 0.65, "learning_rate": 3.9208706115097296e-05, "loss": 0.7074, "step": 58475 }, { "epoch": 0.65, "learning_rate": 3.920778338795878e-05, "loss": 0.7172, "step": 58480 }, { "epoch": 0.65, "learning_rate": 3.920686066082027e-05, "loss": 0.6861, "step": 58485 }, { "epoch": 0.65, "learning_rate": 3.920593793368176e-05, "loss": 0.6896, "step": 58490 }, { "epoch": 0.65, "learning_rate": 3.920501520654325e-05, "loss": 0.6999, "step": 58495 }, { "epoch": 0.65, "learning_rate": 3.920409247940473e-05, "loss": 0.7155, "step": 58500 }, { "epoch": 0.65, "learning_rate": 3.920316975226622e-05, "loss": 0.7658, "step": 58505 }, { "epoch": 0.65, "learning_rate": 3.920224702512771e-05, "loss": 0.7932, "step": 58510 }, { "epoch": 0.65, "learning_rate": 3.920132429798919e-05, "loss": 0.7259, "step": 58515 }, { "epoch": 0.65, "learning_rate": 3.920040157085068e-05, "loss": 0.7464, "step": 58520 }, { "epoch": 0.65, "learning_rate": 3.919947884371217e-05, "loss": 0.6399, "step": 58525 }, { "epoch": 0.65, "learning_rate": 3.919855611657366e-05, "loss": 0.693, "step": 58530 }, { "epoch": 0.65, "learning_rate": 3.919763338943514e-05, "loss": 0.679, "step": 58535 }, { "epoch": 0.65, "learning_rate": 3.919671066229663e-05, "loss": 0.7698, "step": 58540 }, { "epoch": 0.65, "learning_rate": 3.919578793515812e-05, "loss": 0.6517, "step": 58545 }, { "epoch": 0.65, "learning_rate": 3.919486520801961e-05, "loss": 0.7233, "step": 58550 }, { "epoch": 0.65, "learning_rate": 3.9193942480881095e-05, "loss": 0.678, "step": 58555 }, { "epoch": 0.65, "learning_rate": 3.919301975374258e-05, "loss": 0.7586, "step": 58560 }, { "epoch": 0.65, "learning_rate": 3.919209702660407e-05, "loss": 0.6906, "step": 58565 }, { "epoch": 0.65, "learning_rate": 3.919117429946556e-05, "loss": 0.7026, "step": 58570 }, { "epoch": 0.65, "learning_rate": 3.9190251572327046e-05, "loss": 0.7042, "step": 58575 }, { "epoch": 0.65, "learning_rate": 3.9189328845188534e-05, "loss": 0.6723, "step": 58580 }, { "epoch": 0.65, "learning_rate": 3.918840611805002e-05, "loss": 0.7605, "step": 58585 }, { "epoch": 0.65, "learning_rate": 3.918748339091151e-05, "loss": 0.7967, "step": 58590 }, { "epoch": 0.65, "learning_rate": 3.9186560663773e-05, "loss": 0.7325, "step": 58595 }, { "epoch": 0.65, "learning_rate": 3.9185637936634485e-05, "loss": 0.7789, "step": 58600 }, { "epoch": 0.65, "learning_rate": 3.918471520949597e-05, "loss": 0.7053, "step": 58605 }, { "epoch": 0.65, "learning_rate": 3.9183792482357454e-05, "loss": 0.7761, "step": 58610 }, { "epoch": 0.65, "learning_rate": 3.918286975521895e-05, "loss": 0.7488, "step": 58615 }, { "epoch": 0.65, "learning_rate": 3.918194702808044e-05, "loss": 0.6781, "step": 58620 }, { "epoch": 0.65, "learning_rate": 3.918102430094192e-05, "loss": 0.7218, "step": 58625 }, { "epoch": 0.65, "learning_rate": 3.9180101573803406e-05, "loss": 0.7778, "step": 58630 }, { "epoch": 0.65, "learning_rate": 3.91791788466649e-05, "loss": 0.7527, "step": 58635 }, { "epoch": 0.65, "learning_rate": 3.917825611952639e-05, "loss": 0.7006, "step": 58640 }, { "epoch": 0.65, "learning_rate": 3.917733339238787e-05, "loss": 0.777, "step": 58645 }, { "epoch": 0.65, "learning_rate": 3.917641066524936e-05, "loss": 0.6814, "step": 58650 }, { "epoch": 0.65, "learning_rate": 3.917548793811085e-05, "loss": 0.7371, "step": 58655 }, { "epoch": 0.65, "learning_rate": 3.917456521097233e-05, "loss": 0.8178, "step": 58660 }, { "epoch": 0.65, "learning_rate": 3.917364248383382e-05, "loss": 0.691, "step": 58665 }, { "epoch": 0.65, "learning_rate": 3.917271975669531e-05, "loss": 0.7455, "step": 58670 }, { "epoch": 0.65, "learning_rate": 3.9171797029556796e-05, "loss": 0.7929, "step": 58675 }, { "epoch": 0.65, "learning_rate": 3.9170874302418284e-05, "loss": 0.7323, "step": 58680 }, { "epoch": 0.65, "learning_rate": 3.916995157527977e-05, "loss": 0.7485, "step": 58685 }, { "epoch": 0.65, "learning_rate": 3.916902884814126e-05, "loss": 0.7259, "step": 58690 }, { "epoch": 0.65, "learning_rate": 3.916810612100275e-05, "loss": 0.7101, "step": 58695 }, { "epoch": 0.65, "learning_rate": 3.9167183393864236e-05, "loss": 0.7271, "step": 58700 }, { "epoch": 0.65, "learning_rate": 3.9166260666725723e-05, "loss": 0.7148, "step": 58705 }, { "epoch": 0.65, "learning_rate": 3.916533793958721e-05, "loss": 0.7268, "step": 58710 }, { "epoch": 0.65, "learning_rate": 3.91644152124487e-05, "loss": 0.7384, "step": 58715 }, { "epoch": 0.65, "learning_rate": 3.916349248531019e-05, "loss": 0.7342, "step": 58720 }, { "epoch": 0.65, "learning_rate": 3.9162569758171675e-05, "loss": 0.7548, "step": 58725 }, { "epoch": 0.65, "learning_rate": 3.916164703103316e-05, "loss": 0.7598, "step": 58730 }, { "epoch": 0.65, "learning_rate": 3.9160724303894644e-05, "loss": 0.7059, "step": 58735 }, { "epoch": 0.65, "learning_rate": 3.915980157675614e-05, "loss": 0.7106, "step": 58740 }, { "epoch": 0.65, "learning_rate": 3.9158878849617626e-05, "loss": 0.7055, "step": 58745 }, { "epoch": 0.65, "learning_rate": 3.9157956122479114e-05, "loss": 0.7549, "step": 58750 }, { "epoch": 0.65, "learning_rate": 3.9157033395340595e-05, "loss": 0.7243, "step": 58755 }, { "epoch": 0.65, "learning_rate": 3.915611066820208e-05, "loss": 0.7314, "step": 58760 }, { "epoch": 0.65, "learning_rate": 3.915518794106358e-05, "loss": 0.774, "step": 58765 }, { "epoch": 0.65, "learning_rate": 3.9154265213925065e-05, "loss": 0.7006, "step": 58770 }, { "epoch": 0.65, "learning_rate": 3.9153342486786547e-05, "loss": 0.7527, "step": 58775 }, { "epoch": 0.65, "learning_rate": 3.9152419759648034e-05, "loss": 0.681, "step": 58780 }, { "epoch": 0.65, "learning_rate": 3.915149703250953e-05, "loss": 0.735, "step": 58785 }, { "epoch": 0.65, "learning_rate": 3.915057430537101e-05, "loss": 0.7416, "step": 58790 }, { "epoch": 0.65, "learning_rate": 3.91496515782325e-05, "loss": 0.7063, "step": 58795 }, { "epoch": 0.65, "learning_rate": 3.9148728851093986e-05, "loss": 0.7187, "step": 58800 }, { "epoch": 0.65, "learning_rate": 3.914780612395548e-05, "loss": 0.6338, "step": 58805 }, { "epoch": 0.65, "learning_rate": 3.914688339681696e-05, "loss": 0.6951, "step": 58810 }, { "epoch": 0.65, "learning_rate": 3.914596066967845e-05, "loss": 0.7167, "step": 58815 }, { "epoch": 0.65, "learning_rate": 3.914503794253994e-05, "loss": 0.73, "step": 58820 }, { "epoch": 0.65, "learning_rate": 3.9144115215401425e-05, "loss": 0.7095, "step": 58825 }, { "epoch": 0.65, "learning_rate": 3.914319248826291e-05, "loss": 0.681, "step": 58830 }, { "epoch": 0.65, "learning_rate": 3.91422697611244e-05, "loss": 0.7482, "step": 58835 }, { "epoch": 0.65, "learning_rate": 3.914134703398589e-05, "loss": 0.7566, "step": 58840 }, { "epoch": 0.65, "learning_rate": 3.9140424306847376e-05, "loss": 0.6735, "step": 58845 }, { "epoch": 0.65, "learning_rate": 3.9139501579708864e-05, "loss": 0.7528, "step": 58850 }, { "epoch": 0.65, "learning_rate": 3.913857885257035e-05, "loss": 0.6892, "step": 58855 }, { "epoch": 0.65, "learning_rate": 3.913765612543184e-05, "loss": 0.752, "step": 58860 }, { "epoch": 0.65, "learning_rate": 3.913673339829332e-05, "loss": 0.7584, "step": 58865 }, { "epoch": 0.65, "learning_rate": 3.9135810671154816e-05, "loss": 0.7776, "step": 58870 }, { "epoch": 0.65, "learning_rate": 3.9134887944016303e-05, "loss": 0.6869, "step": 58875 }, { "epoch": 0.65, "learning_rate": 3.913396521687779e-05, "loss": 0.7191, "step": 58880 }, { "epoch": 0.65, "learning_rate": 3.913304248973927e-05, "loss": 0.741, "step": 58885 }, { "epoch": 0.65, "learning_rate": 3.913211976260077e-05, "loss": 0.7311, "step": 58890 }, { "epoch": 0.65, "learning_rate": 3.9131197035462255e-05, "loss": 0.7457, "step": 58895 }, { "epoch": 0.65, "learning_rate": 3.9130274308323736e-05, "loss": 0.757, "step": 58900 }, { "epoch": 0.65, "learning_rate": 3.9129351581185224e-05, "loss": 0.714, "step": 58905 }, { "epoch": 0.65, "learning_rate": 3.912842885404671e-05, "loss": 0.7691, "step": 58910 }, { "epoch": 0.65, "learning_rate": 3.9127506126908206e-05, "loss": 0.766, "step": 58915 }, { "epoch": 0.65, "learning_rate": 3.912658339976969e-05, "loss": 0.7122, "step": 58920 }, { "epoch": 0.65, "learning_rate": 3.9125660672631175e-05, "loss": 0.7317, "step": 58925 }, { "epoch": 0.65, "learning_rate": 3.912473794549266e-05, "loss": 0.6471, "step": 58930 }, { "epoch": 0.65, "learning_rate": 3.912381521835415e-05, "loss": 0.7558, "step": 58935 }, { "epoch": 0.65, "learning_rate": 3.912289249121564e-05, "loss": 0.693, "step": 58940 }, { "epoch": 0.65, "learning_rate": 3.9121969764077127e-05, "loss": 0.7027, "step": 58945 }, { "epoch": 0.65, "learning_rate": 3.9121047036938614e-05, "loss": 0.759, "step": 58950 }, { "epoch": 0.65, "learning_rate": 3.91201243098001e-05, "loss": 0.7025, "step": 58955 }, { "epoch": 0.65, "learning_rate": 3.911920158266159e-05, "loss": 0.6977, "step": 58960 }, { "epoch": 0.65, "learning_rate": 3.911827885552308e-05, "loss": 0.8051, "step": 58965 }, { "epoch": 0.65, "learning_rate": 3.9117356128384566e-05, "loss": 0.7428, "step": 58970 }, { "epoch": 0.65, "learning_rate": 3.9116433401246054e-05, "loss": 0.6983, "step": 58975 }, { "epoch": 0.65, "learning_rate": 3.911551067410754e-05, "loss": 0.7425, "step": 58980 }, { "epoch": 0.65, "learning_rate": 3.911458794696903e-05, "loss": 0.768, "step": 58985 }, { "epoch": 0.65, "learning_rate": 3.911366521983052e-05, "loss": 0.725, "step": 58990 }, { "epoch": 0.65, "learning_rate": 3.9112742492692e-05, "loss": 0.8059, "step": 58995 }, { "epoch": 0.65, "learning_rate": 3.911181976555349e-05, "loss": 0.7456, "step": 59000 }, { "epoch": 0.65, "eval_loss": 0.6802049279212952, "eval_runtime": 69.3729, "eval_samples_per_second": 28.83, "eval_steps_per_second": 14.415, "step": 59000 }, { "epoch": 0.65, "learning_rate": 3.911089703841498e-05, "loss": 0.8132, "step": 59005 }, { "epoch": 0.65, "learning_rate": 3.910997431127646e-05, "loss": 0.6589, "step": 59010 }, { "epoch": 0.65, "learning_rate": 3.910905158413795e-05, "loss": 0.7207, "step": 59015 }, { "epoch": 0.65, "learning_rate": 3.9108128856999444e-05, "loss": 0.7781, "step": 59020 }, { "epoch": 0.65, "learning_rate": 3.910720612986093e-05, "loss": 0.6977, "step": 59025 }, { "epoch": 0.65, "learning_rate": 3.910628340272241e-05, "loss": 0.7423, "step": 59030 }, { "epoch": 0.65, "learning_rate": 3.91053606755839e-05, "loss": 0.7155, "step": 59035 }, { "epoch": 0.65, "learning_rate": 3.9104437948445396e-05, "loss": 0.7148, "step": 59040 }, { "epoch": 0.65, "learning_rate": 3.910351522130688e-05, "loss": 0.7891, "step": 59045 }, { "epoch": 0.65, "learning_rate": 3.9102592494168365e-05, "loss": 0.7031, "step": 59050 }, { "epoch": 0.65, "learning_rate": 3.910166976702985e-05, "loss": 0.7109, "step": 59055 }, { "epoch": 0.65, "learning_rate": 3.910074703989134e-05, "loss": 0.7619, "step": 59060 }, { "epoch": 0.65, "learning_rate": 3.909982431275283e-05, "loss": 0.7112, "step": 59065 }, { "epoch": 0.65, "learning_rate": 3.9098901585614316e-05, "loss": 0.7401, "step": 59070 }, { "epoch": 0.65, "learning_rate": 3.9097978858475804e-05, "loss": 0.763, "step": 59075 }, { "epoch": 0.65, "learning_rate": 3.909705613133729e-05, "loss": 0.7015, "step": 59080 }, { "epoch": 0.65, "learning_rate": 3.909613340419878e-05, "loss": 0.7158, "step": 59085 }, { "epoch": 0.65, "learning_rate": 3.909521067706027e-05, "loss": 0.7447, "step": 59090 }, { "epoch": 0.65, "learning_rate": 3.9094287949921755e-05, "loss": 0.787, "step": 59095 }, { "epoch": 0.65, "learning_rate": 3.909336522278324e-05, "loss": 0.7496, "step": 59100 }, { "epoch": 0.65, "learning_rate": 3.909244249564473e-05, "loss": 0.8925, "step": 59105 }, { "epoch": 0.65, "learning_rate": 3.909151976850622e-05, "loss": 0.803, "step": 59110 }, { "epoch": 0.65, "learning_rate": 3.909059704136771e-05, "loss": 0.9613, "step": 59115 }, { "epoch": 0.65, "learning_rate": 3.908967431422919e-05, "loss": 0.8172, "step": 59120 }, { "epoch": 0.65, "learning_rate": 3.908875158709068e-05, "loss": 0.8765, "step": 59125 }, { "epoch": 0.65, "learning_rate": 3.908782885995217e-05, "loss": 0.8435, "step": 59130 }, { "epoch": 0.65, "learning_rate": 3.908690613281366e-05, "loss": 0.8198, "step": 59135 }, { "epoch": 0.65, "learning_rate": 3.908598340567514e-05, "loss": 0.7146, "step": 59140 }, { "epoch": 0.65, "learning_rate": 3.908506067853663e-05, "loss": 0.7276, "step": 59145 }, { "epoch": 0.65, "learning_rate": 3.908413795139812e-05, "loss": 0.6784, "step": 59150 }, { "epoch": 0.66, "learning_rate": 3.908321522425961e-05, "loss": 0.7291, "step": 59155 }, { "epoch": 0.66, "learning_rate": 3.908229249712109e-05, "loss": 0.7883, "step": 59160 }, { "epoch": 0.66, "learning_rate": 3.908136976998258e-05, "loss": 0.7717, "step": 59165 }, { "epoch": 0.66, "learning_rate": 3.908044704284407e-05, "loss": 0.7671, "step": 59170 }, { "epoch": 0.66, "learning_rate": 3.9079524315705554e-05, "loss": 0.742, "step": 59175 }, { "epoch": 0.66, "learning_rate": 3.907860158856704e-05, "loss": 0.7168, "step": 59180 }, { "epoch": 0.66, "learning_rate": 3.907767886142853e-05, "loss": 0.7368, "step": 59185 }, { "epoch": 0.66, "learning_rate": 3.9076756134290024e-05, "loss": 0.6925, "step": 59190 }, { "epoch": 0.66, "learning_rate": 3.9075833407151505e-05, "loss": 0.7919, "step": 59195 }, { "epoch": 0.66, "learning_rate": 3.907491068001299e-05, "loss": 0.7276, "step": 59200 }, { "epoch": 0.66, "learning_rate": 3.907398795287448e-05, "loss": 0.7389, "step": 59205 }, { "epoch": 0.66, "learning_rate": 3.907306522573597e-05, "loss": 0.7698, "step": 59210 }, { "epoch": 0.66, "learning_rate": 3.907214249859746e-05, "loss": 0.7217, "step": 59215 }, { "epoch": 0.66, "learning_rate": 3.9071219771458945e-05, "loss": 0.6561, "step": 59220 }, { "epoch": 0.66, "learning_rate": 3.907029704432043e-05, "loss": 0.7535, "step": 59225 }, { "epoch": 0.66, "learning_rate": 3.906937431718192e-05, "loss": 0.7367, "step": 59230 }, { "epoch": 0.66, "learning_rate": 3.906845159004341e-05, "loss": 0.7695, "step": 59235 }, { "epoch": 0.66, "learning_rate": 3.9067528862904896e-05, "loss": 0.724, "step": 59240 }, { "epoch": 0.66, "learning_rate": 3.9066606135766384e-05, "loss": 0.7193, "step": 59245 }, { "epoch": 0.66, "learning_rate": 3.9065683408627865e-05, "loss": 0.7735, "step": 59250 }, { "epoch": 0.66, "learning_rate": 3.906476068148936e-05, "loss": 0.7445, "step": 59255 }, { "epoch": 0.66, "learning_rate": 3.906383795435085e-05, "loss": 0.7737, "step": 59260 }, { "epoch": 0.66, "learning_rate": 3.9062915227212335e-05, "loss": 0.6993, "step": 59265 }, { "epoch": 0.66, "learning_rate": 3.9061992500073816e-05, "loss": 0.7573, "step": 59270 }, { "epoch": 0.66, "learning_rate": 3.906106977293531e-05, "loss": 0.7247, "step": 59275 }, { "epoch": 0.66, "learning_rate": 3.90601470457968e-05, "loss": 0.729, "step": 59280 }, { "epoch": 0.66, "learning_rate": 3.905922431865828e-05, "loss": 0.6962, "step": 59285 }, { "epoch": 0.66, "learning_rate": 3.905830159151977e-05, "loss": 0.7739, "step": 59290 }, { "epoch": 0.66, "learning_rate": 3.9057378864381256e-05, "loss": 0.7749, "step": 59295 }, { "epoch": 0.66, "learning_rate": 3.905645613724275e-05, "loss": 0.7327, "step": 59300 }, { "epoch": 0.66, "learning_rate": 3.905553341010423e-05, "loss": 0.705, "step": 59305 }, { "epoch": 0.66, "learning_rate": 3.905461068296572e-05, "loss": 0.688, "step": 59310 }, { "epoch": 0.66, "learning_rate": 3.905368795582721e-05, "loss": 0.6776, "step": 59315 }, { "epoch": 0.66, "learning_rate": 3.9052765228688695e-05, "loss": 0.7248, "step": 59320 }, { "epoch": 0.66, "learning_rate": 3.905184250155018e-05, "loss": 0.6879, "step": 59325 }, { "epoch": 0.66, "learning_rate": 3.905091977441167e-05, "loss": 0.7344, "step": 59330 }, { "epoch": 0.66, "learning_rate": 3.904999704727316e-05, "loss": 0.6868, "step": 59335 }, { "epoch": 0.66, "learning_rate": 3.9049074320134646e-05, "loss": 0.6706, "step": 59340 }, { "epoch": 0.66, "learning_rate": 3.9048151592996134e-05, "loss": 0.8096, "step": 59345 }, { "epoch": 0.66, "learning_rate": 3.904722886585762e-05, "loss": 0.7458, "step": 59350 }, { "epoch": 0.66, "learning_rate": 3.904630613871911e-05, "loss": 0.743, "step": 59355 }, { "epoch": 0.66, "learning_rate": 3.904538341158059e-05, "loss": 0.7416, "step": 59360 }, { "epoch": 0.66, "learning_rate": 3.9044460684442086e-05, "loss": 0.7158, "step": 59365 }, { "epoch": 0.66, "learning_rate": 3.9043537957303573e-05, "loss": 0.6924, "step": 59370 }, { "epoch": 0.66, "learning_rate": 3.904261523016506e-05, "loss": 0.7537, "step": 59375 }, { "epoch": 0.66, "learning_rate": 3.904169250302654e-05, "loss": 0.7265, "step": 59380 }, { "epoch": 0.66, "learning_rate": 3.904076977588804e-05, "loss": 0.7331, "step": 59385 }, { "epoch": 0.66, "learning_rate": 3.9039847048749525e-05, "loss": 0.6518, "step": 59390 }, { "epoch": 0.66, "learning_rate": 3.9038924321611006e-05, "loss": 0.7007, "step": 59395 }, { "epoch": 0.66, "learning_rate": 3.9038001594472494e-05, "loss": 0.7375, "step": 59400 }, { "epoch": 0.66, "learning_rate": 3.903707886733399e-05, "loss": 0.7121, "step": 59405 }, { "epoch": 0.66, "learning_rate": 3.9036156140195476e-05, "loss": 0.7047, "step": 59410 }, { "epoch": 0.66, "learning_rate": 3.903523341305696e-05, "loss": 0.7417, "step": 59415 }, { "epoch": 0.66, "learning_rate": 3.9034310685918445e-05, "loss": 0.6972, "step": 59420 }, { "epoch": 0.66, "learning_rate": 3.903338795877994e-05, "loss": 0.7673, "step": 59425 }, { "epoch": 0.66, "learning_rate": 3.903246523164142e-05, "loss": 0.7387, "step": 59430 }, { "epoch": 0.66, "learning_rate": 3.903154250450291e-05, "loss": 0.6988, "step": 59435 }, { "epoch": 0.66, "learning_rate": 3.9030619777364396e-05, "loss": 0.7344, "step": 59440 }, { "epoch": 0.66, "learning_rate": 3.9029697050225884e-05, "loss": 0.7197, "step": 59445 }, { "epoch": 0.66, "learning_rate": 3.902877432308737e-05, "loss": 0.6861, "step": 59450 }, { "epoch": 0.66, "learning_rate": 3.902785159594886e-05, "loss": 0.7186, "step": 59455 }, { "epoch": 0.66, "learning_rate": 3.902692886881035e-05, "loss": 0.7329, "step": 59460 }, { "epoch": 0.66, "learning_rate": 3.9026006141671836e-05, "loss": 0.679, "step": 59465 }, { "epoch": 0.66, "learning_rate": 3.9025083414533324e-05, "loss": 0.733, "step": 59470 }, { "epoch": 0.66, "learning_rate": 3.902416068739481e-05, "loss": 0.7117, "step": 59475 }, { "epoch": 0.66, "learning_rate": 3.90232379602563e-05, "loss": 0.697, "step": 59480 }, { "epoch": 0.66, "learning_rate": 3.902231523311779e-05, "loss": 0.7049, "step": 59485 }, { "epoch": 0.66, "learning_rate": 3.9021392505979275e-05, "loss": 0.7059, "step": 59490 }, { "epoch": 0.66, "learning_rate": 3.902046977884076e-05, "loss": 0.7865, "step": 59495 }, { "epoch": 0.66, "learning_rate": 3.901954705170225e-05, "loss": 0.7536, "step": 59500 }, { "epoch": 0.66, "learning_rate": 3.901862432456373e-05, "loss": 0.7155, "step": 59505 }, { "epoch": 0.66, "learning_rate": 3.901770159742522e-05, "loss": 0.7307, "step": 59510 }, { "epoch": 0.66, "learning_rate": 3.9016778870286714e-05, "loss": 0.7726, "step": 59515 }, { "epoch": 0.66, "learning_rate": 3.90158561431482e-05, "loss": 0.6896, "step": 59520 }, { "epoch": 0.66, "learning_rate": 3.901493341600968e-05, "loss": 0.7144, "step": 59525 }, { "epoch": 0.66, "learning_rate": 3.901401068887117e-05, "loss": 0.718, "step": 59530 }, { "epoch": 0.66, "learning_rate": 3.9013087961732666e-05, "loss": 0.6558, "step": 59535 }, { "epoch": 0.66, "learning_rate": 3.9012165234594153e-05, "loss": 0.742, "step": 59540 }, { "epoch": 0.66, "learning_rate": 3.9011242507455635e-05, "loss": 0.7384, "step": 59545 }, { "epoch": 0.66, "learning_rate": 3.901031978031712e-05, "loss": 0.7269, "step": 59550 }, { "epoch": 0.66, "learning_rate": 3.900939705317862e-05, "loss": 0.722, "step": 59555 }, { "epoch": 0.66, "learning_rate": 3.90084743260401e-05, "loss": 0.698, "step": 59560 }, { "epoch": 0.66, "learning_rate": 3.9007551598901586e-05, "loss": 0.7352, "step": 59565 }, { "epoch": 0.66, "learning_rate": 3.9006628871763074e-05, "loss": 0.7121, "step": 59570 }, { "epoch": 0.66, "learning_rate": 3.900570614462457e-05, "loss": 0.7039, "step": 59575 }, { "epoch": 0.66, "learning_rate": 3.900478341748605e-05, "loss": 0.7176, "step": 59580 }, { "epoch": 0.66, "learning_rate": 3.900386069034754e-05, "loss": 0.7476, "step": 59585 }, { "epoch": 0.66, "learning_rate": 3.9002937963209025e-05, "loss": 0.6947, "step": 59590 }, { "epoch": 0.66, "learning_rate": 3.900201523607051e-05, "loss": 0.7119, "step": 59595 }, { "epoch": 0.66, "learning_rate": 3.9001092508932e-05, "loss": 0.7333, "step": 59600 }, { "epoch": 0.66, "learning_rate": 3.900016978179349e-05, "loss": 0.691, "step": 59605 }, { "epoch": 0.66, "learning_rate": 3.8999247054654977e-05, "loss": 0.6782, "step": 59610 }, { "epoch": 0.66, "learning_rate": 3.8998324327516464e-05, "loss": 0.7037, "step": 59615 }, { "epoch": 0.66, "learning_rate": 3.899740160037795e-05, "loss": 0.7344, "step": 59620 }, { "epoch": 0.66, "learning_rate": 3.899647887323944e-05, "loss": 0.6875, "step": 59625 }, { "epoch": 0.66, "learning_rate": 3.899555614610093e-05, "loss": 0.7415, "step": 59630 }, { "epoch": 0.66, "learning_rate": 3.899463341896241e-05, "loss": 0.7133, "step": 59635 }, { "epoch": 0.66, "learning_rate": 3.8993710691823904e-05, "loss": 0.7748, "step": 59640 }, { "epoch": 0.66, "learning_rate": 3.899278796468539e-05, "loss": 0.7284, "step": 59645 }, { "epoch": 0.66, "learning_rate": 3.899186523754688e-05, "loss": 0.7432, "step": 59650 }, { "epoch": 0.66, "learning_rate": 3.899094251040836e-05, "loss": 0.7354, "step": 59655 }, { "epoch": 0.66, "learning_rate": 3.899001978326985e-05, "loss": 0.742, "step": 59660 }, { "epoch": 0.66, "learning_rate": 3.898909705613134e-05, "loss": 0.7468, "step": 59665 }, { "epoch": 0.66, "learning_rate": 3.8988174328992824e-05, "loss": 0.8016, "step": 59670 }, { "epoch": 0.66, "learning_rate": 3.898725160185431e-05, "loss": 0.7502, "step": 59675 }, { "epoch": 0.66, "learning_rate": 3.89863288747158e-05, "loss": 0.6886, "step": 59680 }, { "epoch": 0.66, "learning_rate": 3.8985406147577294e-05, "loss": 0.7384, "step": 59685 }, { "epoch": 0.66, "learning_rate": 3.8984483420438775e-05, "loss": 0.6363, "step": 59690 }, { "epoch": 0.66, "learning_rate": 3.898356069330026e-05, "loss": 0.7804, "step": 59695 }, { "epoch": 0.66, "learning_rate": 3.898263796616175e-05, "loss": 0.674, "step": 59700 }, { "epoch": 0.66, "learning_rate": 3.898171523902324e-05, "loss": 0.7427, "step": 59705 }, { "epoch": 0.66, "learning_rate": 3.898079251188473e-05, "loss": 0.7125, "step": 59710 }, { "epoch": 0.66, "learning_rate": 3.8979869784746215e-05, "loss": 0.6755, "step": 59715 }, { "epoch": 0.66, "learning_rate": 3.89789470576077e-05, "loss": 0.6383, "step": 59720 }, { "epoch": 0.66, "learning_rate": 3.897802433046919e-05, "loss": 0.7329, "step": 59725 }, { "epoch": 0.66, "learning_rate": 3.897710160333068e-05, "loss": 0.7573, "step": 59730 }, { "epoch": 0.66, "learning_rate": 3.8976178876192166e-05, "loss": 0.7382, "step": 59735 }, { "epoch": 0.66, "learning_rate": 3.8975256149053654e-05, "loss": 0.7601, "step": 59740 }, { "epoch": 0.66, "learning_rate": 3.8974333421915135e-05, "loss": 0.7574, "step": 59745 }, { "epoch": 0.66, "learning_rate": 3.897341069477663e-05, "loss": 0.7428, "step": 59750 }, { "epoch": 0.66, "learning_rate": 3.897248796763812e-05, "loss": 0.7116, "step": 59755 }, { "epoch": 0.66, "learning_rate": 3.8971565240499605e-05, "loss": 0.6987, "step": 59760 }, { "epoch": 0.66, "learning_rate": 3.8970642513361086e-05, "loss": 0.7394, "step": 59765 }, { "epoch": 0.66, "learning_rate": 3.896971978622258e-05, "loss": 0.7355, "step": 59770 }, { "epoch": 0.66, "learning_rate": 3.896879705908407e-05, "loss": 0.6749, "step": 59775 }, { "epoch": 0.66, "learning_rate": 3.896787433194555e-05, "loss": 0.7345, "step": 59780 }, { "epoch": 0.66, "learning_rate": 3.896695160480704e-05, "loss": 0.7562, "step": 59785 }, { "epoch": 0.66, "learning_rate": 3.896602887766853e-05, "loss": 0.7464, "step": 59790 }, { "epoch": 0.66, "learning_rate": 3.896510615053002e-05, "loss": 0.7378, "step": 59795 }, { "epoch": 0.66, "learning_rate": 3.89641834233915e-05, "loss": 0.7103, "step": 59800 }, { "epoch": 0.66, "learning_rate": 3.896326069625299e-05, "loss": 0.7259, "step": 59805 }, { "epoch": 0.66, "learning_rate": 3.896233796911448e-05, "loss": 0.6633, "step": 59810 }, { "epoch": 0.66, "learning_rate": 3.8961415241975965e-05, "loss": 0.7104, "step": 59815 }, { "epoch": 0.66, "learning_rate": 3.896049251483745e-05, "loss": 0.6769, "step": 59820 }, { "epoch": 0.66, "learning_rate": 3.895956978769894e-05, "loss": 0.7651, "step": 59825 }, { "epoch": 0.66, "learning_rate": 3.895864706056043e-05, "loss": 0.6358, "step": 59830 }, { "epoch": 0.66, "learning_rate": 3.8957724333421916e-05, "loss": 0.7172, "step": 59835 }, { "epoch": 0.66, "learning_rate": 3.8956801606283404e-05, "loss": 0.6989, "step": 59840 }, { "epoch": 0.66, "learning_rate": 3.895587887914489e-05, "loss": 0.6358, "step": 59845 }, { "epoch": 0.66, "learning_rate": 3.895495615200638e-05, "loss": 0.7424, "step": 59850 }, { "epoch": 0.66, "learning_rate": 3.895403342486787e-05, "loss": 0.6949, "step": 59855 }, { "epoch": 0.66, "learning_rate": 3.8953110697729355e-05, "loss": 0.7377, "step": 59860 }, { "epoch": 0.66, "learning_rate": 3.895218797059084e-05, "loss": 0.7675, "step": 59865 }, { "epoch": 0.66, "learning_rate": 3.895126524345233e-05, "loss": 0.6948, "step": 59870 }, { "epoch": 0.66, "learning_rate": 3.895034251631382e-05, "loss": 0.7532, "step": 59875 }, { "epoch": 0.66, "learning_rate": 3.894941978917531e-05, "loss": 0.7114, "step": 59880 }, { "epoch": 0.66, "learning_rate": 3.8948497062036795e-05, "loss": 0.6886, "step": 59885 }, { "epoch": 0.66, "learning_rate": 3.8947574334898276e-05, "loss": 0.7343, "step": 59890 }, { "epoch": 0.66, "learning_rate": 3.8946651607759764e-05, "loss": 0.6854, "step": 59895 }, { "epoch": 0.66, "learning_rate": 3.894572888062126e-05, "loss": 0.7246, "step": 59900 }, { "epoch": 0.66, "learning_rate": 3.8944806153482746e-05, "loss": 0.71, "step": 59905 }, { "epoch": 0.66, "learning_rate": 3.894388342634423e-05, "loss": 0.6654, "step": 59910 }, { "epoch": 0.66, "learning_rate": 3.8942960699205715e-05, "loss": 0.7409, "step": 59915 }, { "epoch": 0.66, "learning_rate": 3.894203797206721e-05, "loss": 0.7101, "step": 59920 }, { "epoch": 0.66, "learning_rate": 3.89411152449287e-05, "loss": 0.6905, "step": 59925 }, { "epoch": 0.66, "learning_rate": 3.894019251779018e-05, "loss": 0.7544, "step": 59930 }, { "epoch": 0.66, "learning_rate": 3.8939269790651666e-05, "loss": 0.74, "step": 59935 }, { "epoch": 0.66, "learning_rate": 3.893834706351316e-05, "loss": 0.7797, "step": 59940 }, { "epoch": 0.66, "learning_rate": 3.893742433637464e-05, "loss": 0.7196, "step": 59945 }, { "epoch": 0.66, "learning_rate": 3.893650160923613e-05, "loss": 0.735, "step": 59950 }, { "epoch": 0.66, "learning_rate": 3.893557888209762e-05, "loss": 0.7172, "step": 59955 }, { "epoch": 0.66, "learning_rate": 3.893465615495911e-05, "loss": 0.67, "step": 59960 }, { "epoch": 0.66, "learning_rate": 3.8933733427820594e-05, "loss": 0.7096, "step": 59965 }, { "epoch": 0.66, "learning_rate": 3.893281070068208e-05, "loss": 0.7503, "step": 59970 }, { "epoch": 0.66, "learning_rate": 3.893188797354357e-05, "loss": 0.6815, "step": 59975 }, { "epoch": 0.66, "learning_rate": 3.893096524640506e-05, "loss": 0.68, "step": 59980 }, { "epoch": 0.66, "learning_rate": 3.8930042519266545e-05, "loss": 0.7129, "step": 59985 }, { "epoch": 0.66, "learning_rate": 3.892911979212803e-05, "loss": 0.6792, "step": 59990 }, { "epoch": 0.66, "learning_rate": 3.892819706498952e-05, "loss": 0.7644, "step": 59995 }, { "epoch": 0.66, "learning_rate": 3.892727433785101e-05, "loss": 0.7512, "step": 60000 }, { "epoch": 0.66, "eval_loss": 0.6951414942741394, "eval_runtime": 69.3626, "eval_samples_per_second": 28.834, "eval_steps_per_second": 14.417, "step": 60000 }, { "epoch": 0.66, "learning_rate": 3.8926351610712496e-05, "loss": 0.7484, "step": 60005 }, { "epoch": 0.66, "learning_rate": 3.8925428883573984e-05, "loss": 0.7002, "step": 60010 }, { "epoch": 0.66, "learning_rate": 3.892450615643547e-05, "loss": 0.7452, "step": 60015 }, { "epoch": 0.66, "learning_rate": 3.892358342929695e-05, "loss": 0.7536, "step": 60020 }, { "epoch": 0.66, "learning_rate": 3.892266070215845e-05, "loss": 0.7181, "step": 60025 }, { "epoch": 0.66, "learning_rate": 3.8921737975019936e-05, "loss": 0.7671, "step": 60030 }, { "epoch": 0.66, "learning_rate": 3.892081524788142e-05, "loss": 0.7354, "step": 60035 }, { "epoch": 0.66, "learning_rate": 3.8919892520742904e-05, "loss": 0.7397, "step": 60040 }, { "epoch": 0.66, "learning_rate": 3.891896979360439e-05, "loss": 0.7627, "step": 60045 }, { "epoch": 0.66, "learning_rate": 3.891804706646589e-05, "loss": 0.7576, "step": 60050 }, { "epoch": 0.66, "learning_rate": 3.891712433932737e-05, "loss": 0.7708, "step": 60055 }, { "epoch": 0.67, "learning_rate": 3.8916201612188856e-05, "loss": 0.7146, "step": 60060 }, { "epoch": 0.67, "learning_rate": 3.8915278885050344e-05, "loss": 0.753, "step": 60065 }, { "epoch": 0.67, "learning_rate": 3.891435615791184e-05, "loss": 0.729, "step": 60070 }, { "epoch": 0.67, "learning_rate": 3.891343343077332e-05, "loss": 0.7687, "step": 60075 }, { "epoch": 0.67, "learning_rate": 3.891251070363481e-05, "loss": 0.7585, "step": 60080 }, { "epoch": 0.67, "learning_rate": 3.8911587976496295e-05, "loss": 0.7346, "step": 60085 }, { "epoch": 0.67, "learning_rate": 3.891066524935778e-05, "loss": 0.8067, "step": 60090 }, { "epoch": 0.67, "learning_rate": 3.890974252221927e-05, "loss": 0.7325, "step": 60095 }, { "epoch": 0.67, "learning_rate": 3.890881979508076e-05, "loss": 0.776, "step": 60100 }, { "epoch": 0.67, "learning_rate": 3.8907897067942246e-05, "loss": 0.7297, "step": 60105 }, { "epoch": 0.67, "learning_rate": 3.8906974340803734e-05, "loss": 0.7295, "step": 60110 }, { "epoch": 0.67, "learning_rate": 3.890605161366522e-05, "loss": 0.6636, "step": 60115 }, { "epoch": 0.67, "learning_rate": 3.890512888652671e-05, "loss": 0.7369, "step": 60120 }, { "epoch": 0.67, "learning_rate": 3.89042061593882e-05, "loss": 0.7324, "step": 60125 }, { "epoch": 0.67, "learning_rate": 3.890328343224968e-05, "loss": 0.7428, "step": 60130 }, { "epoch": 0.67, "learning_rate": 3.8902360705111174e-05, "loss": 0.6948, "step": 60135 }, { "epoch": 0.67, "learning_rate": 3.890143797797266e-05, "loss": 0.7059, "step": 60140 }, { "epoch": 0.67, "learning_rate": 3.890051525083415e-05, "loss": 0.6769, "step": 60145 }, { "epoch": 0.67, "learning_rate": 3.889959252369563e-05, "loss": 0.7511, "step": 60150 }, { "epoch": 0.67, "learning_rate": 3.8898669796557125e-05, "loss": 0.745, "step": 60155 }, { "epoch": 0.67, "learning_rate": 3.889774706941861e-05, "loss": 0.6702, "step": 60160 }, { "epoch": 0.67, "learning_rate": 3.8896824342280094e-05, "loss": 0.7764, "step": 60165 }, { "epoch": 0.67, "learning_rate": 3.889590161514158e-05, "loss": 0.71, "step": 60170 }, { "epoch": 0.67, "learning_rate": 3.8894978888003076e-05, "loss": 0.725, "step": 60175 }, { "epoch": 0.67, "learning_rate": 3.8894056160864564e-05, "loss": 0.7232, "step": 60180 }, { "epoch": 0.67, "learning_rate": 3.8893133433726045e-05, "loss": 0.6795, "step": 60185 }, { "epoch": 0.67, "learning_rate": 3.889221070658753e-05, "loss": 0.762, "step": 60190 }, { "epoch": 0.67, "learning_rate": 3.889128797944902e-05, "loss": 0.7332, "step": 60195 }, { "epoch": 0.67, "learning_rate": 3.889036525231051e-05, "loss": 0.7094, "step": 60200 }, { "epoch": 0.67, "learning_rate": 3.8889442525172e-05, "loss": 0.7152, "step": 60205 }, { "epoch": 0.67, "learning_rate": 3.8888519798033485e-05, "loss": 0.728, "step": 60210 }, { "epoch": 0.67, "learning_rate": 3.888759707089497e-05, "loss": 0.7339, "step": 60215 }, { "epoch": 0.67, "learning_rate": 3.888667434375646e-05, "loss": 0.7221, "step": 60220 }, { "epoch": 0.67, "learning_rate": 3.888575161661795e-05, "loss": 0.6493, "step": 60225 }, { "epoch": 0.67, "learning_rate": 3.8884828889479436e-05, "loss": 0.6876, "step": 60230 }, { "epoch": 0.67, "learning_rate": 3.8883906162340924e-05, "loss": 0.7288, "step": 60235 }, { "epoch": 0.67, "learning_rate": 3.888298343520241e-05, "loss": 0.7616, "step": 60240 }, { "epoch": 0.67, "learning_rate": 3.88820607080639e-05, "loss": 0.7171, "step": 60245 }, { "epoch": 0.67, "learning_rate": 3.888113798092539e-05, "loss": 0.7503, "step": 60250 }, { "epoch": 0.67, "learning_rate": 3.8880215253786875e-05, "loss": 0.7586, "step": 60255 }, { "epoch": 0.67, "learning_rate": 3.887929252664836e-05, "loss": 0.7423, "step": 60260 }, { "epoch": 0.67, "learning_rate": 3.887836979950985e-05, "loss": 0.6917, "step": 60265 }, { "epoch": 0.67, "learning_rate": 3.887744707237134e-05, "loss": 0.7158, "step": 60270 }, { "epoch": 0.67, "learning_rate": 3.887652434523282e-05, "loss": 0.7836, "step": 60275 }, { "epoch": 0.67, "learning_rate": 3.887560161809431e-05, "loss": 0.6849, "step": 60280 }, { "epoch": 0.67, "learning_rate": 3.88746788909558e-05, "loss": 0.7487, "step": 60285 }, { "epoch": 0.67, "learning_rate": 3.887375616381729e-05, "loss": 0.7285, "step": 60290 }, { "epoch": 0.67, "learning_rate": 3.887283343667877e-05, "loss": 0.7141, "step": 60295 }, { "epoch": 0.67, "learning_rate": 3.887191070954026e-05, "loss": 0.7223, "step": 60300 }, { "epoch": 0.67, "learning_rate": 3.8870987982401754e-05, "loss": 0.7232, "step": 60305 }, { "epoch": 0.67, "learning_rate": 3.887006525526324e-05, "loss": 0.6719, "step": 60310 }, { "epoch": 0.67, "learning_rate": 3.886914252812472e-05, "loss": 0.6647, "step": 60315 }, { "epoch": 0.67, "learning_rate": 3.886821980098621e-05, "loss": 0.7376, "step": 60320 }, { "epoch": 0.67, "learning_rate": 3.8867297073847705e-05, "loss": 0.7819, "step": 60325 }, { "epoch": 0.67, "learning_rate": 3.8866374346709186e-05, "loss": 0.7168, "step": 60330 }, { "epoch": 0.67, "learning_rate": 3.8865451619570674e-05, "loss": 0.6528, "step": 60335 }, { "epoch": 0.67, "learning_rate": 3.886452889243216e-05, "loss": 0.7111, "step": 60340 }, { "epoch": 0.67, "learning_rate": 3.886360616529365e-05, "loss": 0.6962, "step": 60345 }, { "epoch": 0.67, "learning_rate": 3.886268343815514e-05, "loss": 0.6814, "step": 60350 }, { "epoch": 0.67, "learning_rate": 3.8861760711016625e-05, "loss": 0.6876, "step": 60355 }, { "epoch": 0.67, "learning_rate": 3.886083798387811e-05, "loss": 0.726, "step": 60360 }, { "epoch": 0.67, "learning_rate": 3.88599152567396e-05, "loss": 0.701, "step": 60365 }, { "epoch": 0.67, "learning_rate": 3.885899252960109e-05, "loss": 0.7054, "step": 60370 }, { "epoch": 0.67, "learning_rate": 3.885806980246258e-05, "loss": 0.6868, "step": 60375 }, { "epoch": 0.67, "learning_rate": 3.8857147075324065e-05, "loss": 0.6717, "step": 60380 }, { "epoch": 0.67, "learning_rate": 3.885622434818555e-05, "loss": 0.7095, "step": 60385 }, { "epoch": 0.67, "learning_rate": 3.885530162104704e-05, "loss": 0.7222, "step": 60390 }, { "epoch": 0.67, "learning_rate": 3.885437889390853e-05, "loss": 0.7647, "step": 60395 }, { "epoch": 0.67, "learning_rate": 3.8853456166770016e-05, "loss": 0.7595, "step": 60400 }, { "epoch": 0.67, "learning_rate": 3.88525334396315e-05, "loss": 0.7915, "step": 60405 }, { "epoch": 0.67, "learning_rate": 3.885161071249299e-05, "loss": 0.6901, "step": 60410 }, { "epoch": 0.67, "learning_rate": 3.885068798535448e-05, "loss": 0.7383, "step": 60415 }, { "epoch": 0.67, "learning_rate": 3.884976525821597e-05, "loss": 0.7327, "step": 60420 }, { "epoch": 0.67, "learning_rate": 3.884884253107745e-05, "loss": 0.7313, "step": 60425 }, { "epoch": 0.67, "learning_rate": 3.8847919803938936e-05, "loss": 0.734, "step": 60430 }, { "epoch": 0.67, "learning_rate": 3.884699707680043e-05, "loss": 0.6348, "step": 60435 }, { "epoch": 0.67, "learning_rate": 3.884607434966191e-05, "loss": 0.7244, "step": 60440 }, { "epoch": 0.67, "learning_rate": 3.88451516225234e-05, "loss": 0.6992, "step": 60445 }, { "epoch": 0.67, "learning_rate": 3.884422889538489e-05, "loss": 0.691, "step": 60450 }, { "epoch": 0.67, "learning_rate": 3.884330616824638e-05, "loss": 0.6745, "step": 60455 }, { "epoch": 0.67, "learning_rate": 3.8842383441107863e-05, "loss": 0.676, "step": 60460 }, { "epoch": 0.67, "learning_rate": 3.884146071396935e-05, "loss": 0.6932, "step": 60465 }, { "epoch": 0.67, "learning_rate": 3.884053798683084e-05, "loss": 0.7915, "step": 60470 }, { "epoch": 0.67, "learning_rate": 3.883961525969233e-05, "loss": 0.686, "step": 60475 }, { "epoch": 0.67, "learning_rate": 3.8838692532553815e-05, "loss": 0.7216, "step": 60480 }, { "epoch": 0.67, "learning_rate": 3.88377698054153e-05, "loss": 0.7753, "step": 60485 }, { "epoch": 0.67, "learning_rate": 3.883684707827679e-05, "loss": 0.7495, "step": 60490 }, { "epoch": 0.67, "learning_rate": 3.883592435113828e-05, "loss": 0.7383, "step": 60495 }, { "epoch": 0.67, "learning_rate": 3.8835001623999766e-05, "loss": 0.6829, "step": 60500 }, { "epoch": 0.67, "learning_rate": 3.8834078896861254e-05, "loss": 0.7127, "step": 60505 }, { "epoch": 0.67, "learning_rate": 3.883315616972274e-05, "loss": 0.6841, "step": 60510 }, { "epoch": 0.67, "learning_rate": 3.883223344258422e-05, "loss": 0.6884, "step": 60515 }, { "epoch": 0.67, "learning_rate": 3.883131071544572e-05, "loss": 0.7627, "step": 60520 }, { "epoch": 0.67, "learning_rate": 3.8830387988307205e-05, "loss": 0.6852, "step": 60525 }, { "epoch": 0.67, "learning_rate": 3.882946526116869e-05, "loss": 0.71, "step": 60530 }, { "epoch": 0.67, "learning_rate": 3.8828542534030174e-05, "loss": 0.6967, "step": 60535 }, { "epoch": 0.67, "learning_rate": 3.882761980689167e-05, "loss": 0.7234, "step": 60540 }, { "epoch": 0.67, "learning_rate": 3.882669707975316e-05, "loss": 0.7218, "step": 60545 }, { "epoch": 0.67, "learning_rate": 3.882577435261464e-05, "loss": 0.8032, "step": 60550 }, { "epoch": 0.67, "learning_rate": 3.8824851625476126e-05, "loss": 0.7556, "step": 60555 }, { "epoch": 0.67, "learning_rate": 3.882392889833762e-05, "loss": 0.7406, "step": 60560 }, { "epoch": 0.67, "learning_rate": 3.882300617119911e-05, "loss": 0.7639, "step": 60565 }, { "epoch": 0.67, "learning_rate": 3.882208344406059e-05, "loss": 0.7116, "step": 60570 }, { "epoch": 0.67, "learning_rate": 3.882116071692208e-05, "loss": 0.7489, "step": 60575 }, { "epoch": 0.67, "learning_rate": 3.8820237989783565e-05, "loss": 0.7134, "step": 60580 }, { "epoch": 0.67, "learning_rate": 3.881931526264506e-05, "loss": 0.6806, "step": 60585 }, { "epoch": 0.67, "learning_rate": 3.881839253550654e-05, "loss": 0.7267, "step": 60590 }, { "epoch": 0.67, "learning_rate": 3.881746980836803e-05, "loss": 0.7304, "step": 60595 }, { "epoch": 0.67, "learning_rate": 3.8816547081229516e-05, "loss": 0.7411, "step": 60600 }, { "epoch": 0.67, "learning_rate": 3.8815624354091004e-05, "loss": 0.7464, "step": 60605 }, { "epoch": 0.67, "learning_rate": 3.881470162695249e-05, "loss": 0.6431, "step": 60610 }, { "epoch": 0.67, "learning_rate": 3.881377889981398e-05, "loss": 0.7669, "step": 60615 }, { "epoch": 0.67, "learning_rate": 3.881285617267547e-05, "loss": 0.7389, "step": 60620 }, { "epoch": 0.67, "learning_rate": 3.8811933445536956e-05, "loss": 0.7675, "step": 60625 }, { "epoch": 0.67, "learning_rate": 3.8811010718398443e-05, "loss": 0.6819, "step": 60630 }, { "epoch": 0.67, "learning_rate": 3.881008799125993e-05, "loss": 0.72, "step": 60635 }, { "epoch": 0.67, "learning_rate": 3.880916526412142e-05, "loss": 0.7634, "step": 60640 }, { "epoch": 0.67, "learning_rate": 3.88082425369829e-05, "loss": 0.6859, "step": 60645 }, { "epoch": 0.67, "learning_rate": 3.8807319809844395e-05, "loss": 0.7465, "step": 60650 }, { "epoch": 0.67, "learning_rate": 3.880639708270588e-05, "loss": 0.73, "step": 60655 }, { "epoch": 0.67, "learning_rate": 3.880547435556737e-05, "loss": 0.7233, "step": 60660 }, { "epoch": 0.67, "learning_rate": 3.880455162842885e-05, "loss": 0.7099, "step": 60665 }, { "epoch": 0.67, "learning_rate": 3.8803628901290346e-05, "loss": 0.7105, "step": 60670 }, { "epoch": 0.67, "learning_rate": 3.8802706174151834e-05, "loss": 0.7347, "step": 60675 }, { "epoch": 0.67, "learning_rate": 3.8801783447013315e-05, "loss": 0.7128, "step": 60680 }, { "epoch": 0.67, "learning_rate": 3.88008607198748e-05, "loss": 0.7126, "step": 60685 }, { "epoch": 0.67, "learning_rate": 3.87999379927363e-05, "loss": 0.7265, "step": 60690 }, { "epoch": 0.67, "learning_rate": 3.8799015265597786e-05, "loss": 0.743, "step": 60695 }, { "epoch": 0.67, "learning_rate": 3.8798092538459267e-05, "loss": 0.6741, "step": 60700 }, { "epoch": 0.67, "learning_rate": 3.8797169811320754e-05, "loss": 0.6892, "step": 60705 }, { "epoch": 0.67, "learning_rate": 3.879624708418225e-05, "loss": 0.7564, "step": 60710 }, { "epoch": 0.67, "learning_rate": 3.879532435704373e-05, "loss": 0.7007, "step": 60715 }, { "epoch": 0.67, "learning_rate": 3.879440162990522e-05, "loss": 0.7236, "step": 60720 }, { "epoch": 0.67, "learning_rate": 3.8793478902766706e-05, "loss": 0.674, "step": 60725 }, { "epoch": 0.67, "learning_rate": 3.8792556175628194e-05, "loss": 0.6567, "step": 60730 }, { "epoch": 0.67, "learning_rate": 3.879163344848968e-05, "loss": 0.7243, "step": 60735 }, { "epoch": 0.67, "learning_rate": 3.879071072135117e-05, "loss": 0.7917, "step": 60740 }, { "epoch": 0.67, "learning_rate": 3.878978799421266e-05, "loss": 0.7102, "step": 60745 }, { "epoch": 0.67, "learning_rate": 3.8788865267074145e-05, "loss": 0.735, "step": 60750 }, { "epoch": 0.67, "learning_rate": 3.878794253993563e-05, "loss": 0.7361, "step": 60755 }, { "epoch": 0.67, "learning_rate": 3.878701981279712e-05, "loss": 0.7353, "step": 60760 }, { "epoch": 0.67, "learning_rate": 3.878609708565861e-05, "loss": 0.6973, "step": 60765 }, { "epoch": 0.67, "learning_rate": 3.8785174358520096e-05, "loss": 0.7275, "step": 60770 }, { "epoch": 0.67, "learning_rate": 3.8784251631381584e-05, "loss": 0.7414, "step": 60775 }, { "epoch": 0.67, "learning_rate": 3.878332890424307e-05, "loss": 0.728, "step": 60780 }, { "epoch": 0.67, "learning_rate": 3.878240617710456e-05, "loss": 0.7047, "step": 60785 }, { "epoch": 0.67, "learning_rate": 3.878148344996604e-05, "loss": 0.7027, "step": 60790 }, { "epoch": 0.67, "learning_rate": 3.8780560722827536e-05, "loss": 0.7351, "step": 60795 }, { "epoch": 0.67, "learning_rate": 3.8779637995689024e-05, "loss": 0.7116, "step": 60800 }, { "epoch": 0.67, "learning_rate": 3.877871526855051e-05, "loss": 0.7138, "step": 60805 }, { "epoch": 0.67, "learning_rate": 3.877779254141199e-05, "loss": 0.7482, "step": 60810 }, { "epoch": 0.67, "learning_rate": 3.877686981427348e-05, "loss": 0.6988, "step": 60815 }, { "epoch": 0.67, "learning_rate": 3.8775947087134975e-05, "loss": 0.7009, "step": 60820 }, { "epoch": 0.67, "learning_rate": 3.8775024359996456e-05, "loss": 0.7141, "step": 60825 }, { "epoch": 0.67, "learning_rate": 3.8774101632857944e-05, "loss": 0.7471, "step": 60830 }, { "epoch": 0.67, "learning_rate": 3.877317890571943e-05, "loss": 0.7347, "step": 60835 }, { "epoch": 0.67, "learning_rate": 3.8772256178580926e-05, "loss": 0.7191, "step": 60840 }, { "epoch": 0.67, "learning_rate": 3.877133345144241e-05, "loss": 0.7183, "step": 60845 }, { "epoch": 0.67, "learning_rate": 3.8770410724303895e-05, "loss": 0.7394, "step": 60850 }, { "epoch": 0.67, "learning_rate": 3.876948799716538e-05, "loss": 0.7114, "step": 60855 }, { "epoch": 0.67, "learning_rate": 3.876856527002687e-05, "loss": 0.7349, "step": 60860 }, { "epoch": 0.67, "learning_rate": 3.876764254288836e-05, "loss": 0.6926, "step": 60865 }, { "epoch": 0.67, "learning_rate": 3.876671981574985e-05, "loss": 0.7305, "step": 60870 }, { "epoch": 0.67, "learning_rate": 3.8765797088611335e-05, "loss": 0.7385, "step": 60875 }, { "epoch": 0.67, "learning_rate": 3.876487436147282e-05, "loss": 0.7262, "step": 60880 }, { "epoch": 0.67, "learning_rate": 3.876395163433431e-05, "loss": 0.7246, "step": 60885 }, { "epoch": 0.67, "learning_rate": 3.87630289071958e-05, "loss": 0.7057, "step": 60890 }, { "epoch": 0.67, "learning_rate": 3.8762106180057286e-05, "loss": 0.7485, "step": 60895 }, { "epoch": 0.67, "learning_rate": 3.876118345291877e-05, "loss": 0.6985, "step": 60900 }, { "epoch": 0.67, "learning_rate": 3.876026072578026e-05, "loss": 0.7023, "step": 60905 }, { "epoch": 0.67, "learning_rate": 3.875933799864175e-05, "loss": 0.7514, "step": 60910 }, { "epoch": 0.67, "learning_rate": 3.875841527150324e-05, "loss": 0.7404, "step": 60915 }, { "epoch": 0.67, "learning_rate": 3.875749254436472e-05, "loss": 0.7453, "step": 60920 }, { "epoch": 0.67, "learning_rate": 3.875656981722621e-05, "loss": 0.726, "step": 60925 }, { "epoch": 0.67, "learning_rate": 3.87556470900877e-05, "loss": 0.7338, "step": 60930 }, { "epoch": 0.67, "learning_rate": 3.875472436294918e-05, "loss": 0.755, "step": 60935 }, { "epoch": 0.67, "learning_rate": 3.875380163581067e-05, "loss": 0.6468, "step": 60940 }, { "epoch": 0.67, "learning_rate": 3.8752878908672164e-05, "loss": 0.731, "step": 60945 }, { "epoch": 0.67, "learning_rate": 3.875195618153365e-05, "loss": 0.7599, "step": 60950 }, { "epoch": 0.67, "learning_rate": 3.875103345439513e-05, "loss": 0.7149, "step": 60955 }, { "epoch": 0.67, "learning_rate": 3.875011072725662e-05, "loss": 0.7353, "step": 60960 }, { "epoch": 0.68, "learning_rate": 3.874918800011811e-05, "loss": 0.6853, "step": 60965 }, { "epoch": 0.68, "learning_rate": 3.8748265272979604e-05, "loss": 0.7293, "step": 60970 }, { "epoch": 0.68, "learning_rate": 3.8747342545841085e-05, "loss": 0.6708, "step": 60975 }, { "epoch": 0.68, "learning_rate": 3.874641981870257e-05, "loss": 0.7602, "step": 60980 }, { "epoch": 0.68, "learning_rate": 3.874549709156406e-05, "loss": 0.7575, "step": 60985 }, { "epoch": 0.68, "learning_rate": 3.874457436442555e-05, "loss": 0.7084, "step": 60990 }, { "epoch": 0.68, "learning_rate": 3.8743651637287036e-05, "loss": 0.7072, "step": 60995 }, { "epoch": 0.68, "learning_rate": 3.8742728910148524e-05, "loss": 0.6816, "step": 61000 }, { "epoch": 0.68, "eval_loss": 0.7071796655654907, "eval_runtime": 69.7838, "eval_samples_per_second": 28.66, "eval_steps_per_second": 14.33, "step": 61000 }, { "epoch": 0.68, "learning_rate": 3.874180618301001e-05, "loss": 0.7136, "step": 61005 }, { "epoch": 0.68, "learning_rate": 3.87408834558715e-05, "loss": 0.6919, "step": 61010 }, { "epoch": 0.68, "learning_rate": 3.873996072873299e-05, "loss": 0.768, "step": 61015 }, { "epoch": 0.68, "learning_rate": 3.8739038001594475e-05, "loss": 0.7694, "step": 61020 }, { "epoch": 0.68, "learning_rate": 3.873811527445596e-05, "loss": 0.7661, "step": 61025 }, { "epoch": 0.68, "learning_rate": 3.8737192547317444e-05, "loss": 0.7985, "step": 61030 }, { "epoch": 0.68, "learning_rate": 3.873626982017894e-05, "loss": 0.7582, "step": 61035 }, { "epoch": 0.68, "learning_rate": 3.873534709304043e-05, "loss": 0.7232, "step": 61040 }, { "epoch": 0.68, "learning_rate": 3.8734424365901915e-05, "loss": 0.7311, "step": 61045 }, { "epoch": 0.68, "learning_rate": 3.8733501638763396e-05, "loss": 0.762, "step": 61050 }, { "epoch": 0.68, "learning_rate": 3.873257891162489e-05, "loss": 0.7042, "step": 61055 }, { "epoch": 0.68, "learning_rate": 3.873165618448638e-05, "loss": 0.7079, "step": 61060 }, { "epoch": 0.68, "learning_rate": 3.873073345734786e-05, "loss": 0.7553, "step": 61065 }, { "epoch": 0.68, "learning_rate": 3.872981073020935e-05, "loss": 0.7116, "step": 61070 }, { "epoch": 0.68, "learning_rate": 3.872888800307084e-05, "loss": 0.7307, "step": 61075 }, { "epoch": 0.68, "learning_rate": 3.872796527593233e-05, "loss": 0.6979, "step": 61080 }, { "epoch": 0.68, "learning_rate": 3.872704254879381e-05, "loss": 0.7026, "step": 61085 }, { "epoch": 0.68, "learning_rate": 3.87261198216553e-05, "loss": 0.741, "step": 61090 }, { "epoch": 0.68, "learning_rate": 3.872519709451679e-05, "loss": 0.7168, "step": 61095 }, { "epoch": 0.68, "learning_rate": 3.8724274367378274e-05, "loss": 0.7025, "step": 61100 }, { "epoch": 0.68, "learning_rate": 3.872335164023976e-05, "loss": 0.7167, "step": 61105 }, { "epoch": 0.68, "learning_rate": 3.872242891310125e-05, "loss": 0.7026, "step": 61110 }, { "epoch": 0.68, "learning_rate": 3.872150618596274e-05, "loss": 0.6915, "step": 61115 }, { "epoch": 0.68, "learning_rate": 3.8720583458824226e-05, "loss": 0.7027, "step": 61120 }, { "epoch": 0.68, "learning_rate": 3.8719660731685713e-05, "loss": 0.6581, "step": 61125 }, { "epoch": 0.68, "learning_rate": 3.87187380045472e-05, "loss": 0.7472, "step": 61130 }, { "epoch": 0.68, "learning_rate": 3.871781527740869e-05, "loss": 0.677, "step": 61135 }, { "epoch": 0.68, "learning_rate": 3.871689255027018e-05, "loss": 0.7468, "step": 61140 }, { "epoch": 0.68, "learning_rate": 3.8715969823131665e-05, "loss": 0.6892, "step": 61145 }, { "epoch": 0.68, "learning_rate": 3.871504709599315e-05, "loss": 0.8255, "step": 61150 }, { "epoch": 0.68, "learning_rate": 3.871412436885464e-05, "loss": 0.7277, "step": 61155 }, { "epoch": 0.68, "learning_rate": 3.871320164171613e-05, "loss": 0.7592, "step": 61160 }, { "epoch": 0.68, "learning_rate": 3.8712278914577616e-05, "loss": 0.7256, "step": 61165 }, { "epoch": 0.68, "learning_rate": 3.8711356187439104e-05, "loss": 0.7338, "step": 61170 }, { "epoch": 0.68, "learning_rate": 3.8710433460300585e-05, "loss": 0.7622, "step": 61175 }, { "epoch": 0.68, "learning_rate": 3.870951073316207e-05, "loss": 0.7258, "step": 61180 }, { "epoch": 0.68, "learning_rate": 3.870858800602357e-05, "loss": 0.6765, "step": 61185 }, { "epoch": 0.68, "learning_rate": 3.8707665278885055e-05, "loss": 0.7379, "step": 61190 }, { "epoch": 0.68, "learning_rate": 3.8706742551746537e-05, "loss": 0.7242, "step": 61195 }, { "epoch": 0.68, "learning_rate": 3.8705819824608024e-05, "loss": 0.7081, "step": 61200 }, { "epoch": 0.68, "learning_rate": 3.870489709746952e-05, "loss": 0.7372, "step": 61205 }, { "epoch": 0.68, "learning_rate": 3.8703974370331e-05, "loss": 0.7544, "step": 61210 }, { "epoch": 0.68, "learning_rate": 3.870305164319249e-05, "loss": 0.7173, "step": 61215 }, { "epoch": 0.68, "learning_rate": 3.8702128916053976e-05, "loss": 0.6637, "step": 61220 }, { "epoch": 0.68, "learning_rate": 3.870120618891547e-05, "loss": 0.6977, "step": 61225 }, { "epoch": 0.68, "learning_rate": 3.870028346177695e-05, "loss": 0.7171, "step": 61230 }, { "epoch": 0.68, "learning_rate": 3.869936073463844e-05, "loss": 0.7167, "step": 61235 }, { "epoch": 0.68, "learning_rate": 3.869843800749993e-05, "loss": 0.6451, "step": 61240 }, { "epoch": 0.68, "learning_rate": 3.8697515280361415e-05, "loss": 0.749, "step": 61245 }, { "epoch": 0.68, "learning_rate": 3.86965925532229e-05, "loss": 0.7426, "step": 61250 }, { "epoch": 0.68, "learning_rate": 3.869566982608439e-05, "loss": 0.7486, "step": 61255 }, { "epoch": 0.68, "learning_rate": 3.869474709894588e-05, "loss": 0.741, "step": 61260 }, { "epoch": 0.68, "learning_rate": 3.8693824371807366e-05, "loss": 0.7337, "step": 61265 }, { "epoch": 0.68, "learning_rate": 3.8692901644668854e-05, "loss": 0.7796, "step": 61270 }, { "epoch": 0.68, "learning_rate": 3.869197891753034e-05, "loss": 0.7313, "step": 61275 }, { "epoch": 0.68, "learning_rate": 3.869105619039183e-05, "loss": 0.7672, "step": 61280 }, { "epoch": 0.68, "learning_rate": 3.869013346325331e-05, "loss": 0.6949, "step": 61285 }, { "epoch": 0.68, "learning_rate": 3.8689210736114806e-05, "loss": 0.6792, "step": 61290 }, { "epoch": 0.68, "learning_rate": 3.8688288008976293e-05, "loss": 0.7269, "step": 61295 }, { "epoch": 0.68, "learning_rate": 3.868736528183778e-05, "loss": 0.6877, "step": 61300 }, { "epoch": 0.68, "learning_rate": 3.868644255469926e-05, "loss": 0.7017, "step": 61305 }, { "epoch": 0.68, "learning_rate": 3.868551982756076e-05, "loss": 0.7888, "step": 61310 }, { "epoch": 0.68, "learning_rate": 3.8684597100422245e-05, "loss": 0.7028, "step": 61315 }, { "epoch": 0.68, "learning_rate": 3.8683674373283726e-05, "loss": 0.7404, "step": 61320 }, { "epoch": 0.68, "learning_rate": 3.8682751646145214e-05, "loss": 0.6317, "step": 61325 }, { "epoch": 0.68, "learning_rate": 3.86818289190067e-05, "loss": 0.6935, "step": 61330 }, { "epoch": 0.68, "learning_rate": 3.8680906191868196e-05, "loss": 0.6956, "step": 61335 }, { "epoch": 0.68, "learning_rate": 3.867998346472968e-05, "loss": 0.7105, "step": 61340 }, { "epoch": 0.68, "learning_rate": 3.8679060737591165e-05, "loss": 0.668, "step": 61345 }, { "epoch": 0.68, "learning_rate": 3.867813801045265e-05, "loss": 0.6424, "step": 61350 }, { "epoch": 0.68, "learning_rate": 3.867721528331415e-05, "loss": 0.6876, "step": 61355 }, { "epoch": 0.68, "learning_rate": 3.867629255617563e-05, "loss": 0.7035, "step": 61360 }, { "epoch": 0.68, "learning_rate": 3.8675369829037117e-05, "loss": 0.7325, "step": 61365 }, { "epoch": 0.68, "learning_rate": 3.8674447101898604e-05, "loss": 0.6867, "step": 61370 }, { "epoch": 0.68, "learning_rate": 3.867352437476009e-05, "loss": 0.7394, "step": 61375 }, { "epoch": 0.68, "learning_rate": 3.867260164762158e-05, "loss": 0.7097, "step": 61380 }, { "epoch": 0.68, "learning_rate": 3.867167892048307e-05, "loss": 0.7245, "step": 61385 }, { "epoch": 0.68, "learning_rate": 3.8670756193344556e-05, "loss": 0.7268, "step": 61390 }, { "epoch": 0.68, "learning_rate": 3.8669833466206044e-05, "loss": 0.747, "step": 61395 }, { "epoch": 0.68, "learning_rate": 3.866891073906753e-05, "loss": 0.7104, "step": 61400 }, { "epoch": 0.68, "learning_rate": 3.866798801192902e-05, "loss": 0.767, "step": 61405 }, { "epoch": 0.68, "learning_rate": 3.866706528479051e-05, "loss": 0.7237, "step": 61410 }, { "epoch": 0.68, "learning_rate": 3.866614255765199e-05, "loss": 0.7623, "step": 61415 }, { "epoch": 0.68, "learning_rate": 3.866521983051348e-05, "loss": 0.7279, "step": 61420 }, { "epoch": 0.68, "learning_rate": 3.866429710337497e-05, "loss": 0.7298, "step": 61425 }, { "epoch": 0.68, "learning_rate": 3.866337437623646e-05, "loss": 0.6875, "step": 61430 }, { "epoch": 0.68, "learning_rate": 3.866245164909794e-05, "loss": 0.7567, "step": 61435 }, { "epoch": 0.68, "learning_rate": 3.8661528921959434e-05, "loss": 0.7192, "step": 61440 }, { "epoch": 0.68, "learning_rate": 3.866060619482092e-05, "loss": 0.7543, "step": 61445 }, { "epoch": 0.68, "learning_rate": 3.86596834676824e-05, "loss": 0.6638, "step": 61450 }, { "epoch": 0.68, "learning_rate": 3.865876074054389e-05, "loss": 0.6945, "step": 61455 }, { "epoch": 0.68, "learning_rate": 3.8657838013405386e-05, "loss": 0.7366, "step": 61460 }, { "epoch": 0.68, "learning_rate": 3.8656915286266874e-05, "loss": 0.7745, "step": 61465 }, { "epoch": 0.68, "learning_rate": 3.8655992559128355e-05, "loss": 0.7409, "step": 61470 }, { "epoch": 0.68, "learning_rate": 3.865506983198984e-05, "loss": 0.7203, "step": 61475 }, { "epoch": 0.68, "learning_rate": 3.865414710485133e-05, "loss": 0.6749, "step": 61480 }, { "epoch": 0.68, "learning_rate": 3.865322437771282e-05, "loss": 0.7893, "step": 61485 }, { "epoch": 0.68, "learning_rate": 3.8652301650574306e-05, "loss": 0.7784, "step": 61490 }, { "epoch": 0.68, "learning_rate": 3.8651378923435794e-05, "loss": 0.736, "step": 61495 }, { "epoch": 0.68, "learning_rate": 3.865045619629728e-05, "loss": 0.71, "step": 61500 }, { "epoch": 0.68, "learning_rate": 3.864953346915877e-05, "loss": 0.7264, "step": 61505 }, { "epoch": 0.68, "learning_rate": 3.864861074202026e-05, "loss": 0.7206, "step": 61510 }, { "epoch": 0.68, "learning_rate": 3.8647688014881745e-05, "loss": 0.732, "step": 61515 }, { "epoch": 0.68, "learning_rate": 3.864676528774323e-05, "loss": 0.7164, "step": 61520 }, { "epoch": 0.68, "learning_rate": 3.864584256060472e-05, "loss": 0.6736, "step": 61525 }, { "epoch": 0.68, "learning_rate": 3.864491983346621e-05, "loss": 0.7208, "step": 61530 }, { "epoch": 0.68, "learning_rate": 3.86439971063277e-05, "loss": 0.6961, "step": 61535 }, { "epoch": 0.68, "learning_rate": 3.8643074379189185e-05, "loss": 0.8042, "step": 61540 }, { "epoch": 0.68, "learning_rate": 3.864215165205067e-05, "loss": 0.7032, "step": 61545 }, { "epoch": 0.68, "learning_rate": 3.864122892491216e-05, "loss": 0.8, "step": 61550 }, { "epoch": 0.68, "learning_rate": 3.864030619777365e-05, "loss": 0.6933, "step": 61555 }, { "epoch": 0.68, "learning_rate": 3.863938347063513e-05, "loss": 0.6858, "step": 61560 }, { "epoch": 0.68, "learning_rate": 3.863846074349662e-05, "loss": 0.7458, "step": 61565 }, { "epoch": 0.68, "learning_rate": 3.863753801635811e-05, "loss": 0.6704, "step": 61570 }, { "epoch": 0.68, "learning_rate": 3.86366152892196e-05, "loss": 0.6687, "step": 61575 }, { "epoch": 0.68, "learning_rate": 3.863569256208108e-05, "loss": 0.773, "step": 61580 }, { "epoch": 0.68, "learning_rate": 3.863476983494257e-05, "loss": 0.6931, "step": 61585 }, { "epoch": 0.68, "learning_rate": 3.863384710780406e-05, "loss": 0.7288, "step": 61590 }, { "epoch": 0.68, "learning_rate": 3.8632924380665544e-05, "loss": 0.6955, "step": 61595 }, { "epoch": 0.68, "learning_rate": 3.863200165352703e-05, "loss": 0.7273, "step": 61600 }, { "epoch": 0.68, "learning_rate": 3.863107892638852e-05, "loss": 0.7187, "step": 61605 }, { "epoch": 0.68, "learning_rate": 3.8630156199250014e-05, "loss": 0.7312, "step": 61610 }, { "epoch": 0.68, "learning_rate": 3.8629233472111495e-05, "loss": 0.7232, "step": 61615 }, { "epoch": 0.68, "learning_rate": 3.862831074497298e-05, "loss": 0.771, "step": 61620 }, { "epoch": 0.68, "learning_rate": 3.862738801783447e-05, "loss": 0.7126, "step": 61625 }, { "epoch": 0.68, "learning_rate": 3.862646529069596e-05, "loss": 0.7344, "step": 61630 }, { "epoch": 0.68, "learning_rate": 3.862554256355745e-05, "loss": 0.6929, "step": 61635 }, { "epoch": 0.68, "learning_rate": 3.8624619836418935e-05, "loss": 0.8134, "step": 61640 }, { "epoch": 0.68, "learning_rate": 3.862369710928042e-05, "loss": 0.7476, "step": 61645 }, { "epoch": 0.68, "learning_rate": 3.862277438214191e-05, "loss": 0.6931, "step": 61650 }, { "epoch": 0.68, "learning_rate": 3.86218516550034e-05, "loss": 0.7374, "step": 61655 }, { "epoch": 0.68, "learning_rate": 3.8620928927864886e-05, "loss": 0.7719, "step": 61660 }, { "epoch": 0.68, "learning_rate": 3.8620006200726374e-05, "loss": 0.7094, "step": 61665 }, { "epoch": 0.68, "learning_rate": 3.8619083473587855e-05, "loss": 0.6631, "step": 61670 }, { "epoch": 0.68, "learning_rate": 3.861816074644935e-05, "loss": 0.6927, "step": 61675 }, { "epoch": 0.68, "learning_rate": 3.861723801931084e-05, "loss": 0.7506, "step": 61680 }, { "epoch": 0.68, "learning_rate": 3.8616315292172325e-05, "loss": 0.7739, "step": 61685 }, { "epoch": 0.68, "learning_rate": 3.8615392565033806e-05, "loss": 0.6892, "step": 61690 }, { "epoch": 0.68, "learning_rate": 3.86144698378953e-05, "loss": 0.7407, "step": 61695 }, { "epoch": 0.68, "learning_rate": 3.861354711075679e-05, "loss": 0.6752, "step": 61700 }, { "epoch": 0.68, "learning_rate": 3.861262438361827e-05, "loss": 0.7178, "step": 61705 }, { "epoch": 0.68, "learning_rate": 3.861170165647976e-05, "loss": 0.7921, "step": 61710 }, { "epoch": 0.68, "learning_rate": 3.8610778929341246e-05, "loss": 0.6963, "step": 61715 }, { "epoch": 0.68, "learning_rate": 3.860985620220274e-05, "loss": 0.762, "step": 61720 }, { "epoch": 0.68, "learning_rate": 3.860893347506422e-05, "loss": 0.7248, "step": 61725 }, { "epoch": 0.68, "learning_rate": 3.860801074792571e-05, "loss": 0.7436, "step": 61730 }, { "epoch": 0.68, "learning_rate": 3.86070880207872e-05, "loss": 0.6738, "step": 61735 }, { "epoch": 0.68, "learning_rate": 3.860616529364869e-05, "loss": 0.7222, "step": 61740 }, { "epoch": 0.68, "learning_rate": 3.860524256651017e-05, "loss": 0.7592, "step": 61745 }, { "epoch": 0.68, "learning_rate": 3.860431983937166e-05, "loss": 0.7342, "step": 61750 }, { "epoch": 0.68, "learning_rate": 3.860339711223315e-05, "loss": 0.7723, "step": 61755 }, { "epoch": 0.68, "learning_rate": 3.8602474385094636e-05, "loss": 0.7018, "step": 61760 }, { "epoch": 0.68, "learning_rate": 3.8601551657956124e-05, "loss": 0.6867, "step": 61765 }, { "epoch": 0.68, "learning_rate": 3.860062893081761e-05, "loss": 0.7466, "step": 61770 }, { "epoch": 0.68, "learning_rate": 3.85997062036791e-05, "loss": 0.6878, "step": 61775 }, { "epoch": 0.68, "learning_rate": 3.859878347654059e-05, "loss": 0.6884, "step": 61780 }, { "epoch": 0.68, "learning_rate": 3.8597860749402076e-05, "loss": 0.7111, "step": 61785 }, { "epoch": 0.68, "learning_rate": 3.8596938022263563e-05, "loss": 0.7347, "step": 61790 }, { "epoch": 0.68, "learning_rate": 3.859601529512505e-05, "loss": 0.8086, "step": 61795 }, { "epoch": 0.68, "learning_rate": 3.859509256798653e-05, "loss": 0.6968, "step": 61800 }, { "epoch": 0.68, "learning_rate": 3.859416984084803e-05, "loss": 0.7119, "step": 61805 }, { "epoch": 0.68, "learning_rate": 3.8593247113709515e-05, "loss": 0.7326, "step": 61810 }, { "epoch": 0.68, "learning_rate": 3.8592324386571e-05, "loss": 0.7376, "step": 61815 }, { "epoch": 0.68, "learning_rate": 3.8591401659432484e-05, "loss": 0.7279, "step": 61820 }, { "epoch": 0.68, "learning_rate": 3.859047893229398e-05, "loss": 0.7994, "step": 61825 }, { "epoch": 0.68, "learning_rate": 3.8589556205155466e-05, "loss": 0.7425, "step": 61830 }, { "epoch": 0.68, "learning_rate": 3.858863347801695e-05, "loss": 0.7081, "step": 61835 }, { "epoch": 0.68, "learning_rate": 3.8587710750878435e-05, "loss": 0.7311, "step": 61840 }, { "epoch": 0.68, "learning_rate": 3.858678802373993e-05, "loss": 0.718, "step": 61845 }, { "epoch": 0.68, "learning_rate": 3.858586529660142e-05, "loss": 0.738, "step": 61850 }, { "epoch": 0.68, "learning_rate": 3.85849425694629e-05, "loss": 0.745, "step": 61855 }, { "epoch": 0.68, "learning_rate": 3.8584019842324387e-05, "loss": 0.6967, "step": 61860 }, { "epoch": 0.69, "learning_rate": 3.8583097115185874e-05, "loss": 0.7131, "step": 61865 }, { "epoch": 0.69, "learning_rate": 3.858217438804736e-05, "loss": 0.7807, "step": 61870 }, { "epoch": 0.69, "learning_rate": 3.858125166090885e-05, "loss": 0.7214, "step": 61875 }, { "epoch": 0.69, "learning_rate": 3.858032893377034e-05, "loss": 0.7694, "step": 61880 }, { "epoch": 0.69, "learning_rate": 3.8579406206631826e-05, "loss": 0.7256, "step": 61885 }, { "epoch": 0.69, "learning_rate": 3.8578483479493314e-05, "loss": 0.7315, "step": 61890 }, { "epoch": 0.69, "learning_rate": 3.85775607523548e-05, "loss": 0.7154, "step": 61895 }, { "epoch": 0.69, "learning_rate": 3.857663802521629e-05, "loss": 0.7373, "step": 61900 }, { "epoch": 0.69, "learning_rate": 3.857571529807778e-05, "loss": 0.6923, "step": 61905 }, { "epoch": 0.69, "learning_rate": 3.8574792570939265e-05, "loss": 0.6674, "step": 61910 }, { "epoch": 0.69, "learning_rate": 3.857386984380075e-05, "loss": 0.7152, "step": 61915 }, { "epoch": 0.69, "learning_rate": 3.857294711666224e-05, "loss": 0.732, "step": 61920 }, { "epoch": 0.69, "learning_rate": 3.857202438952373e-05, "loss": 0.7789, "step": 61925 }, { "epoch": 0.69, "learning_rate": 3.8571101662385216e-05, "loss": 0.7592, "step": 61930 }, { "epoch": 0.69, "learning_rate": 3.8570178935246704e-05, "loss": 0.7237, "step": 61935 }, { "epoch": 0.69, "learning_rate": 3.856925620810819e-05, "loss": 0.6842, "step": 61940 }, { "epoch": 0.69, "learning_rate": 3.856833348096967e-05, "loss": 0.7202, "step": 61945 }, { "epoch": 0.69, "learning_rate": 3.856741075383116e-05, "loss": 0.729, "step": 61950 }, { "epoch": 0.69, "learning_rate": 3.8566488026692656e-05, "loss": 0.6924, "step": 61955 }, { "epoch": 0.69, "learning_rate": 3.8565565299554143e-05, "loss": 0.6414, "step": 61960 }, { "epoch": 0.69, "learning_rate": 3.8564642572415625e-05, "loss": 0.7662, "step": 61965 }, { "epoch": 0.69, "learning_rate": 3.856371984527711e-05, "loss": 0.7748, "step": 61970 }, { "epoch": 0.69, "learning_rate": 3.856279711813861e-05, "loss": 0.7489, "step": 61975 }, { "epoch": 0.69, "learning_rate": 3.856187439100009e-05, "loss": 0.7973, "step": 61980 }, { "epoch": 0.69, "learning_rate": 3.8560951663861576e-05, "loss": 0.7665, "step": 61985 }, { "epoch": 0.69, "learning_rate": 3.8560028936723064e-05, "loss": 0.7378, "step": 61990 }, { "epoch": 0.69, "learning_rate": 3.855910620958456e-05, "loss": 0.7407, "step": 61995 }, { "epoch": 0.69, "learning_rate": 3.855818348244604e-05, "loss": 0.7206, "step": 62000 }, { "epoch": 0.69, "eval_loss": 0.6967146992683411, "eval_runtime": 69.8888, "eval_samples_per_second": 28.617, "eval_steps_per_second": 14.308, "step": 62000 }, { "epoch": 0.69, "learning_rate": 3.855726075530753e-05, "loss": 0.7787, "step": 62005 }, { "epoch": 0.69, "learning_rate": 3.8556338028169015e-05, "loss": 0.6829, "step": 62010 }, { "epoch": 0.69, "learning_rate": 3.85554153010305e-05, "loss": 0.6913, "step": 62015 }, { "epoch": 0.69, "learning_rate": 3.855449257389199e-05, "loss": 0.7152, "step": 62020 }, { "epoch": 0.69, "learning_rate": 3.855356984675348e-05, "loss": 0.656, "step": 62025 }, { "epoch": 0.69, "learning_rate": 3.8552647119614967e-05, "loss": 0.729, "step": 62030 }, { "epoch": 0.69, "learning_rate": 3.8551724392476454e-05, "loss": 0.7186, "step": 62035 }, { "epoch": 0.69, "learning_rate": 3.855080166533794e-05, "loss": 0.7173, "step": 62040 }, { "epoch": 0.69, "learning_rate": 3.854987893819943e-05, "loss": 0.7611, "step": 62045 }, { "epoch": 0.69, "learning_rate": 3.854895621106092e-05, "loss": 0.75, "step": 62050 }, { "epoch": 0.69, "learning_rate": 3.85480334839224e-05, "loss": 0.7087, "step": 62055 }, { "epoch": 0.69, "learning_rate": 3.8547110756783894e-05, "loss": 0.7806, "step": 62060 }, { "epoch": 0.69, "learning_rate": 3.854618802964538e-05, "loss": 0.7451, "step": 62065 }, { "epoch": 0.69, "learning_rate": 3.854526530250687e-05, "loss": 0.753, "step": 62070 }, { "epoch": 0.69, "learning_rate": 3.854434257536835e-05, "loss": 0.7709, "step": 62075 }, { "epoch": 0.69, "learning_rate": 3.8543419848229845e-05, "loss": 0.6829, "step": 62080 }, { "epoch": 0.69, "learning_rate": 3.854249712109133e-05, "loss": 0.7224, "step": 62085 }, { "epoch": 0.69, "learning_rate": 3.8541574393952814e-05, "loss": 0.784, "step": 62090 }, { "epoch": 0.69, "learning_rate": 3.85406516668143e-05, "loss": 0.6594, "step": 62095 }, { "epoch": 0.69, "learning_rate": 3.853972893967579e-05, "loss": 0.7503, "step": 62100 }, { "epoch": 0.69, "learning_rate": 3.8538806212537284e-05, "loss": 0.7249, "step": 62105 }, { "epoch": 0.69, "learning_rate": 3.8537883485398765e-05, "loss": 0.7235, "step": 62110 }, { "epoch": 0.69, "learning_rate": 3.853696075826025e-05, "loss": 0.7818, "step": 62115 }, { "epoch": 0.69, "learning_rate": 3.853603803112174e-05, "loss": 0.7388, "step": 62120 }, { "epoch": 0.69, "learning_rate": 3.8535115303983236e-05, "loss": 0.8301, "step": 62125 }, { "epoch": 0.69, "learning_rate": 3.853419257684472e-05, "loss": 0.6565, "step": 62130 }, { "epoch": 0.69, "learning_rate": 3.8533269849706205e-05, "loss": 0.6952, "step": 62135 }, { "epoch": 0.69, "learning_rate": 3.853234712256769e-05, "loss": 0.7046, "step": 62140 }, { "epoch": 0.69, "learning_rate": 3.853142439542918e-05, "loss": 0.709, "step": 62145 }, { "epoch": 0.69, "learning_rate": 3.853050166829067e-05, "loss": 0.7417, "step": 62150 }, { "epoch": 0.69, "learning_rate": 3.8529578941152156e-05, "loss": 0.7214, "step": 62155 }, { "epoch": 0.69, "learning_rate": 3.8528656214013644e-05, "loss": 0.6932, "step": 62160 }, { "epoch": 0.69, "learning_rate": 3.8527733486875125e-05, "loss": 0.7714, "step": 62165 }, { "epoch": 0.69, "learning_rate": 3.852681075973662e-05, "loss": 0.7577, "step": 62170 }, { "epoch": 0.69, "learning_rate": 3.852588803259811e-05, "loss": 0.6745, "step": 62175 }, { "epoch": 0.69, "learning_rate": 3.8524965305459595e-05, "loss": 0.7063, "step": 62180 }, { "epoch": 0.69, "learning_rate": 3.8524042578321076e-05, "loss": 0.7604, "step": 62185 }, { "epoch": 0.69, "learning_rate": 3.852311985118257e-05, "loss": 0.7453, "step": 62190 }, { "epoch": 0.69, "learning_rate": 3.852219712404406e-05, "loss": 0.7227, "step": 62195 }, { "epoch": 0.69, "learning_rate": 3.852127439690555e-05, "loss": 0.755, "step": 62200 }, { "epoch": 0.69, "learning_rate": 3.852035166976703e-05, "loss": 0.757, "step": 62205 }, { "epoch": 0.69, "learning_rate": 3.851942894262852e-05, "loss": 0.6955, "step": 62210 }, { "epoch": 0.69, "learning_rate": 3.851850621549001e-05, "loss": 0.744, "step": 62215 }, { "epoch": 0.69, "learning_rate": 3.851758348835149e-05, "loss": 0.7104, "step": 62220 }, { "epoch": 0.69, "learning_rate": 3.851666076121298e-05, "loss": 0.7531, "step": 62225 }, { "epoch": 0.69, "learning_rate": 3.8515738034074474e-05, "loss": 0.7562, "step": 62230 }, { "epoch": 0.69, "learning_rate": 3.851481530693596e-05, "loss": 0.7824, "step": 62235 }, { "epoch": 0.69, "learning_rate": 3.851389257979744e-05, "loss": 0.7146, "step": 62240 }, { "epoch": 0.69, "learning_rate": 3.851296985265893e-05, "loss": 0.7457, "step": 62245 }, { "epoch": 0.69, "learning_rate": 3.851204712552042e-05, "loss": 0.8109, "step": 62250 }, { "epoch": 0.69, "learning_rate": 3.8511124398381906e-05, "loss": 0.7566, "step": 62255 }, { "epoch": 0.69, "learning_rate": 3.8510201671243394e-05, "loss": 0.7141, "step": 62260 }, { "epoch": 0.69, "learning_rate": 3.850927894410488e-05, "loss": 0.7652, "step": 62265 }, { "epoch": 0.69, "learning_rate": 3.850835621696637e-05, "loss": 0.7434, "step": 62270 }, { "epoch": 0.69, "learning_rate": 3.850743348982786e-05, "loss": 0.7843, "step": 62275 }, { "epoch": 0.69, "learning_rate": 3.8506510762689345e-05, "loss": 0.7144, "step": 62280 }, { "epoch": 0.69, "learning_rate": 3.850558803555083e-05, "loss": 0.7916, "step": 62285 }, { "epoch": 0.69, "learning_rate": 3.850466530841232e-05, "loss": 0.7357, "step": 62290 }, { "epoch": 0.69, "learning_rate": 3.850374258127381e-05, "loss": 0.7238, "step": 62295 }, { "epoch": 0.69, "learning_rate": 3.85028198541353e-05, "loss": 0.7116, "step": 62300 }, { "epoch": 0.69, "learning_rate": 3.8501897126996785e-05, "loss": 0.7511, "step": 62305 }, { "epoch": 0.69, "learning_rate": 3.850097439985827e-05, "loss": 0.7215, "step": 62310 }, { "epoch": 0.69, "learning_rate": 3.8500051672719754e-05, "loss": 0.7137, "step": 62315 }, { "epoch": 0.69, "learning_rate": 3.849912894558125e-05, "loss": 0.6925, "step": 62320 }, { "epoch": 0.69, "learning_rate": 3.8498206218442736e-05, "loss": 0.7275, "step": 62325 }, { "epoch": 0.69, "learning_rate": 3.849728349130422e-05, "loss": 0.7252, "step": 62330 }, { "epoch": 0.69, "learning_rate": 3.8496360764165705e-05, "loss": 0.7104, "step": 62335 }, { "epoch": 0.69, "learning_rate": 3.84954380370272e-05, "loss": 0.7222, "step": 62340 }, { "epoch": 0.69, "learning_rate": 3.849451530988869e-05, "loss": 0.6619, "step": 62345 }, { "epoch": 0.69, "learning_rate": 3.849359258275017e-05, "loss": 0.7567, "step": 62350 }, { "epoch": 0.69, "learning_rate": 3.8492669855611656e-05, "loss": 0.8252, "step": 62355 }, { "epoch": 0.69, "learning_rate": 3.849174712847315e-05, "loss": 0.7639, "step": 62360 }, { "epoch": 0.69, "learning_rate": 3.849082440133463e-05, "loss": 0.7539, "step": 62365 }, { "epoch": 0.69, "learning_rate": 3.848990167419612e-05, "loss": 0.7219, "step": 62370 }, { "epoch": 0.69, "learning_rate": 3.848897894705761e-05, "loss": 0.7662, "step": 62375 }, { "epoch": 0.69, "learning_rate": 3.84880562199191e-05, "loss": 0.7331, "step": 62380 }, { "epoch": 0.69, "learning_rate": 3.8487133492780584e-05, "loss": 0.7442, "step": 62385 }, { "epoch": 0.69, "learning_rate": 3.848621076564207e-05, "loss": 0.7073, "step": 62390 }, { "epoch": 0.69, "learning_rate": 3.848528803850356e-05, "loss": 0.704, "step": 62395 }, { "epoch": 0.69, "learning_rate": 3.848436531136505e-05, "loss": 0.687, "step": 62400 }, { "epoch": 0.69, "learning_rate": 3.8483442584226535e-05, "loss": 0.7125, "step": 62405 }, { "epoch": 0.69, "learning_rate": 3.848251985708802e-05, "loss": 0.7381, "step": 62410 }, { "epoch": 0.69, "learning_rate": 3.848159712994951e-05, "loss": 0.6668, "step": 62415 }, { "epoch": 0.69, "learning_rate": 3.8480674402811e-05, "loss": 0.7156, "step": 62420 }, { "epoch": 0.69, "learning_rate": 3.8479751675672486e-05, "loss": 0.6835, "step": 62425 }, { "epoch": 0.69, "learning_rate": 3.8478828948533974e-05, "loss": 0.6501, "step": 62430 }, { "epoch": 0.69, "learning_rate": 3.847790622139546e-05, "loss": 0.6854, "step": 62435 }, { "epoch": 0.69, "learning_rate": 3.847698349425694e-05, "loss": 0.7072, "step": 62440 }, { "epoch": 0.69, "learning_rate": 3.847606076711844e-05, "loss": 0.7104, "step": 62445 }, { "epoch": 0.69, "learning_rate": 3.8475138039979926e-05, "loss": 0.7369, "step": 62450 }, { "epoch": 0.69, "learning_rate": 3.8474215312841413e-05, "loss": 0.7838, "step": 62455 }, { "epoch": 0.69, "learning_rate": 3.8473292585702894e-05, "loss": 0.7048, "step": 62460 }, { "epoch": 0.69, "learning_rate": 3.847236985856439e-05, "loss": 0.6773, "step": 62465 }, { "epoch": 0.69, "learning_rate": 3.847144713142588e-05, "loss": 0.7519, "step": 62470 }, { "epoch": 0.69, "learning_rate": 3.847052440428736e-05, "loss": 0.6789, "step": 62475 }, { "epoch": 0.69, "learning_rate": 3.8469601677148846e-05, "loss": 0.7491, "step": 62480 }, { "epoch": 0.69, "learning_rate": 3.8468678950010334e-05, "loss": 0.8186, "step": 62485 }, { "epoch": 0.69, "learning_rate": 3.846775622287183e-05, "loss": 0.7295, "step": 62490 }, { "epoch": 0.69, "learning_rate": 3.846683349573331e-05, "loss": 0.7639, "step": 62495 }, { "epoch": 0.69, "learning_rate": 3.84659107685948e-05, "loss": 0.6997, "step": 62500 }, { "epoch": 0.69, "learning_rate": 3.8464988041456285e-05, "loss": 0.7531, "step": 62505 }, { "epoch": 0.69, "learning_rate": 3.846406531431778e-05, "loss": 0.7532, "step": 62510 }, { "epoch": 0.69, "learning_rate": 3.846314258717926e-05, "loss": 0.7509, "step": 62515 }, { "epoch": 0.69, "learning_rate": 3.846221986004075e-05, "loss": 0.7118, "step": 62520 }, { "epoch": 0.69, "learning_rate": 3.8461297132902237e-05, "loss": 0.7731, "step": 62525 }, { "epoch": 0.69, "learning_rate": 3.8460374405763724e-05, "loss": 0.7112, "step": 62530 }, { "epoch": 0.69, "learning_rate": 3.845945167862521e-05, "loss": 0.765, "step": 62535 }, { "epoch": 0.69, "learning_rate": 3.84585289514867e-05, "loss": 0.6941, "step": 62540 }, { "epoch": 0.69, "learning_rate": 3.845760622434819e-05, "loss": 0.706, "step": 62545 }, { "epoch": 0.69, "learning_rate": 3.845668349720967e-05, "loss": 0.7251, "step": 62550 }, { "epoch": 0.69, "learning_rate": 3.8455760770071164e-05, "loss": 0.74, "step": 62555 }, { "epoch": 0.69, "learning_rate": 3.845483804293265e-05, "loss": 0.7245, "step": 62560 }, { "epoch": 0.69, "learning_rate": 3.845391531579414e-05, "loss": 0.7767, "step": 62565 }, { "epoch": 0.69, "learning_rate": 3.845299258865562e-05, "loss": 0.7705, "step": 62570 }, { "epoch": 0.69, "learning_rate": 3.8452069861517115e-05, "loss": 0.7172, "step": 62575 }, { "epoch": 0.69, "learning_rate": 3.84511471343786e-05, "loss": 0.6805, "step": 62580 }, { "epoch": 0.69, "learning_rate": 3.845022440724009e-05, "loss": 0.7466, "step": 62585 }, { "epoch": 0.69, "learning_rate": 3.844930168010157e-05, "loss": 0.7071, "step": 62590 }, { "epoch": 0.69, "learning_rate": 3.8448378952963066e-05, "loss": 0.7668, "step": 62595 }, { "epoch": 0.69, "learning_rate": 3.8447456225824554e-05, "loss": 0.7432, "step": 62600 }, { "epoch": 0.69, "learning_rate": 3.8446533498686035e-05, "loss": 0.7679, "step": 62605 }, { "epoch": 0.69, "learning_rate": 3.844561077154752e-05, "loss": 0.774, "step": 62610 }, { "epoch": 0.69, "learning_rate": 3.844468804440902e-05, "loss": 0.7734, "step": 62615 }, { "epoch": 0.69, "learning_rate": 3.8443765317270506e-05, "loss": 0.7036, "step": 62620 }, { "epoch": 0.69, "learning_rate": 3.844284259013199e-05, "loss": 0.792, "step": 62625 }, { "epoch": 0.69, "learning_rate": 3.8441919862993475e-05, "loss": 0.7335, "step": 62630 }, { "epoch": 0.69, "learning_rate": 3.844099713585496e-05, "loss": 0.7296, "step": 62635 }, { "epoch": 0.69, "learning_rate": 3.844007440871645e-05, "loss": 0.6997, "step": 62640 }, { "epoch": 0.69, "learning_rate": 3.843915168157794e-05, "loss": 0.7064, "step": 62645 }, { "epoch": 0.69, "learning_rate": 3.8438228954439426e-05, "loss": 0.7226, "step": 62650 }, { "epoch": 0.69, "learning_rate": 3.8437306227300914e-05, "loss": 0.753, "step": 62655 }, { "epoch": 0.69, "learning_rate": 3.84363835001624e-05, "loss": 0.736, "step": 62660 }, { "epoch": 0.69, "learning_rate": 3.843546077302389e-05, "loss": 0.7534, "step": 62665 }, { "epoch": 0.69, "learning_rate": 3.843453804588538e-05, "loss": 0.7332, "step": 62670 }, { "epoch": 0.69, "learning_rate": 3.8433615318746865e-05, "loss": 0.7168, "step": 62675 }, { "epoch": 0.69, "learning_rate": 3.843269259160835e-05, "loss": 0.7021, "step": 62680 }, { "epoch": 0.69, "learning_rate": 3.843176986446984e-05, "loss": 0.776, "step": 62685 }, { "epoch": 0.69, "learning_rate": 3.843084713733133e-05, "loss": 0.7043, "step": 62690 }, { "epoch": 0.69, "learning_rate": 3.8429924410192817e-05, "loss": 0.7316, "step": 62695 }, { "epoch": 0.69, "learning_rate": 3.84290016830543e-05, "loss": 0.7326, "step": 62700 }, { "epoch": 0.69, "learning_rate": 3.842807895591579e-05, "loss": 0.8423, "step": 62705 }, { "epoch": 0.69, "learning_rate": 3.842715622877728e-05, "loss": 0.7045, "step": 62710 }, { "epoch": 0.69, "learning_rate": 3.842623350163876e-05, "loss": 0.7231, "step": 62715 }, { "epoch": 0.69, "learning_rate": 3.842531077450025e-05, "loss": 0.7295, "step": 62720 }, { "epoch": 0.69, "learning_rate": 3.8424388047361744e-05, "loss": 0.7617, "step": 62725 }, { "epoch": 0.69, "learning_rate": 3.842346532022323e-05, "loss": 0.7381, "step": 62730 }, { "epoch": 0.69, "learning_rate": 3.842254259308471e-05, "loss": 0.7331, "step": 62735 }, { "epoch": 0.69, "learning_rate": 3.84216198659462e-05, "loss": 0.6832, "step": 62740 }, { "epoch": 0.69, "learning_rate": 3.8420697138807695e-05, "loss": 0.7066, "step": 62745 }, { "epoch": 0.69, "learning_rate": 3.8419774411669176e-05, "loss": 0.6439, "step": 62750 }, { "epoch": 0.69, "learning_rate": 3.8418851684530664e-05, "loss": 0.7311, "step": 62755 }, { "epoch": 0.69, "learning_rate": 3.841792895739215e-05, "loss": 0.7004, "step": 62760 }, { "epoch": 0.69, "learning_rate": 3.8417006230253646e-05, "loss": 0.697, "step": 62765 }, { "epoch": 0.7, "learning_rate": 3.841608350311513e-05, "loss": 0.7373, "step": 62770 }, { "epoch": 0.7, "learning_rate": 3.8415160775976615e-05, "loss": 0.7323, "step": 62775 }, { "epoch": 0.7, "learning_rate": 3.84142380488381e-05, "loss": 0.7562, "step": 62780 }, { "epoch": 0.7, "learning_rate": 3.841331532169959e-05, "loss": 0.7356, "step": 62785 }, { "epoch": 0.7, "learning_rate": 3.841239259456108e-05, "loss": 0.7341, "step": 62790 }, { "epoch": 0.7, "learning_rate": 3.841146986742257e-05, "loss": 0.7531, "step": 62795 }, { "epoch": 0.7, "learning_rate": 3.8410547140284055e-05, "loss": 0.7342, "step": 62800 }, { "epoch": 0.7, "learning_rate": 3.840962441314554e-05, "loss": 0.7608, "step": 62805 }, { "epoch": 0.7, "learning_rate": 3.840870168600703e-05, "loss": 0.7869, "step": 62810 }, { "epoch": 0.7, "learning_rate": 3.840777895886852e-05, "loss": 0.7359, "step": 62815 }, { "epoch": 0.7, "learning_rate": 3.8406856231730006e-05, "loss": 0.7568, "step": 62820 }, { "epoch": 0.7, "learning_rate": 3.840593350459149e-05, "loss": 0.7471, "step": 62825 }, { "epoch": 0.7, "learning_rate": 3.840501077745298e-05, "loss": 0.667, "step": 62830 }, { "epoch": 0.7, "learning_rate": 3.840408805031447e-05, "loss": 0.7009, "step": 62835 }, { "epoch": 0.7, "learning_rate": 3.840316532317596e-05, "loss": 0.6714, "step": 62840 }, { "epoch": 0.7, "learning_rate": 3.840224259603744e-05, "loss": 0.6928, "step": 62845 }, { "epoch": 0.7, "learning_rate": 3.8401319868898926e-05, "loss": 0.6602, "step": 62850 }, { "epoch": 0.7, "learning_rate": 3.840039714176042e-05, "loss": 0.7476, "step": 62855 }, { "epoch": 0.7, "learning_rate": 3.839947441462191e-05, "loss": 0.7135, "step": 62860 }, { "epoch": 0.7, "learning_rate": 3.839855168748339e-05, "loss": 0.7673, "step": 62865 }, { "epoch": 0.7, "learning_rate": 3.839762896034488e-05, "loss": 0.6983, "step": 62870 }, { "epoch": 0.7, "learning_rate": 3.839670623320637e-05, "loss": 0.7171, "step": 62875 }, { "epoch": 0.7, "learning_rate": 3.8395783506067853e-05, "loss": 0.7006, "step": 62880 }, { "epoch": 0.7, "learning_rate": 3.839486077892934e-05, "loss": 0.6975, "step": 62885 }, { "epoch": 0.7, "learning_rate": 3.839393805179083e-05, "loss": 0.709, "step": 62890 }, { "epoch": 0.7, "learning_rate": 3.8393015324652324e-05, "loss": 0.6798, "step": 62895 }, { "epoch": 0.7, "learning_rate": 3.8392092597513805e-05, "loss": 0.7626, "step": 62900 }, { "epoch": 0.7, "learning_rate": 3.839116987037529e-05, "loss": 0.7466, "step": 62905 }, { "epoch": 0.7, "learning_rate": 3.839024714323678e-05, "loss": 0.6959, "step": 62910 }, { "epoch": 0.7, "learning_rate": 3.838932441609827e-05, "loss": 0.6989, "step": 62915 }, { "epoch": 0.7, "learning_rate": 3.8388401688959756e-05, "loss": 0.6949, "step": 62920 }, { "epoch": 0.7, "learning_rate": 3.8387478961821244e-05, "loss": 0.7406, "step": 62925 }, { "epoch": 0.7, "learning_rate": 3.838655623468273e-05, "loss": 0.7041, "step": 62930 }, { "epoch": 0.7, "learning_rate": 3.838563350754422e-05, "loss": 0.7359, "step": 62935 }, { "epoch": 0.7, "learning_rate": 3.838471078040571e-05, "loss": 0.6633, "step": 62940 }, { "epoch": 0.7, "learning_rate": 3.8383788053267195e-05, "loss": 0.7403, "step": 62945 }, { "epoch": 0.7, "learning_rate": 3.838286532612868e-05, "loss": 0.7068, "step": 62950 }, { "epoch": 0.7, "learning_rate": 3.8381942598990164e-05, "loss": 0.6974, "step": 62955 }, { "epoch": 0.7, "learning_rate": 3.838101987185166e-05, "loss": 0.7243, "step": 62960 }, { "epoch": 0.7, "learning_rate": 3.838009714471315e-05, "loss": 0.7134, "step": 62965 }, { "epoch": 0.7, "learning_rate": 3.8379174417574635e-05, "loss": 0.7333, "step": 62970 }, { "epoch": 0.7, "learning_rate": 3.8378251690436116e-05, "loss": 0.7039, "step": 62975 }, { "epoch": 0.7, "learning_rate": 3.837732896329761e-05, "loss": 0.6703, "step": 62980 }, { "epoch": 0.7, "learning_rate": 3.83764062361591e-05, "loss": 0.7147, "step": 62985 }, { "epoch": 0.7, "learning_rate": 3.837548350902058e-05, "loss": 0.7372, "step": 62990 }, { "epoch": 0.7, "learning_rate": 3.837456078188207e-05, "loss": 0.7068, "step": 62995 }, { "epoch": 0.7, "learning_rate": 3.8373638054743555e-05, "loss": 0.6439, "step": 63000 }, { "epoch": 0.7, "eval_loss": 0.6798315644264221, "eval_runtime": 69.7772, "eval_samples_per_second": 28.663, "eval_steps_per_second": 14.331, "step": 63000 }, { "epoch": 0.7, "learning_rate": 3.837271532760505e-05, "loss": 0.7141, "step": 63005 }, { "epoch": 0.7, "learning_rate": 3.837179260046653e-05, "loss": 0.8032, "step": 63010 }, { "epoch": 0.7, "learning_rate": 3.837086987332802e-05, "loss": 0.7384, "step": 63015 }, { "epoch": 0.7, "learning_rate": 3.8369947146189506e-05, "loss": 0.7891, "step": 63020 }, { "epoch": 0.7, "learning_rate": 3.8369024419050994e-05, "loss": 0.7747, "step": 63025 }, { "epoch": 0.7, "learning_rate": 3.836810169191248e-05, "loss": 0.7052, "step": 63030 }, { "epoch": 0.7, "learning_rate": 3.836717896477397e-05, "loss": 0.6919, "step": 63035 }, { "epoch": 0.7, "learning_rate": 3.836625623763546e-05, "loss": 0.75, "step": 63040 }, { "epoch": 0.7, "learning_rate": 3.8365333510496946e-05, "loss": 0.718, "step": 63045 }, { "epoch": 0.7, "learning_rate": 3.8364410783358434e-05, "loss": 0.7596, "step": 63050 }, { "epoch": 0.7, "learning_rate": 3.836348805621992e-05, "loss": 0.7804, "step": 63055 }, { "epoch": 0.7, "learning_rate": 3.836256532908141e-05, "loss": 0.7471, "step": 63060 }, { "epoch": 0.7, "learning_rate": 3.83616426019429e-05, "loss": 0.7344, "step": 63065 }, { "epoch": 0.7, "learning_rate": 3.8360719874804385e-05, "loss": 0.6815, "step": 63070 }, { "epoch": 0.7, "learning_rate": 3.835979714766587e-05, "loss": 0.7206, "step": 63075 }, { "epoch": 0.7, "learning_rate": 3.835887442052736e-05, "loss": 0.7428, "step": 63080 }, { "epoch": 0.7, "learning_rate": 3.835795169338884e-05, "loss": 0.7368, "step": 63085 }, { "epoch": 0.7, "learning_rate": 3.8357028966250336e-05, "loss": 0.7708, "step": 63090 }, { "epoch": 0.7, "learning_rate": 3.8356106239111824e-05, "loss": 0.8101, "step": 63095 }, { "epoch": 0.7, "learning_rate": 3.8355183511973305e-05, "loss": 0.7228, "step": 63100 }, { "epoch": 0.7, "learning_rate": 3.835426078483479e-05, "loss": 0.7118, "step": 63105 }, { "epoch": 0.7, "learning_rate": 3.835333805769629e-05, "loss": 0.7092, "step": 63110 }, { "epoch": 0.7, "learning_rate": 3.8352415330557776e-05, "loss": 0.7655, "step": 63115 }, { "epoch": 0.7, "learning_rate": 3.835149260341926e-05, "loss": 0.7768, "step": 63120 }, { "epoch": 0.7, "learning_rate": 3.8350569876280744e-05, "loss": 0.681, "step": 63125 }, { "epoch": 0.7, "learning_rate": 3.834964714914224e-05, "loss": 0.7589, "step": 63130 }, { "epoch": 0.7, "learning_rate": 3.834872442200372e-05, "loss": 0.7145, "step": 63135 }, { "epoch": 0.7, "learning_rate": 3.834780169486521e-05, "loss": 0.7517, "step": 63140 }, { "epoch": 0.7, "learning_rate": 3.8346878967726696e-05, "loss": 0.6879, "step": 63145 }, { "epoch": 0.7, "learning_rate": 3.8345956240588184e-05, "loss": 0.7324, "step": 63150 }, { "epoch": 0.7, "learning_rate": 3.834503351344967e-05, "loss": 0.7335, "step": 63155 }, { "epoch": 0.7, "learning_rate": 3.834411078631116e-05, "loss": 0.6993, "step": 63160 }, { "epoch": 0.7, "learning_rate": 3.834318805917265e-05, "loss": 0.6872, "step": 63165 }, { "epoch": 0.7, "learning_rate": 3.8342265332034135e-05, "loss": 0.7082, "step": 63170 }, { "epoch": 0.7, "learning_rate": 3.834134260489562e-05, "loss": 0.7036, "step": 63175 }, { "epoch": 0.7, "learning_rate": 3.834041987775711e-05, "loss": 0.7474, "step": 63180 }, { "epoch": 0.7, "learning_rate": 3.83394971506186e-05, "loss": 0.7176, "step": 63185 }, { "epoch": 0.7, "learning_rate": 3.8338574423480086e-05, "loss": 0.6811, "step": 63190 }, { "epoch": 0.7, "learning_rate": 3.8337651696341574e-05, "loss": 0.6973, "step": 63195 }, { "epoch": 0.7, "learning_rate": 3.833672896920306e-05, "loss": 0.7017, "step": 63200 }, { "epoch": 0.7, "learning_rate": 3.833580624206455e-05, "loss": 0.7195, "step": 63205 }, { "epoch": 0.7, "learning_rate": 3.833488351492603e-05, "loss": 0.7065, "step": 63210 }, { "epoch": 0.7, "learning_rate": 3.8333960787787526e-05, "loss": 0.6632, "step": 63215 }, { "epoch": 0.7, "learning_rate": 3.8333038060649014e-05, "loss": 0.6954, "step": 63220 }, { "epoch": 0.7, "learning_rate": 3.83321153335105e-05, "loss": 0.7699, "step": 63225 }, { "epoch": 0.7, "learning_rate": 3.833119260637198e-05, "loss": 0.6758, "step": 63230 }, { "epoch": 0.7, "learning_rate": 3.833026987923347e-05, "loss": 0.7655, "step": 63235 }, { "epoch": 0.7, "learning_rate": 3.8329347152094965e-05, "loss": 0.7372, "step": 63240 }, { "epoch": 0.7, "learning_rate": 3.832842442495645e-05, "loss": 0.7546, "step": 63245 }, { "epoch": 0.7, "learning_rate": 3.8327501697817934e-05, "loss": 0.7244, "step": 63250 }, { "epoch": 0.7, "learning_rate": 3.832657897067942e-05, "loss": 0.683, "step": 63255 }, { "epoch": 0.7, "learning_rate": 3.8325656243540916e-05, "loss": 0.6936, "step": 63260 }, { "epoch": 0.7, "learning_rate": 3.83247335164024e-05, "loss": 0.7159, "step": 63265 }, { "epoch": 0.7, "learning_rate": 3.8323810789263885e-05, "loss": 0.775, "step": 63270 }, { "epoch": 0.7, "learning_rate": 3.832288806212537e-05, "loss": 0.6684, "step": 63275 }, { "epoch": 0.7, "learning_rate": 3.832196533498687e-05, "loss": 0.7482, "step": 63280 }, { "epoch": 0.7, "learning_rate": 3.832104260784835e-05, "loss": 0.7365, "step": 63285 }, { "epoch": 0.7, "learning_rate": 3.832011988070984e-05, "loss": 0.6894, "step": 63290 }, { "epoch": 0.7, "learning_rate": 3.8319197153571325e-05, "loss": 0.7202, "step": 63295 }, { "epoch": 0.7, "learning_rate": 3.831827442643281e-05, "loss": 0.7254, "step": 63300 }, { "epoch": 0.7, "learning_rate": 3.83173516992943e-05, "loss": 0.7694, "step": 63305 }, { "epoch": 0.7, "learning_rate": 3.831642897215579e-05, "loss": 0.7344, "step": 63310 }, { "epoch": 0.7, "learning_rate": 3.8315506245017276e-05, "loss": 0.7405, "step": 63315 }, { "epoch": 0.7, "learning_rate": 3.8314583517878764e-05, "loss": 0.6881, "step": 63320 }, { "epoch": 0.7, "learning_rate": 3.831366079074025e-05, "loss": 0.7131, "step": 63325 }, { "epoch": 0.7, "learning_rate": 3.831273806360174e-05, "loss": 0.7036, "step": 63330 }, { "epoch": 0.7, "learning_rate": 3.831181533646323e-05, "loss": 0.747, "step": 63335 }, { "epoch": 0.7, "learning_rate": 3.831089260932471e-05, "loss": 0.7101, "step": 63340 }, { "epoch": 0.7, "learning_rate": 3.83099698821862e-05, "loss": 0.7476, "step": 63345 }, { "epoch": 0.7, "learning_rate": 3.830904715504769e-05, "loss": 0.7303, "step": 63350 }, { "epoch": 0.7, "learning_rate": 3.830812442790918e-05, "loss": 0.6428, "step": 63355 }, { "epoch": 0.7, "learning_rate": 3.830720170077066e-05, "loss": 0.7247, "step": 63360 }, { "epoch": 0.7, "learning_rate": 3.8306278973632154e-05, "loss": 0.7055, "step": 63365 }, { "epoch": 0.7, "learning_rate": 3.830535624649364e-05, "loss": 0.6751, "step": 63370 }, { "epoch": 0.7, "learning_rate": 3.830443351935512e-05, "loss": 0.7787, "step": 63375 }, { "epoch": 0.7, "learning_rate": 3.830351079221661e-05, "loss": 0.6825, "step": 63380 }, { "epoch": 0.7, "learning_rate": 3.83025880650781e-05, "loss": 0.7387, "step": 63385 }, { "epoch": 0.7, "learning_rate": 3.8301665337939594e-05, "loss": 0.678, "step": 63390 }, { "epoch": 0.7, "learning_rate": 3.8300742610801075e-05, "loss": 0.7221, "step": 63395 }, { "epoch": 0.7, "learning_rate": 3.829981988366256e-05, "loss": 0.735, "step": 63400 }, { "epoch": 0.7, "learning_rate": 3.829889715652405e-05, "loss": 0.6647, "step": 63405 }, { "epoch": 0.7, "learning_rate": 3.829797442938554e-05, "loss": 0.7654, "step": 63410 }, { "epoch": 0.7, "learning_rate": 3.8297051702247026e-05, "loss": 0.75, "step": 63415 }, { "epoch": 0.7, "learning_rate": 3.8296128975108514e-05, "loss": 0.7106, "step": 63420 }, { "epoch": 0.7, "learning_rate": 3.829520624797e-05, "loss": 0.667, "step": 63425 }, { "epoch": 0.7, "learning_rate": 3.829428352083149e-05, "loss": 0.6991, "step": 63430 }, { "epoch": 0.7, "learning_rate": 3.829336079369298e-05, "loss": 0.7158, "step": 63435 }, { "epoch": 0.7, "learning_rate": 3.8292438066554465e-05, "loss": 0.6978, "step": 63440 }, { "epoch": 0.7, "learning_rate": 3.829151533941595e-05, "loss": 0.706, "step": 63445 }, { "epoch": 0.7, "learning_rate": 3.829059261227744e-05, "loss": 0.7087, "step": 63450 }, { "epoch": 0.7, "learning_rate": 3.828966988513893e-05, "loss": 0.7672, "step": 63455 }, { "epoch": 0.7, "learning_rate": 3.828874715800042e-05, "loss": 0.7174, "step": 63460 }, { "epoch": 0.7, "learning_rate": 3.8287824430861905e-05, "loss": 0.7504, "step": 63465 }, { "epoch": 0.7, "learning_rate": 3.8286901703723386e-05, "loss": 0.7399, "step": 63470 }, { "epoch": 0.7, "learning_rate": 3.828597897658488e-05, "loss": 0.743, "step": 63475 }, { "epoch": 0.7, "learning_rate": 3.828505624944637e-05, "loss": 0.7634, "step": 63480 }, { "epoch": 0.7, "learning_rate": 3.828413352230785e-05, "loss": 0.8386, "step": 63485 }, { "epoch": 0.7, "learning_rate": 3.828321079516934e-05, "loss": 0.7716, "step": 63490 }, { "epoch": 0.7, "learning_rate": 3.828228806803083e-05, "loss": 0.72, "step": 63495 }, { "epoch": 0.7, "learning_rate": 3.828136534089232e-05, "loss": 0.7296, "step": 63500 }, { "epoch": 0.7, "learning_rate": 3.82804426137538e-05, "loss": 0.7175, "step": 63505 }, { "epoch": 0.7, "learning_rate": 3.827951988661529e-05, "loss": 0.7556, "step": 63510 }, { "epoch": 0.7, "learning_rate": 3.827859715947678e-05, "loss": 0.6613, "step": 63515 }, { "epoch": 0.7, "learning_rate": 3.8277674432338264e-05, "loss": 0.7205, "step": 63520 }, { "epoch": 0.7, "learning_rate": 3.827675170519975e-05, "loss": 0.7323, "step": 63525 }, { "epoch": 0.7, "learning_rate": 3.827582897806124e-05, "loss": 0.7361, "step": 63530 }, { "epoch": 0.7, "learning_rate": 3.827490625092273e-05, "loss": 0.6982, "step": 63535 }, { "epoch": 0.7, "learning_rate": 3.8273983523784216e-05, "loss": 0.7307, "step": 63540 }, { "epoch": 0.7, "learning_rate": 3.8273060796645703e-05, "loss": 0.7009, "step": 63545 }, { "epoch": 0.7, "learning_rate": 3.827213806950719e-05, "loss": 0.7444, "step": 63550 }, { "epoch": 0.7, "learning_rate": 3.827121534236868e-05, "loss": 0.6855, "step": 63555 }, { "epoch": 0.7, "learning_rate": 3.827029261523017e-05, "loss": 0.732, "step": 63560 }, { "epoch": 0.7, "learning_rate": 3.8269369888091655e-05, "loss": 0.8071, "step": 63565 }, { "epoch": 0.7, "learning_rate": 3.826844716095314e-05, "loss": 0.716, "step": 63570 }, { "epoch": 0.7, "learning_rate": 3.826752443381463e-05, "loss": 0.7649, "step": 63575 }, { "epoch": 0.7, "learning_rate": 3.826660170667612e-05, "loss": 0.723, "step": 63580 }, { "epoch": 0.7, "learning_rate": 3.8265678979537606e-05, "loss": 0.7158, "step": 63585 }, { "epoch": 0.7, "learning_rate": 3.8264756252399094e-05, "loss": 0.7479, "step": 63590 }, { "epoch": 0.7, "learning_rate": 3.8263833525260575e-05, "loss": 0.7506, "step": 63595 }, { "epoch": 0.7, "learning_rate": 3.826291079812207e-05, "loss": 0.7772, "step": 63600 }, { "epoch": 0.7, "learning_rate": 3.826198807098356e-05, "loss": 0.7753, "step": 63605 }, { "epoch": 0.7, "learning_rate": 3.8261065343845045e-05, "loss": 0.6901, "step": 63610 }, { "epoch": 0.7, "learning_rate": 3.8260142616706527e-05, "loss": 0.7056, "step": 63615 }, { "epoch": 0.7, "learning_rate": 3.8259219889568014e-05, "loss": 0.7222, "step": 63620 }, { "epoch": 0.7, "learning_rate": 3.825829716242951e-05, "loss": 0.7272, "step": 63625 }, { "epoch": 0.7, "learning_rate": 3.8257374435291e-05, "loss": 0.69, "step": 63630 }, { "epoch": 0.7, "learning_rate": 3.825645170815248e-05, "loss": 0.7126, "step": 63635 }, { "epoch": 0.7, "learning_rate": 3.8255528981013966e-05, "loss": 0.7457, "step": 63640 }, { "epoch": 0.7, "learning_rate": 3.825460625387546e-05, "loss": 0.7191, "step": 63645 }, { "epoch": 0.7, "learning_rate": 3.825368352673694e-05, "loss": 0.7375, "step": 63650 }, { "epoch": 0.7, "learning_rate": 3.825276079959843e-05, "loss": 0.6875, "step": 63655 }, { "epoch": 0.7, "learning_rate": 3.825183807245992e-05, "loss": 0.7484, "step": 63660 }, { "epoch": 0.7, "learning_rate": 3.825091534532141e-05, "loss": 0.7327, "step": 63665 }, { "epoch": 0.7, "learning_rate": 3.824999261818289e-05, "loss": 0.7332, "step": 63670 }, { "epoch": 0.71, "learning_rate": 3.824906989104438e-05, "loss": 0.7484, "step": 63675 }, { "epoch": 0.71, "learning_rate": 3.824814716390587e-05, "loss": 0.7255, "step": 63680 }, { "epoch": 0.71, "learning_rate": 3.8247224436767356e-05, "loss": 0.7123, "step": 63685 }, { "epoch": 0.71, "learning_rate": 3.8246301709628844e-05, "loss": 0.7802, "step": 63690 }, { "epoch": 0.71, "learning_rate": 3.824537898249033e-05, "loss": 0.7128, "step": 63695 }, { "epoch": 0.71, "learning_rate": 3.824445625535182e-05, "loss": 0.7738, "step": 63700 }, { "epoch": 0.71, "learning_rate": 3.824353352821331e-05, "loss": 0.7559, "step": 63705 }, { "epoch": 0.71, "learning_rate": 3.8242610801074796e-05, "loss": 0.7483, "step": 63710 }, { "epoch": 0.71, "learning_rate": 3.8241688073936283e-05, "loss": 0.6959, "step": 63715 }, { "epoch": 0.71, "learning_rate": 3.824076534679777e-05, "loss": 0.7493, "step": 63720 }, { "epoch": 0.71, "learning_rate": 3.823984261965925e-05, "loss": 0.6971, "step": 63725 }, { "epoch": 0.71, "learning_rate": 3.823891989252075e-05, "loss": 0.6885, "step": 63730 }, { "epoch": 0.71, "learning_rate": 3.8237997165382235e-05, "loss": 0.6952, "step": 63735 }, { "epoch": 0.71, "learning_rate": 3.823707443824372e-05, "loss": 0.7148, "step": 63740 }, { "epoch": 0.71, "learning_rate": 3.8236151711105204e-05, "loss": 0.7416, "step": 63745 }, { "epoch": 0.71, "learning_rate": 3.82352289839667e-05, "loss": 0.7113, "step": 63750 }, { "epoch": 0.71, "learning_rate": 3.8234306256828186e-05, "loss": 0.7282, "step": 63755 }, { "epoch": 0.71, "learning_rate": 3.823338352968967e-05, "loss": 0.7602, "step": 63760 }, { "epoch": 0.71, "learning_rate": 3.8232460802551155e-05, "loss": 0.6905, "step": 63765 }, { "epoch": 0.71, "learning_rate": 3.823153807541264e-05, "loss": 0.7458, "step": 63770 }, { "epoch": 0.71, "learning_rate": 3.823061534827414e-05, "loss": 0.7259, "step": 63775 }, { "epoch": 0.71, "learning_rate": 3.822969262113562e-05, "loss": 0.7321, "step": 63780 }, { "epoch": 0.71, "learning_rate": 3.8228769893997107e-05, "loss": 0.7428, "step": 63785 }, { "epoch": 0.71, "learning_rate": 3.8227847166858594e-05, "loss": 0.6954, "step": 63790 }, { "epoch": 0.71, "learning_rate": 3.822692443972008e-05, "loss": 0.7736, "step": 63795 }, { "epoch": 0.71, "learning_rate": 3.822600171258157e-05, "loss": 0.6927, "step": 63800 }, { "epoch": 0.71, "learning_rate": 3.822507898544306e-05, "loss": 0.7161, "step": 63805 }, { "epoch": 0.71, "learning_rate": 3.8224156258304546e-05, "loss": 0.7471, "step": 63810 }, { "epoch": 0.71, "learning_rate": 3.8223233531166034e-05, "loss": 0.6692, "step": 63815 }, { "epoch": 0.71, "learning_rate": 3.822231080402752e-05, "loss": 0.6827, "step": 63820 }, { "epoch": 0.71, "learning_rate": 3.822138807688901e-05, "loss": 0.7683, "step": 63825 }, { "epoch": 0.71, "learning_rate": 3.82204653497505e-05, "loss": 0.7347, "step": 63830 }, { "epoch": 0.71, "learning_rate": 3.821954262261198e-05, "loss": 0.7295, "step": 63835 }, { "epoch": 0.71, "learning_rate": 3.821861989547347e-05, "loss": 0.7354, "step": 63840 }, { "epoch": 0.71, "learning_rate": 3.821769716833496e-05, "loss": 0.7336, "step": 63845 }, { "epoch": 0.71, "learning_rate": 3.821677444119645e-05, "loss": 0.688, "step": 63850 }, { "epoch": 0.71, "learning_rate": 3.821585171405793e-05, "loss": 0.7528, "step": 63855 }, { "epoch": 0.71, "learning_rate": 3.8214928986919424e-05, "loss": 0.7077, "step": 63860 }, { "epoch": 0.71, "learning_rate": 3.821400625978091e-05, "loss": 0.7363, "step": 63865 }, { "epoch": 0.71, "learning_rate": 3.821308353264239e-05, "loss": 0.6887, "step": 63870 }, { "epoch": 0.71, "learning_rate": 3.821216080550388e-05, "loss": 0.6986, "step": 63875 }, { "epoch": 0.71, "learning_rate": 3.8211238078365376e-05, "loss": 0.7206, "step": 63880 }, { "epoch": 0.71, "learning_rate": 3.8210315351226864e-05, "loss": 0.7612, "step": 63885 }, { "epoch": 0.71, "learning_rate": 3.8209392624088345e-05, "loss": 0.7529, "step": 63890 }, { "epoch": 0.71, "learning_rate": 3.820846989694983e-05, "loss": 0.7013, "step": 63895 }, { "epoch": 0.71, "learning_rate": 3.820754716981133e-05, "loss": 0.7007, "step": 63900 }, { "epoch": 0.71, "learning_rate": 3.820662444267281e-05, "loss": 0.6841, "step": 63905 }, { "epoch": 0.71, "learning_rate": 3.8205701715534296e-05, "loss": 0.7333, "step": 63910 }, { "epoch": 0.71, "learning_rate": 3.8204778988395784e-05, "loss": 0.7128, "step": 63915 }, { "epoch": 0.71, "learning_rate": 3.820385626125727e-05, "loss": 0.743, "step": 63920 }, { "epoch": 0.71, "learning_rate": 3.820293353411876e-05, "loss": 0.717, "step": 63925 }, { "epoch": 0.71, "learning_rate": 3.820201080698025e-05, "loss": 0.7111, "step": 63930 }, { "epoch": 0.71, "learning_rate": 3.8201088079841735e-05, "loss": 0.7691, "step": 63935 }, { "epoch": 0.71, "learning_rate": 3.820016535270322e-05, "loss": 0.7042, "step": 63940 }, { "epoch": 0.71, "learning_rate": 3.819924262556471e-05, "loss": 0.7108, "step": 63945 }, { "epoch": 0.71, "learning_rate": 3.81983198984262e-05, "loss": 0.6938, "step": 63950 }, { "epoch": 0.71, "learning_rate": 3.819739717128769e-05, "loss": 0.7485, "step": 63955 }, { "epoch": 0.71, "learning_rate": 3.8196474444149175e-05, "loss": 0.7248, "step": 63960 }, { "epoch": 0.71, "learning_rate": 3.819555171701066e-05, "loss": 0.7931, "step": 63965 }, { "epoch": 0.71, "learning_rate": 3.819462898987215e-05, "loss": 0.7785, "step": 63970 }, { "epoch": 0.71, "learning_rate": 3.819370626273364e-05, "loss": 0.7681, "step": 63975 }, { "epoch": 0.71, "learning_rate": 3.819278353559512e-05, "loss": 0.7714, "step": 63980 }, { "epoch": 0.71, "learning_rate": 3.819186080845661e-05, "loss": 0.6809, "step": 63985 }, { "epoch": 0.71, "learning_rate": 3.81909380813181e-05, "loss": 0.7128, "step": 63990 }, { "epoch": 0.71, "learning_rate": 3.819001535417959e-05, "loss": 0.7343, "step": 63995 }, { "epoch": 0.71, "learning_rate": 3.818909262704107e-05, "loss": 0.7309, "step": 64000 }, { "epoch": 0.71, "eval_loss": 0.71632981300354, "eval_runtime": 69.7162, "eval_samples_per_second": 28.688, "eval_steps_per_second": 14.344, "step": 64000 }, { "epoch": 0.71, "learning_rate": 3.818816989990256e-05, "loss": 0.7756, "step": 64005 }, { "epoch": 0.71, "learning_rate": 3.818724717276405e-05, "loss": 0.6963, "step": 64010 }, { "epoch": 0.71, "learning_rate": 3.818632444562554e-05, "loss": 0.7483, "step": 64015 }, { "epoch": 0.71, "learning_rate": 3.818540171848702e-05, "loss": 0.7766, "step": 64020 }, { "epoch": 0.71, "learning_rate": 3.818447899134851e-05, "loss": 0.7379, "step": 64025 }, { "epoch": 0.71, "learning_rate": 3.8183556264210004e-05, "loss": 0.6793, "step": 64030 }, { "epoch": 0.71, "learning_rate": 3.8182633537071485e-05, "loss": 0.7204, "step": 64035 }, { "epoch": 0.71, "learning_rate": 3.818171080993297e-05, "loss": 0.7137, "step": 64040 }, { "epoch": 0.71, "learning_rate": 3.818078808279446e-05, "loss": 0.7657, "step": 64045 }, { "epoch": 0.71, "learning_rate": 3.8179865355655956e-05, "loss": 0.7299, "step": 64050 }, { "epoch": 0.71, "learning_rate": 3.817894262851744e-05, "loss": 0.7507, "step": 64055 }, { "epoch": 0.71, "learning_rate": 3.8178019901378925e-05, "loss": 0.7454, "step": 64060 }, { "epoch": 0.71, "learning_rate": 3.817709717424041e-05, "loss": 0.6568, "step": 64065 }, { "epoch": 0.71, "learning_rate": 3.81761744471019e-05, "loss": 0.7151, "step": 64070 }, { "epoch": 0.71, "learning_rate": 3.817525171996339e-05, "loss": 0.719, "step": 64075 }, { "epoch": 0.71, "learning_rate": 3.8174328992824876e-05, "loss": 0.7249, "step": 64080 }, { "epoch": 0.71, "learning_rate": 3.8173406265686364e-05, "loss": 0.7203, "step": 64085 }, { "epoch": 0.71, "learning_rate": 3.817248353854785e-05, "loss": 0.7471, "step": 64090 }, { "epoch": 0.71, "learning_rate": 3.817156081140934e-05, "loss": 0.7643, "step": 64095 }, { "epoch": 0.71, "learning_rate": 3.817063808427083e-05, "loss": 0.7163, "step": 64100 }, { "epoch": 0.71, "learning_rate": 3.8169715357132315e-05, "loss": 0.6787, "step": 64105 }, { "epoch": 0.71, "learning_rate": 3.8168792629993796e-05, "loss": 0.7339, "step": 64110 }, { "epoch": 0.71, "learning_rate": 3.816786990285529e-05, "loss": 0.6675, "step": 64115 }, { "epoch": 0.71, "learning_rate": 3.816694717571678e-05, "loss": 0.7657, "step": 64120 }, { "epoch": 0.71, "learning_rate": 3.816602444857827e-05, "loss": 0.692, "step": 64125 }, { "epoch": 0.71, "learning_rate": 3.816510172143975e-05, "loss": 0.7044, "step": 64130 }, { "epoch": 0.71, "learning_rate": 3.816417899430124e-05, "loss": 0.7186, "step": 64135 }, { "epoch": 0.71, "learning_rate": 3.816325626716273e-05, "loss": 0.6826, "step": 64140 }, { "epoch": 0.71, "learning_rate": 3.816233354002421e-05, "loss": 0.738, "step": 64145 }, { "epoch": 0.71, "learning_rate": 3.81614108128857e-05, "loss": 0.7166, "step": 64150 }, { "epoch": 0.71, "learning_rate": 3.816048808574719e-05, "loss": 0.7259, "step": 64155 }, { "epoch": 0.71, "learning_rate": 3.815956535860868e-05, "loss": 0.758, "step": 64160 }, { "epoch": 0.71, "learning_rate": 3.815864263147016e-05, "loss": 0.6444, "step": 64165 }, { "epoch": 0.71, "learning_rate": 3.815771990433165e-05, "loss": 0.7118, "step": 64170 }, { "epoch": 0.71, "learning_rate": 3.815679717719314e-05, "loss": 0.7111, "step": 64175 }, { "epoch": 0.71, "learning_rate": 3.8155874450054626e-05, "loss": 0.7323, "step": 64180 }, { "epoch": 0.71, "learning_rate": 3.8154951722916114e-05, "loss": 0.7696, "step": 64185 }, { "epoch": 0.71, "learning_rate": 3.81540289957776e-05, "loss": 0.71, "step": 64190 }, { "epoch": 0.71, "learning_rate": 3.815310626863909e-05, "loss": 0.7359, "step": 64195 }, { "epoch": 0.71, "learning_rate": 3.815218354150058e-05, "loss": 0.7223, "step": 64200 }, { "epoch": 0.71, "learning_rate": 3.8151260814362066e-05, "loss": 0.7549, "step": 64205 }, { "epoch": 0.71, "learning_rate": 3.8150338087223553e-05, "loss": 0.7327, "step": 64210 }, { "epoch": 0.71, "learning_rate": 3.814941536008504e-05, "loss": 0.7643, "step": 64215 }, { "epoch": 0.71, "learning_rate": 3.814849263294652e-05, "loss": 0.7915, "step": 64220 }, { "epoch": 0.71, "learning_rate": 3.814756990580802e-05, "loss": 0.7668, "step": 64225 }, { "epoch": 0.71, "learning_rate": 3.8146647178669505e-05, "loss": 0.6642, "step": 64230 }, { "epoch": 0.71, "learning_rate": 3.814572445153099e-05, "loss": 0.7538, "step": 64235 }, { "epoch": 0.71, "learning_rate": 3.8144801724392474e-05, "loss": 0.7187, "step": 64240 }, { "epoch": 0.71, "learning_rate": 3.814387899725397e-05, "loss": 0.6791, "step": 64245 }, { "epoch": 0.71, "learning_rate": 3.8142956270115456e-05, "loss": 0.7519, "step": 64250 }, { "epoch": 0.71, "learning_rate": 3.814203354297694e-05, "loss": 0.6986, "step": 64255 }, { "epoch": 0.71, "learning_rate": 3.8141110815838425e-05, "loss": 0.7891, "step": 64260 }, { "epoch": 0.71, "learning_rate": 3.814018808869992e-05, "loss": 0.6846, "step": 64265 }, { "epoch": 0.71, "learning_rate": 3.813926536156141e-05, "loss": 0.7749, "step": 64270 }, { "epoch": 0.71, "learning_rate": 3.813834263442289e-05, "loss": 0.6646, "step": 64275 }, { "epoch": 0.71, "learning_rate": 3.8137419907284377e-05, "loss": 0.7705, "step": 64280 }, { "epoch": 0.71, "learning_rate": 3.813649718014587e-05, "loss": 0.6296, "step": 64285 }, { "epoch": 0.71, "learning_rate": 3.813557445300735e-05, "loss": 0.7164, "step": 64290 }, { "epoch": 0.71, "learning_rate": 3.813465172586884e-05, "loss": 0.7801, "step": 64295 }, { "epoch": 0.71, "learning_rate": 3.813372899873033e-05, "loss": 0.7124, "step": 64300 }, { "epoch": 0.71, "learning_rate": 3.8132806271591816e-05, "loss": 0.7265, "step": 64305 }, { "epoch": 0.71, "learning_rate": 3.8131883544453304e-05, "loss": 0.728, "step": 64310 }, { "epoch": 0.71, "learning_rate": 3.813096081731479e-05, "loss": 0.7259, "step": 64315 }, { "epoch": 0.71, "learning_rate": 3.813003809017628e-05, "loss": 0.7455, "step": 64320 }, { "epoch": 0.71, "learning_rate": 3.812911536303777e-05, "loss": 0.7422, "step": 64325 }, { "epoch": 0.71, "learning_rate": 3.8128192635899255e-05, "loss": 0.6693, "step": 64330 }, { "epoch": 0.71, "learning_rate": 3.812726990876074e-05, "loss": 0.7125, "step": 64335 }, { "epoch": 0.71, "learning_rate": 3.812634718162223e-05, "loss": 0.7464, "step": 64340 }, { "epoch": 0.71, "learning_rate": 3.812542445448372e-05, "loss": 0.7234, "step": 64345 }, { "epoch": 0.71, "learning_rate": 3.8124501727345206e-05, "loss": 0.7402, "step": 64350 }, { "epoch": 0.71, "learning_rate": 3.8123579000206694e-05, "loss": 0.677, "step": 64355 }, { "epoch": 0.71, "learning_rate": 3.812265627306818e-05, "loss": 0.7141, "step": 64360 }, { "epoch": 0.71, "learning_rate": 3.812173354592966e-05, "loss": 0.7089, "step": 64365 }, { "epoch": 0.71, "learning_rate": 3.812081081879115e-05, "loss": 0.7084, "step": 64370 }, { "epoch": 0.71, "learning_rate": 3.8119888091652646e-05, "loss": 0.7085, "step": 64375 }, { "epoch": 0.71, "learning_rate": 3.8118965364514133e-05, "loss": 0.7738, "step": 64380 }, { "epoch": 0.71, "learning_rate": 3.8118042637375615e-05, "loss": 0.7128, "step": 64385 }, { "epoch": 0.71, "learning_rate": 3.81171199102371e-05, "loss": 0.7152, "step": 64390 }, { "epoch": 0.71, "learning_rate": 3.81161971830986e-05, "loss": 0.6481, "step": 64395 }, { "epoch": 0.71, "learning_rate": 3.8115274455960085e-05, "loss": 0.7277, "step": 64400 }, { "epoch": 0.71, "learning_rate": 3.8114351728821566e-05, "loss": 0.7288, "step": 64405 }, { "epoch": 0.71, "learning_rate": 3.8113429001683054e-05, "loss": 0.7539, "step": 64410 }, { "epoch": 0.71, "learning_rate": 3.811250627454455e-05, "loss": 0.752, "step": 64415 }, { "epoch": 0.71, "learning_rate": 3.811158354740603e-05, "loss": 0.7723, "step": 64420 }, { "epoch": 0.71, "learning_rate": 3.811066082026752e-05, "loss": 0.6725, "step": 64425 }, { "epoch": 0.71, "learning_rate": 3.8109738093129005e-05, "loss": 0.7112, "step": 64430 }, { "epoch": 0.71, "learning_rate": 3.81088153659905e-05, "loss": 0.6596, "step": 64435 }, { "epoch": 0.71, "learning_rate": 3.810789263885198e-05, "loss": 0.6617, "step": 64440 }, { "epoch": 0.71, "learning_rate": 3.810696991171347e-05, "loss": 0.7312, "step": 64445 }, { "epoch": 0.71, "learning_rate": 3.8106047184574957e-05, "loss": 0.7612, "step": 64450 }, { "epoch": 0.71, "learning_rate": 3.8105124457436444e-05, "loss": 0.715, "step": 64455 }, { "epoch": 0.71, "learning_rate": 3.810420173029793e-05, "loss": 0.7134, "step": 64460 }, { "epoch": 0.71, "learning_rate": 3.810327900315942e-05, "loss": 0.694, "step": 64465 }, { "epoch": 0.71, "learning_rate": 3.810235627602091e-05, "loss": 0.6983, "step": 64470 }, { "epoch": 0.71, "learning_rate": 3.8101433548882396e-05, "loss": 0.7122, "step": 64475 }, { "epoch": 0.71, "learning_rate": 3.8100510821743884e-05, "loss": 0.7152, "step": 64480 }, { "epoch": 0.71, "learning_rate": 3.809958809460537e-05, "loss": 0.6711, "step": 64485 }, { "epoch": 0.71, "learning_rate": 3.809866536746686e-05, "loss": 0.7262, "step": 64490 }, { "epoch": 0.71, "learning_rate": 3.809774264032834e-05, "loss": 0.6949, "step": 64495 }, { "epoch": 0.71, "learning_rate": 3.8096819913189835e-05, "loss": 0.6484, "step": 64500 }, { "epoch": 0.71, "learning_rate": 3.809589718605132e-05, "loss": 0.7631, "step": 64505 }, { "epoch": 0.71, "learning_rate": 3.809497445891281e-05, "loss": 0.7161, "step": 64510 }, { "epoch": 0.71, "learning_rate": 3.809405173177429e-05, "loss": 0.7079, "step": 64515 }, { "epoch": 0.71, "learning_rate": 3.809312900463578e-05, "loss": 0.7261, "step": 64520 }, { "epoch": 0.71, "learning_rate": 3.8092206277497274e-05, "loss": 0.7289, "step": 64525 }, { "epoch": 0.71, "learning_rate": 3.8091283550358755e-05, "loss": 0.7392, "step": 64530 }, { "epoch": 0.71, "learning_rate": 3.809036082322024e-05, "loss": 0.6756, "step": 64535 }, { "epoch": 0.71, "learning_rate": 3.808943809608173e-05, "loss": 0.7123, "step": 64540 }, { "epoch": 0.71, "learning_rate": 3.8088515368943226e-05, "loss": 0.7341, "step": 64545 }, { "epoch": 0.71, "learning_rate": 3.808759264180471e-05, "loss": 0.7508, "step": 64550 }, { "epoch": 0.71, "learning_rate": 3.8086669914666195e-05, "loss": 0.6948, "step": 64555 }, { "epoch": 0.71, "learning_rate": 3.808574718752768e-05, "loss": 0.7167, "step": 64560 }, { "epoch": 0.71, "learning_rate": 3.808482446038917e-05, "loss": 0.7037, "step": 64565 }, { "epoch": 0.71, "learning_rate": 3.808390173325066e-05, "loss": 0.7385, "step": 64570 }, { "epoch": 0.72, "learning_rate": 3.8082979006112146e-05, "loss": 0.6835, "step": 64575 }, { "epoch": 0.72, "learning_rate": 3.8082056278973634e-05, "loss": 0.6942, "step": 64580 }, { "epoch": 0.72, "learning_rate": 3.808113355183512e-05, "loss": 0.7335, "step": 64585 }, { "epoch": 0.72, "learning_rate": 3.808021082469661e-05, "loss": 0.7274, "step": 64590 }, { "epoch": 0.72, "learning_rate": 3.80792880975581e-05, "loss": 0.687, "step": 64595 }, { "epoch": 0.72, "learning_rate": 3.8078365370419585e-05, "loss": 0.7035, "step": 64600 }, { "epoch": 0.72, "learning_rate": 3.8077442643281066e-05, "loss": 0.6391, "step": 64605 }, { "epoch": 0.72, "learning_rate": 3.807651991614256e-05, "loss": 0.7339, "step": 64610 }, { "epoch": 0.72, "learning_rate": 3.807559718900405e-05, "loss": 0.708, "step": 64615 }, { "epoch": 0.72, "learning_rate": 3.807467446186554e-05, "loss": 0.7701, "step": 64620 }, { "epoch": 0.72, "learning_rate": 3.807375173472702e-05, "loss": 0.7183, "step": 64625 }, { "epoch": 0.72, "learning_rate": 3.807282900758851e-05, "loss": 0.7222, "step": 64630 }, { "epoch": 0.72, "learning_rate": 3.807190628045e-05, "loss": 0.6742, "step": 64635 }, { "epoch": 0.72, "learning_rate": 3.807098355331148e-05, "loss": 0.6968, "step": 64640 }, { "epoch": 0.72, "learning_rate": 3.807006082617297e-05, "loss": 0.752, "step": 64645 }, { "epoch": 0.72, "learning_rate": 3.8069138099034464e-05, "loss": 0.7256, "step": 64650 }, { "epoch": 0.72, "learning_rate": 3.806821537189595e-05, "loss": 0.695, "step": 64655 }, { "epoch": 0.72, "learning_rate": 3.806729264475743e-05, "loss": 0.7274, "step": 64660 }, { "epoch": 0.72, "learning_rate": 3.806636991761892e-05, "loss": 0.7226, "step": 64665 }, { "epoch": 0.72, "learning_rate": 3.806544719048041e-05, "loss": 0.6987, "step": 64670 }, { "epoch": 0.72, "learning_rate": 3.8064524463341896e-05, "loss": 0.7062, "step": 64675 }, { "epoch": 0.72, "learning_rate": 3.8063601736203384e-05, "loss": 0.7168, "step": 64680 }, { "epoch": 0.72, "learning_rate": 3.806267900906487e-05, "loss": 0.7432, "step": 64685 }, { "epoch": 0.72, "learning_rate": 3.806175628192636e-05, "loss": 0.713, "step": 64690 }, { "epoch": 0.72, "learning_rate": 3.806083355478785e-05, "loss": 0.7482, "step": 64695 }, { "epoch": 0.72, "learning_rate": 3.8059910827649335e-05, "loss": 0.7437, "step": 64700 }, { "epoch": 0.72, "learning_rate": 3.805898810051082e-05, "loss": 0.7153, "step": 64705 }, { "epoch": 0.72, "learning_rate": 3.805806537337231e-05, "loss": 0.7466, "step": 64710 }, { "epoch": 0.72, "learning_rate": 3.80571426462338e-05, "loss": 0.7455, "step": 64715 }, { "epoch": 0.72, "learning_rate": 3.805621991909529e-05, "loss": 0.7168, "step": 64720 }, { "epoch": 0.72, "learning_rate": 3.8055297191956775e-05, "loss": 0.7599, "step": 64725 }, { "epoch": 0.72, "learning_rate": 3.805437446481826e-05, "loss": 0.7389, "step": 64730 }, { "epoch": 0.72, "learning_rate": 3.805345173767975e-05, "loss": 0.7998, "step": 64735 }, { "epoch": 0.72, "learning_rate": 3.805252901054124e-05, "loss": 0.7132, "step": 64740 }, { "epoch": 0.72, "learning_rate": 3.8051606283402726e-05, "loss": 0.7634, "step": 64745 }, { "epoch": 0.72, "learning_rate": 3.805068355626421e-05, "loss": 0.7386, "step": 64750 }, { "epoch": 0.72, "learning_rate": 3.8049760829125695e-05, "loss": 0.7583, "step": 64755 }, { "epoch": 0.72, "learning_rate": 3.804883810198719e-05, "loss": 0.7483, "step": 64760 }, { "epoch": 0.72, "learning_rate": 3.804791537484868e-05, "loss": 0.7112, "step": 64765 }, { "epoch": 0.72, "learning_rate": 3.804699264771016e-05, "loss": 0.6977, "step": 64770 }, { "epoch": 0.72, "learning_rate": 3.8046069920571646e-05, "loss": 0.751, "step": 64775 }, { "epoch": 0.72, "learning_rate": 3.804514719343314e-05, "loss": 0.7515, "step": 64780 }, { "epoch": 0.72, "learning_rate": 3.804422446629463e-05, "loss": 0.6939, "step": 64785 }, { "epoch": 0.72, "learning_rate": 3.804330173915611e-05, "loss": 0.7285, "step": 64790 }, { "epoch": 0.72, "learning_rate": 3.80423790120176e-05, "loss": 0.7574, "step": 64795 }, { "epoch": 0.72, "learning_rate": 3.804145628487909e-05, "loss": 0.7174, "step": 64800 }, { "epoch": 0.72, "learning_rate": 3.8040533557740574e-05, "loss": 0.7039, "step": 64805 }, { "epoch": 0.72, "learning_rate": 3.803961083060206e-05, "loss": 0.6865, "step": 64810 }, { "epoch": 0.72, "learning_rate": 3.803868810346355e-05, "loss": 0.6838, "step": 64815 }, { "epoch": 0.72, "learning_rate": 3.803776537632504e-05, "loss": 0.676, "step": 64820 }, { "epoch": 0.72, "learning_rate": 3.8036842649186525e-05, "loss": 0.7411, "step": 64825 }, { "epoch": 0.72, "learning_rate": 3.803591992204801e-05, "loss": 0.7758, "step": 64830 }, { "epoch": 0.72, "learning_rate": 3.80349971949095e-05, "loss": 0.7403, "step": 64835 }, { "epoch": 0.72, "learning_rate": 3.803407446777099e-05, "loss": 0.6845, "step": 64840 }, { "epoch": 0.72, "learning_rate": 3.8033151740632476e-05, "loss": 0.6917, "step": 64845 }, { "epoch": 0.72, "learning_rate": 3.8032229013493964e-05, "loss": 0.7242, "step": 64850 }, { "epoch": 0.72, "learning_rate": 3.803130628635545e-05, "loss": 0.6883, "step": 64855 }, { "epoch": 0.72, "learning_rate": 3.803038355921694e-05, "loss": 0.7439, "step": 64860 }, { "epoch": 0.72, "learning_rate": 3.802946083207843e-05, "loss": 0.6711, "step": 64865 }, { "epoch": 0.72, "learning_rate": 3.8028538104939916e-05, "loss": 0.684, "step": 64870 }, { "epoch": 0.72, "learning_rate": 3.8027615377801403e-05, "loss": 0.653, "step": 64875 }, { "epoch": 0.72, "learning_rate": 3.8026692650662884e-05, "loss": 0.6713, "step": 64880 }, { "epoch": 0.72, "learning_rate": 3.802576992352438e-05, "loss": 0.7147, "step": 64885 }, { "epoch": 0.72, "learning_rate": 3.802484719638587e-05, "loss": 0.7131, "step": 64890 }, { "epoch": 0.72, "learning_rate": 3.8023924469247355e-05, "loss": 0.7191, "step": 64895 }, { "epoch": 0.72, "learning_rate": 3.8023001742108836e-05, "loss": 0.7175, "step": 64900 }, { "epoch": 0.72, "learning_rate": 3.8022079014970324e-05, "loss": 0.7293, "step": 64905 }, { "epoch": 0.72, "learning_rate": 3.802115628783182e-05, "loss": 0.7272, "step": 64910 }, { "epoch": 0.72, "learning_rate": 3.80202335606933e-05, "loss": 0.6935, "step": 64915 }, { "epoch": 0.72, "learning_rate": 3.801931083355479e-05, "loss": 0.7503, "step": 64920 }, { "epoch": 0.72, "learning_rate": 3.8018388106416275e-05, "loss": 0.6327, "step": 64925 }, { "epoch": 0.72, "learning_rate": 3.801746537927777e-05, "loss": 0.7407, "step": 64930 }, { "epoch": 0.72, "learning_rate": 3.801654265213925e-05, "loss": 0.6558, "step": 64935 }, { "epoch": 0.72, "learning_rate": 3.801561992500074e-05, "loss": 0.6661, "step": 64940 }, { "epoch": 0.72, "learning_rate": 3.8014697197862227e-05, "loss": 0.7131, "step": 64945 }, { "epoch": 0.72, "learning_rate": 3.8013774470723714e-05, "loss": 0.7614, "step": 64950 }, { "epoch": 0.72, "learning_rate": 3.80128517435852e-05, "loss": 0.7039, "step": 64955 }, { "epoch": 0.72, "learning_rate": 3.801192901644669e-05, "loss": 0.7016, "step": 64960 }, { "epoch": 0.72, "learning_rate": 3.801100628930818e-05, "loss": 0.7054, "step": 64965 }, { "epoch": 0.72, "learning_rate": 3.8010083562169666e-05, "loss": 0.6759, "step": 64970 }, { "epoch": 0.72, "learning_rate": 3.8009160835031154e-05, "loss": 0.7004, "step": 64975 }, { "epoch": 0.72, "learning_rate": 3.800823810789264e-05, "loss": 0.7123, "step": 64980 }, { "epoch": 0.72, "learning_rate": 3.800731538075413e-05, "loss": 0.7353, "step": 64985 }, { "epoch": 0.72, "learning_rate": 3.800639265361561e-05, "loss": 0.6986, "step": 64990 }, { "epoch": 0.72, "learning_rate": 3.8005469926477105e-05, "loss": 0.7161, "step": 64995 }, { "epoch": 0.72, "learning_rate": 3.800454719933859e-05, "loss": 0.6925, "step": 65000 }, { "epoch": 0.72, "eval_loss": 0.6793932914733887, "eval_runtime": 69.6196, "eval_samples_per_second": 28.728, "eval_steps_per_second": 14.364, "step": 65000 }, { "epoch": 0.72, "learning_rate": 3.800362447220008e-05, "loss": 0.6515, "step": 65005 }, { "epoch": 0.72, "learning_rate": 3.800270174506156e-05, "loss": 0.7682, "step": 65010 }, { "epoch": 0.72, "learning_rate": 3.8001779017923056e-05, "loss": 0.7108, "step": 65015 }, { "epoch": 0.72, "learning_rate": 3.8000856290784544e-05, "loss": 0.6933, "step": 65020 }, { "epoch": 0.72, "learning_rate": 3.7999933563646025e-05, "loss": 0.7313, "step": 65025 }, { "epoch": 0.72, "learning_rate": 3.799901083650751e-05, "loss": 0.7406, "step": 65030 }, { "epoch": 0.72, "learning_rate": 3.799808810936901e-05, "loss": 0.7586, "step": 65035 }, { "epoch": 0.72, "learning_rate": 3.7997165382230496e-05, "loss": 0.7436, "step": 65040 }, { "epoch": 0.72, "learning_rate": 3.799624265509198e-05, "loss": 0.6897, "step": 65045 }, { "epoch": 0.72, "learning_rate": 3.7995319927953465e-05, "loss": 0.7406, "step": 65050 }, { "epoch": 0.72, "learning_rate": 3.799439720081495e-05, "loss": 0.7613, "step": 65055 }, { "epoch": 0.72, "learning_rate": 3.799347447367644e-05, "loss": 0.6569, "step": 65060 }, { "epoch": 0.72, "learning_rate": 3.799255174653793e-05, "loss": 0.744, "step": 65065 }, { "epoch": 0.72, "learning_rate": 3.7991629019399416e-05, "loss": 0.7109, "step": 65070 }, { "epoch": 0.72, "learning_rate": 3.7990706292260904e-05, "loss": 0.7131, "step": 65075 }, { "epoch": 0.72, "learning_rate": 3.798978356512239e-05, "loss": 0.735, "step": 65080 }, { "epoch": 0.72, "learning_rate": 3.798886083798388e-05, "loss": 0.6861, "step": 65085 }, { "epoch": 0.72, "learning_rate": 3.798793811084537e-05, "loss": 0.7055, "step": 65090 }, { "epoch": 0.72, "learning_rate": 3.7987015383706855e-05, "loss": 0.6994, "step": 65095 }, { "epoch": 0.72, "learning_rate": 3.798609265656834e-05, "loss": 0.7428, "step": 65100 }, { "epoch": 0.72, "learning_rate": 3.798516992942983e-05, "loss": 0.7372, "step": 65105 }, { "epoch": 0.72, "learning_rate": 3.798424720229132e-05, "loss": 0.7765, "step": 65110 }, { "epoch": 0.72, "learning_rate": 3.7983324475152807e-05, "loss": 0.69, "step": 65115 }, { "epoch": 0.72, "learning_rate": 3.7982401748014294e-05, "loss": 0.6883, "step": 65120 }, { "epoch": 0.72, "learning_rate": 3.798147902087578e-05, "loss": 0.7202, "step": 65125 }, { "epoch": 0.72, "learning_rate": 3.798055629373727e-05, "loss": 0.79, "step": 65130 }, { "epoch": 0.72, "learning_rate": 3.797963356659876e-05, "loss": 0.7392, "step": 65135 }, { "epoch": 0.72, "learning_rate": 3.797871083946024e-05, "loss": 0.6957, "step": 65140 }, { "epoch": 0.72, "learning_rate": 3.7977788112321734e-05, "loss": 0.7787, "step": 65145 }, { "epoch": 0.72, "learning_rate": 3.797686538518322e-05, "loss": 0.6698, "step": 65150 }, { "epoch": 0.72, "learning_rate": 3.79759426580447e-05, "loss": 0.6911, "step": 65155 }, { "epoch": 0.72, "learning_rate": 3.797501993090619e-05, "loss": 0.7084, "step": 65160 }, { "epoch": 0.72, "learning_rate": 3.7974097203767685e-05, "loss": 0.7063, "step": 65165 }, { "epoch": 0.72, "learning_rate": 3.797317447662917e-05, "loss": 0.6621, "step": 65170 }, { "epoch": 0.72, "learning_rate": 3.7972251749490654e-05, "loss": 0.7509, "step": 65175 }, { "epoch": 0.72, "learning_rate": 3.797132902235214e-05, "loss": 0.7407, "step": 65180 }, { "epoch": 0.72, "learning_rate": 3.7970406295213636e-05, "loss": 0.7105, "step": 65185 }, { "epoch": 0.72, "learning_rate": 3.796948356807512e-05, "loss": 0.6686, "step": 65190 }, { "epoch": 0.72, "learning_rate": 3.7968560840936605e-05, "loss": 0.69, "step": 65195 }, { "epoch": 0.72, "learning_rate": 3.796763811379809e-05, "loss": 0.719, "step": 65200 }, { "epoch": 0.72, "learning_rate": 3.796671538665958e-05, "loss": 0.7218, "step": 65205 }, { "epoch": 0.72, "learning_rate": 3.796579265952107e-05, "loss": 0.7234, "step": 65210 }, { "epoch": 0.72, "learning_rate": 3.796486993238256e-05, "loss": 0.7039, "step": 65215 }, { "epoch": 0.72, "learning_rate": 3.7963947205244045e-05, "loss": 0.6598, "step": 65220 }, { "epoch": 0.72, "learning_rate": 3.796302447810553e-05, "loss": 0.727, "step": 65225 }, { "epoch": 0.72, "learning_rate": 3.796210175096702e-05, "loss": 0.7217, "step": 65230 }, { "epoch": 0.72, "learning_rate": 3.796117902382851e-05, "loss": 0.729, "step": 65235 }, { "epoch": 0.72, "learning_rate": 3.7960256296689996e-05, "loss": 0.7656, "step": 65240 }, { "epoch": 0.72, "learning_rate": 3.7959333569551484e-05, "loss": 0.7213, "step": 65245 }, { "epoch": 0.72, "learning_rate": 3.795841084241297e-05, "loss": 0.7527, "step": 65250 }, { "epoch": 0.72, "learning_rate": 3.795748811527446e-05, "loss": 0.6981, "step": 65255 }, { "epoch": 0.72, "learning_rate": 3.795656538813595e-05, "loss": 0.6879, "step": 65260 }, { "epoch": 0.72, "learning_rate": 3.795564266099743e-05, "loss": 0.656, "step": 65265 }, { "epoch": 0.72, "learning_rate": 3.795471993385892e-05, "loss": 0.6908, "step": 65270 }, { "epoch": 0.72, "learning_rate": 3.795379720672041e-05, "loss": 0.6825, "step": 65275 }, { "epoch": 0.72, "learning_rate": 3.79528744795819e-05, "loss": 0.7322, "step": 65280 }, { "epoch": 0.72, "learning_rate": 3.795195175244338e-05, "loss": 0.655, "step": 65285 }, { "epoch": 0.72, "learning_rate": 3.795102902530487e-05, "loss": 0.6828, "step": 65290 }, { "epoch": 0.72, "learning_rate": 3.795010629816636e-05, "loss": 0.6716, "step": 65295 }, { "epoch": 0.72, "learning_rate": 3.7949183571027843e-05, "loss": 0.7306, "step": 65300 }, { "epoch": 0.72, "learning_rate": 3.794826084388933e-05, "loss": 0.6994, "step": 65305 }, { "epoch": 0.72, "learning_rate": 3.794733811675082e-05, "loss": 0.7529, "step": 65310 }, { "epoch": 0.72, "learning_rate": 3.7946415389612314e-05, "loss": 0.7475, "step": 65315 }, { "epoch": 0.72, "learning_rate": 3.7945492662473795e-05, "loss": 0.6866, "step": 65320 }, { "epoch": 0.72, "learning_rate": 3.794456993533528e-05, "loss": 0.7013, "step": 65325 }, { "epoch": 0.72, "learning_rate": 3.794364720819677e-05, "loss": 0.7597, "step": 65330 }, { "epoch": 0.72, "learning_rate": 3.794272448105826e-05, "loss": 0.7125, "step": 65335 }, { "epoch": 0.72, "learning_rate": 3.7941801753919746e-05, "loss": 0.7116, "step": 65340 }, { "epoch": 0.72, "learning_rate": 3.7940879026781234e-05, "loss": 0.7261, "step": 65345 }, { "epoch": 0.72, "learning_rate": 3.793995629964272e-05, "loss": 0.7422, "step": 65350 }, { "epoch": 0.72, "learning_rate": 3.793903357250421e-05, "loss": 0.7713, "step": 65355 }, { "epoch": 0.72, "learning_rate": 3.79381108453657e-05, "loss": 0.6504, "step": 65360 }, { "epoch": 0.72, "learning_rate": 3.7937188118227185e-05, "loss": 0.6887, "step": 65365 }, { "epoch": 0.72, "learning_rate": 3.793626539108867e-05, "loss": 0.6821, "step": 65370 }, { "epoch": 0.72, "learning_rate": 3.7935342663950154e-05, "loss": 0.6899, "step": 65375 }, { "epoch": 0.72, "learning_rate": 3.793441993681165e-05, "loss": 0.6852, "step": 65380 }, { "epoch": 0.72, "learning_rate": 3.793349720967314e-05, "loss": 0.7603, "step": 65385 }, { "epoch": 0.72, "learning_rate": 3.7932574482534625e-05, "loss": 0.7327, "step": 65390 }, { "epoch": 0.72, "learning_rate": 3.7931651755396106e-05, "loss": 0.7104, "step": 65395 }, { "epoch": 0.72, "learning_rate": 3.79307290282576e-05, "loss": 0.7334, "step": 65400 }, { "epoch": 0.72, "learning_rate": 3.792980630111909e-05, "loss": 0.7047, "step": 65405 }, { "epoch": 0.72, "learning_rate": 3.792888357398057e-05, "loss": 0.7728, "step": 65410 }, { "epoch": 0.72, "learning_rate": 3.792796084684206e-05, "loss": 0.8028, "step": 65415 }, { "epoch": 0.72, "learning_rate": 3.792703811970355e-05, "loss": 0.7314, "step": 65420 }, { "epoch": 0.72, "learning_rate": 3.792611539256504e-05, "loss": 0.7266, "step": 65425 }, { "epoch": 0.72, "learning_rate": 3.792519266542652e-05, "loss": 0.7406, "step": 65430 }, { "epoch": 0.72, "learning_rate": 3.792426993828801e-05, "loss": 0.7376, "step": 65435 }, { "epoch": 0.72, "learning_rate": 3.7923347211149496e-05, "loss": 0.7047, "step": 65440 }, { "epoch": 0.72, "learning_rate": 3.792242448401099e-05, "loss": 0.7235, "step": 65445 }, { "epoch": 0.72, "learning_rate": 3.792150175687247e-05, "loss": 0.6538, "step": 65450 }, { "epoch": 0.72, "learning_rate": 3.792057902973396e-05, "loss": 0.6765, "step": 65455 }, { "epoch": 0.72, "learning_rate": 3.791965630259545e-05, "loss": 0.6794, "step": 65460 }, { "epoch": 0.72, "learning_rate": 3.7918733575456936e-05, "loss": 0.7458, "step": 65465 }, { "epoch": 0.72, "learning_rate": 3.7917810848318424e-05, "loss": 0.698, "step": 65470 }, { "epoch": 0.72, "learning_rate": 3.791688812117991e-05, "loss": 0.6788, "step": 65475 }, { "epoch": 0.73, "learning_rate": 3.79159653940414e-05, "loss": 0.7102, "step": 65480 }, { "epoch": 0.73, "learning_rate": 3.791504266690289e-05, "loss": 0.6991, "step": 65485 }, { "epoch": 0.73, "learning_rate": 3.7914119939764375e-05, "loss": 0.7577, "step": 65490 }, { "epoch": 0.73, "learning_rate": 3.791319721262586e-05, "loss": 0.7061, "step": 65495 }, { "epoch": 0.73, "learning_rate": 3.791227448548735e-05, "loss": 0.6972, "step": 65500 }, { "epoch": 0.73, "learning_rate": 3.791135175834883e-05, "loss": 0.7575, "step": 65505 }, { "epoch": 0.73, "learning_rate": 3.7910429031210326e-05, "loss": 0.7099, "step": 65510 }, { "epoch": 0.73, "learning_rate": 3.7909506304071814e-05, "loss": 0.7708, "step": 65515 }, { "epoch": 0.73, "learning_rate": 3.79085835769333e-05, "loss": 0.7278, "step": 65520 }, { "epoch": 0.73, "learning_rate": 3.790766084979478e-05, "loss": 0.7393, "step": 65525 }, { "epoch": 0.73, "learning_rate": 3.790673812265628e-05, "loss": 0.7319, "step": 65530 }, { "epoch": 0.73, "learning_rate": 3.7905815395517766e-05, "loss": 0.6803, "step": 65535 }, { "epoch": 0.73, "learning_rate": 3.790489266837925e-05, "loss": 0.7073, "step": 65540 }, { "epoch": 0.73, "learning_rate": 3.7903969941240734e-05, "loss": 0.7129, "step": 65545 }, { "epoch": 0.73, "learning_rate": 3.790304721410223e-05, "loss": 0.7289, "step": 65550 }, { "epoch": 0.73, "learning_rate": 3.790212448696372e-05, "loss": 0.7226, "step": 65555 }, { "epoch": 0.73, "learning_rate": 3.79012017598252e-05, "loss": 0.7641, "step": 65560 }, { "epoch": 0.73, "learning_rate": 3.7900279032686686e-05, "loss": 0.7379, "step": 65565 }, { "epoch": 0.73, "learning_rate": 3.789935630554818e-05, "loss": 0.6952, "step": 65570 }, { "epoch": 0.73, "learning_rate": 3.789843357840966e-05, "loss": 0.7193, "step": 65575 }, { "epoch": 0.73, "learning_rate": 3.789751085127115e-05, "loss": 0.7132, "step": 65580 }, { "epoch": 0.73, "learning_rate": 3.789658812413264e-05, "loss": 0.7109, "step": 65585 }, { "epoch": 0.73, "learning_rate": 3.7895665396994125e-05, "loss": 0.6982, "step": 65590 }, { "epoch": 0.73, "learning_rate": 3.789474266985561e-05, "loss": 0.6676, "step": 65595 }, { "epoch": 0.73, "learning_rate": 3.78938199427171e-05, "loss": 0.7457, "step": 65600 }, { "epoch": 0.73, "learning_rate": 3.789289721557859e-05, "loss": 0.6597, "step": 65605 }, { "epoch": 0.73, "learning_rate": 3.7891974488440077e-05, "loss": 0.6754, "step": 65610 }, { "epoch": 0.73, "learning_rate": 3.7891051761301564e-05, "loss": 0.6838, "step": 65615 }, { "epoch": 0.73, "learning_rate": 3.789012903416305e-05, "loss": 0.7389, "step": 65620 }, { "epoch": 0.73, "learning_rate": 3.788920630702454e-05, "loss": 0.7301, "step": 65625 }, { "epoch": 0.73, "learning_rate": 3.788828357988603e-05, "loss": 0.6977, "step": 65630 }, { "epoch": 0.73, "learning_rate": 3.7887360852747516e-05, "loss": 0.6765, "step": 65635 }, { "epoch": 0.73, "learning_rate": 3.7886438125609004e-05, "loss": 0.7148, "step": 65640 }, { "epoch": 0.73, "learning_rate": 3.788551539847049e-05, "loss": 0.7471, "step": 65645 }, { "epoch": 0.73, "learning_rate": 3.788459267133197e-05, "loss": 0.7027, "step": 65650 }, { "epoch": 0.73, "learning_rate": 3.788366994419346e-05, "loss": 0.6432, "step": 65655 }, { "epoch": 0.73, "learning_rate": 3.7882747217054955e-05, "loss": 0.6936, "step": 65660 }, { "epoch": 0.73, "learning_rate": 3.788182448991644e-05, "loss": 0.6458, "step": 65665 }, { "epoch": 0.73, "learning_rate": 3.7880901762777924e-05, "loss": 0.7198, "step": 65670 }, { "epoch": 0.73, "learning_rate": 3.787997903563941e-05, "loss": 0.7801, "step": 65675 }, { "epoch": 0.73, "learning_rate": 3.7879056308500906e-05, "loss": 0.7048, "step": 65680 }, { "epoch": 0.73, "learning_rate": 3.787813358136239e-05, "loss": 0.7451, "step": 65685 }, { "epoch": 0.73, "learning_rate": 3.7877210854223875e-05, "loss": 0.662, "step": 65690 }, { "epoch": 0.73, "learning_rate": 3.787628812708536e-05, "loss": 0.7353, "step": 65695 }, { "epoch": 0.73, "learning_rate": 3.787536539994686e-05, "loss": 0.7915, "step": 65700 }, { "epoch": 0.73, "learning_rate": 3.787444267280834e-05, "loss": 0.7363, "step": 65705 }, { "epoch": 0.73, "learning_rate": 3.787351994566983e-05, "loss": 0.6717, "step": 65710 }, { "epoch": 0.73, "learning_rate": 3.7872597218531315e-05, "loss": 0.7146, "step": 65715 }, { "epoch": 0.73, "learning_rate": 3.78716744913928e-05, "loss": 0.7447, "step": 65720 }, { "epoch": 0.73, "learning_rate": 3.787075176425429e-05, "loss": 0.7997, "step": 65725 }, { "epoch": 0.73, "learning_rate": 3.786982903711578e-05, "loss": 0.7143, "step": 65730 }, { "epoch": 0.73, "learning_rate": 3.7868906309977266e-05, "loss": 0.6937, "step": 65735 }, { "epoch": 0.73, "learning_rate": 3.7867983582838754e-05, "loss": 0.7571, "step": 65740 }, { "epoch": 0.73, "learning_rate": 3.786706085570024e-05, "loss": 0.6773, "step": 65745 }, { "epoch": 0.73, "learning_rate": 3.786613812856173e-05, "loss": 0.8049, "step": 65750 }, { "epoch": 0.73, "learning_rate": 3.786521540142322e-05, "loss": 0.7214, "step": 65755 }, { "epoch": 0.73, "learning_rate": 3.78642926742847e-05, "loss": 0.6747, "step": 65760 }, { "epoch": 0.73, "learning_rate": 3.786336994714619e-05, "loss": 0.7482, "step": 65765 }, { "epoch": 0.73, "learning_rate": 3.786244722000768e-05, "loss": 0.7249, "step": 65770 }, { "epoch": 0.73, "learning_rate": 3.786152449286917e-05, "loss": 0.6984, "step": 65775 }, { "epoch": 0.73, "learning_rate": 3.786060176573065e-05, "loss": 0.717, "step": 65780 }, { "epoch": 0.73, "learning_rate": 3.7859679038592144e-05, "loss": 0.6988, "step": 65785 }, { "epoch": 0.73, "learning_rate": 3.785875631145363e-05, "loss": 0.6831, "step": 65790 }, { "epoch": 0.73, "learning_rate": 3.785783358431511e-05, "loss": 0.7313, "step": 65795 }, { "epoch": 0.73, "learning_rate": 3.78569108571766e-05, "loss": 0.7625, "step": 65800 }, { "epoch": 0.73, "learning_rate": 3.7855988130038096e-05, "loss": 0.6981, "step": 65805 }, { "epoch": 0.73, "learning_rate": 3.7855065402899584e-05, "loss": 0.6862, "step": 65810 }, { "epoch": 0.73, "learning_rate": 3.7854142675761065e-05, "loss": 0.7009, "step": 65815 }, { "epoch": 0.73, "learning_rate": 3.785321994862255e-05, "loss": 0.7318, "step": 65820 }, { "epoch": 0.73, "learning_rate": 3.785229722148404e-05, "loss": 0.7385, "step": 65825 }, { "epoch": 0.73, "learning_rate": 3.7851374494345535e-05, "loss": 0.7377, "step": 65830 }, { "epoch": 0.73, "learning_rate": 3.7850451767207016e-05, "loss": 0.6672, "step": 65835 }, { "epoch": 0.73, "learning_rate": 3.7849529040068504e-05, "loss": 0.7294, "step": 65840 }, { "epoch": 0.73, "learning_rate": 3.784860631292999e-05, "loss": 0.7626, "step": 65845 }, { "epoch": 0.73, "learning_rate": 3.784768358579148e-05, "loss": 0.7521, "step": 65850 }, { "epoch": 0.73, "learning_rate": 3.784676085865297e-05, "loss": 0.807, "step": 65855 }, { "epoch": 0.73, "learning_rate": 3.7845838131514455e-05, "loss": 0.6524, "step": 65860 }, { "epoch": 0.73, "learning_rate": 3.784491540437594e-05, "loss": 0.6878, "step": 65865 }, { "epoch": 0.73, "learning_rate": 3.784399267723743e-05, "loss": 0.7099, "step": 65870 }, { "epoch": 0.73, "learning_rate": 3.784306995009892e-05, "loss": 0.7443, "step": 65875 }, { "epoch": 0.73, "learning_rate": 3.784214722296041e-05, "loss": 0.7227, "step": 65880 }, { "epoch": 0.73, "learning_rate": 3.7841224495821895e-05, "loss": 0.7298, "step": 65885 }, { "epoch": 0.73, "learning_rate": 3.7840301768683376e-05, "loss": 0.6431, "step": 65890 }, { "epoch": 0.73, "learning_rate": 3.783937904154487e-05, "loss": 0.6794, "step": 65895 }, { "epoch": 0.73, "learning_rate": 3.783845631440636e-05, "loss": 0.7996, "step": 65900 }, { "epoch": 0.73, "learning_rate": 3.7837533587267846e-05, "loss": 0.7602, "step": 65905 }, { "epoch": 0.73, "learning_rate": 3.783661086012933e-05, "loss": 0.7338, "step": 65910 }, { "epoch": 0.73, "learning_rate": 3.783568813299082e-05, "loss": 0.6846, "step": 65915 }, { "epoch": 0.73, "learning_rate": 3.783476540585231e-05, "loss": 0.7293, "step": 65920 }, { "epoch": 0.73, "learning_rate": 3.783384267871379e-05, "loss": 0.6707, "step": 65925 }, { "epoch": 0.73, "learning_rate": 3.783291995157528e-05, "loss": 0.6868, "step": 65930 }, { "epoch": 0.73, "learning_rate": 3.783199722443677e-05, "loss": 0.6963, "step": 65935 }, { "epoch": 0.73, "learning_rate": 3.783107449729826e-05, "loss": 0.7129, "step": 65940 }, { "epoch": 0.73, "learning_rate": 3.783015177015974e-05, "loss": 0.7032, "step": 65945 }, { "epoch": 0.73, "learning_rate": 3.782922904302123e-05, "loss": 0.643, "step": 65950 }, { "epoch": 0.73, "learning_rate": 3.7828306315882725e-05, "loss": 0.7225, "step": 65955 }, { "epoch": 0.73, "learning_rate": 3.7827383588744206e-05, "loss": 0.7625, "step": 65960 }, { "epoch": 0.73, "learning_rate": 3.7826460861605693e-05, "loss": 0.6752, "step": 65965 }, { "epoch": 0.73, "learning_rate": 3.782553813446718e-05, "loss": 0.7398, "step": 65970 }, { "epoch": 0.73, "learning_rate": 3.782461540732867e-05, "loss": 0.7256, "step": 65975 }, { "epoch": 0.73, "learning_rate": 3.782369268019016e-05, "loss": 0.7096, "step": 65980 }, { "epoch": 0.73, "learning_rate": 3.7822769953051645e-05, "loss": 0.7483, "step": 65985 }, { "epoch": 0.73, "learning_rate": 3.782184722591313e-05, "loss": 0.741, "step": 65990 }, { "epoch": 0.73, "learning_rate": 3.782092449877462e-05, "loss": 0.718, "step": 65995 }, { "epoch": 0.73, "learning_rate": 3.782000177163611e-05, "loss": 0.6833, "step": 66000 }, { "epoch": 0.73, "eval_loss": 0.6636908054351807, "eval_runtime": 69.4804, "eval_samples_per_second": 28.785, "eval_steps_per_second": 14.393, "step": 66000 }, { "epoch": 0.73, "learning_rate": 3.7819079044497596e-05, "loss": 0.6388, "step": 66005 }, { "epoch": 0.73, "learning_rate": 3.7818156317359084e-05, "loss": 0.6867, "step": 66010 }, { "epoch": 0.73, "learning_rate": 3.781723359022057e-05, "loss": 0.6734, "step": 66015 }, { "epoch": 0.73, "learning_rate": 3.781631086308206e-05, "loss": 0.7072, "step": 66020 }, { "epoch": 0.73, "learning_rate": 3.781538813594355e-05, "loss": 0.724, "step": 66025 }, { "epoch": 0.73, "learning_rate": 3.7814465408805035e-05, "loss": 0.6967, "step": 66030 }, { "epoch": 0.73, "learning_rate": 3.7813542681666517e-05, "loss": 0.7196, "step": 66035 }, { "epoch": 0.73, "learning_rate": 3.7812619954528004e-05, "loss": 0.7323, "step": 66040 }, { "epoch": 0.73, "learning_rate": 3.78116972273895e-05, "loss": 0.7128, "step": 66045 }, { "epoch": 0.73, "learning_rate": 3.781077450025099e-05, "loss": 0.7201, "step": 66050 }, { "epoch": 0.73, "learning_rate": 3.780985177311247e-05, "loss": 0.7452, "step": 66055 }, { "epoch": 0.73, "learning_rate": 3.7808929045973956e-05, "loss": 0.7362, "step": 66060 }, { "epoch": 0.73, "learning_rate": 3.780800631883545e-05, "loss": 0.7387, "step": 66065 }, { "epoch": 0.73, "learning_rate": 3.780708359169693e-05, "loss": 0.7029, "step": 66070 }, { "epoch": 0.73, "learning_rate": 3.780616086455842e-05, "loss": 0.7686, "step": 66075 }, { "epoch": 0.73, "learning_rate": 3.780523813741991e-05, "loss": 0.7768, "step": 66080 }, { "epoch": 0.73, "learning_rate": 3.78043154102814e-05, "loss": 0.6887, "step": 66085 }, { "epoch": 0.73, "learning_rate": 3.780339268314288e-05, "loss": 0.7522, "step": 66090 }, { "epoch": 0.73, "learning_rate": 3.780246995600437e-05, "loss": 0.7077, "step": 66095 }, { "epoch": 0.73, "learning_rate": 3.780154722886586e-05, "loss": 0.7863, "step": 66100 }, { "epoch": 0.73, "learning_rate": 3.7800624501727346e-05, "loss": 0.776, "step": 66105 }, { "epoch": 0.73, "learning_rate": 3.7799701774588834e-05, "loss": 0.7558, "step": 66110 }, { "epoch": 0.73, "learning_rate": 3.779877904745032e-05, "loss": 0.6717, "step": 66115 }, { "epoch": 0.73, "learning_rate": 3.779785632031181e-05, "loss": 0.68, "step": 66120 }, { "epoch": 0.73, "learning_rate": 3.77969335931733e-05, "loss": 0.7338, "step": 66125 }, { "epoch": 0.73, "learning_rate": 3.7796010866034786e-05, "loss": 0.7488, "step": 66130 }, { "epoch": 0.73, "learning_rate": 3.7795088138896274e-05, "loss": 0.7427, "step": 66135 }, { "epoch": 0.73, "learning_rate": 3.779416541175776e-05, "loss": 0.7642, "step": 66140 }, { "epoch": 0.73, "learning_rate": 3.779324268461924e-05, "loss": 0.6585, "step": 66145 }, { "epoch": 0.73, "learning_rate": 3.779231995748074e-05, "loss": 0.6916, "step": 66150 }, { "epoch": 0.73, "learning_rate": 3.7791397230342225e-05, "loss": 0.6828, "step": 66155 }, { "epoch": 0.73, "learning_rate": 3.779047450320371e-05, "loss": 0.6861, "step": 66160 }, { "epoch": 0.73, "learning_rate": 3.7789551776065194e-05, "loss": 0.7114, "step": 66165 }, { "epoch": 0.73, "learning_rate": 3.778862904892669e-05, "loss": 0.7133, "step": 66170 }, { "epoch": 0.73, "learning_rate": 3.7787706321788176e-05, "loss": 0.7593, "step": 66175 }, { "epoch": 0.73, "learning_rate": 3.778678359464966e-05, "loss": 0.693, "step": 66180 }, { "epoch": 0.73, "learning_rate": 3.7785860867511145e-05, "loss": 0.775, "step": 66185 }, { "epoch": 0.73, "learning_rate": 3.778493814037263e-05, "loss": 0.6687, "step": 66190 }, { "epoch": 0.73, "learning_rate": 3.778401541323413e-05, "loss": 0.7209, "step": 66195 }, { "epoch": 0.73, "learning_rate": 3.778309268609561e-05, "loss": 0.7859, "step": 66200 }, { "epoch": 0.73, "learning_rate": 3.77821699589571e-05, "loss": 0.7248, "step": 66205 }, { "epoch": 0.73, "learning_rate": 3.7781247231818584e-05, "loss": 0.6888, "step": 66210 }, { "epoch": 0.73, "learning_rate": 3.778032450468008e-05, "loss": 0.6771, "step": 66215 }, { "epoch": 0.73, "learning_rate": 3.777940177754156e-05, "loss": 0.7008, "step": 66220 }, { "epoch": 0.73, "learning_rate": 3.777847905040305e-05, "loss": 0.7318, "step": 66225 }, { "epoch": 0.73, "learning_rate": 3.7777556323264536e-05, "loss": 0.784, "step": 66230 }, { "epoch": 0.73, "learning_rate": 3.7776633596126024e-05, "loss": 0.7164, "step": 66235 }, { "epoch": 0.73, "learning_rate": 3.777571086898751e-05, "loss": 0.7872, "step": 66240 }, { "epoch": 0.73, "learning_rate": 3.7774788141849e-05, "loss": 0.7232, "step": 66245 }, { "epoch": 0.73, "learning_rate": 3.777386541471049e-05, "loss": 0.7367, "step": 66250 }, { "epoch": 0.73, "learning_rate": 3.7772942687571975e-05, "loss": 0.7324, "step": 66255 }, { "epoch": 0.73, "learning_rate": 3.777201996043346e-05, "loss": 0.7366, "step": 66260 }, { "epoch": 0.73, "learning_rate": 3.777109723329495e-05, "loss": 0.7251, "step": 66265 }, { "epoch": 0.73, "learning_rate": 3.777017450615644e-05, "loss": 0.7107, "step": 66270 }, { "epoch": 0.73, "learning_rate": 3.776925177901792e-05, "loss": 0.8058, "step": 66275 }, { "epoch": 0.73, "learning_rate": 3.7768329051879414e-05, "loss": 0.7852, "step": 66280 }, { "epoch": 0.73, "learning_rate": 3.77674063247409e-05, "loss": 0.7364, "step": 66285 }, { "epoch": 0.73, "learning_rate": 3.776648359760239e-05, "loss": 0.6962, "step": 66290 }, { "epoch": 0.73, "learning_rate": 3.776556087046387e-05, "loss": 0.7086, "step": 66295 }, { "epoch": 0.73, "learning_rate": 3.7764638143325366e-05, "loss": 0.6976, "step": 66300 }, { "epoch": 0.73, "learning_rate": 3.7763715416186854e-05, "loss": 0.6869, "step": 66305 }, { "epoch": 0.73, "learning_rate": 3.7762792689048335e-05, "loss": 0.7213, "step": 66310 }, { "epoch": 0.73, "learning_rate": 3.776186996190982e-05, "loss": 0.7152, "step": 66315 }, { "epoch": 0.73, "learning_rate": 3.776094723477132e-05, "loss": 0.6318, "step": 66320 }, { "epoch": 0.73, "learning_rate": 3.7760024507632805e-05, "loss": 0.7343, "step": 66325 }, { "epoch": 0.73, "learning_rate": 3.7759101780494286e-05, "loss": 0.7661, "step": 66330 }, { "epoch": 0.73, "learning_rate": 3.7758179053355774e-05, "loss": 0.7354, "step": 66335 }, { "epoch": 0.73, "learning_rate": 3.775725632621726e-05, "loss": 0.6768, "step": 66340 }, { "epoch": 0.73, "learning_rate": 3.775633359907875e-05, "loss": 0.7062, "step": 66345 }, { "epoch": 0.73, "learning_rate": 3.775541087194024e-05, "loss": 0.7159, "step": 66350 }, { "epoch": 0.73, "learning_rate": 3.7754488144801725e-05, "loss": 0.7076, "step": 66355 }, { "epoch": 0.73, "learning_rate": 3.775356541766321e-05, "loss": 0.6968, "step": 66360 }, { "epoch": 0.73, "learning_rate": 3.77526426905247e-05, "loss": 0.7876, "step": 66365 }, { "epoch": 0.73, "learning_rate": 3.775171996338619e-05, "loss": 0.798, "step": 66370 }, { "epoch": 0.73, "learning_rate": 3.775079723624768e-05, "loss": 0.7245, "step": 66375 }, { "epoch": 0.74, "learning_rate": 3.7749874509109165e-05, "loss": 0.7068, "step": 66380 }, { "epoch": 0.74, "learning_rate": 3.774895178197065e-05, "loss": 0.692, "step": 66385 }, { "epoch": 0.74, "learning_rate": 3.774802905483214e-05, "loss": 0.6364, "step": 66390 }, { "epoch": 0.74, "learning_rate": 3.774710632769363e-05, "loss": 0.6926, "step": 66395 }, { "epoch": 0.74, "learning_rate": 3.7746183600555116e-05, "loss": 0.6981, "step": 66400 }, { "epoch": 0.74, "learning_rate": 3.7745260873416604e-05, "loss": 0.7715, "step": 66405 }, { "epoch": 0.74, "learning_rate": 3.774433814627809e-05, "loss": 0.7371, "step": 66410 }, { "epoch": 0.74, "learning_rate": 3.774341541913958e-05, "loss": 0.7075, "step": 66415 }, { "epoch": 0.74, "learning_rate": 3.774249269200106e-05, "loss": 0.6846, "step": 66420 }, { "epoch": 0.74, "learning_rate": 3.774156996486255e-05, "loss": 0.7486, "step": 66425 }, { "epoch": 0.74, "learning_rate": 3.774064723772404e-05, "loss": 0.7269, "step": 66430 }, { "epoch": 0.74, "learning_rate": 3.773972451058553e-05, "loss": 0.7312, "step": 66435 }, { "epoch": 0.74, "learning_rate": 3.773880178344701e-05, "loss": 0.7253, "step": 66440 }, { "epoch": 0.74, "learning_rate": 3.77378790563085e-05, "loss": 0.7602, "step": 66445 }, { "epoch": 0.74, "learning_rate": 3.7736956329169994e-05, "loss": 0.753, "step": 66450 }, { "epoch": 0.74, "learning_rate": 3.7736033602031475e-05, "loss": 0.6908, "step": 66455 }, { "epoch": 0.74, "learning_rate": 3.773511087489296e-05, "loss": 0.75, "step": 66460 }, { "epoch": 0.74, "learning_rate": 3.773418814775445e-05, "loss": 0.7368, "step": 66465 }, { "epoch": 0.74, "learning_rate": 3.7733265420615946e-05, "loss": 0.6831, "step": 66470 }, { "epoch": 0.74, "learning_rate": 3.773234269347743e-05, "loss": 0.6699, "step": 66475 }, { "epoch": 0.74, "learning_rate": 3.7731419966338915e-05, "loss": 0.7295, "step": 66480 }, { "epoch": 0.74, "learning_rate": 3.77304972392004e-05, "loss": 0.7259, "step": 66485 }, { "epoch": 0.74, "learning_rate": 3.772957451206189e-05, "loss": 0.8298, "step": 66490 }, { "epoch": 0.74, "learning_rate": 3.772865178492338e-05, "loss": 0.6438, "step": 66495 }, { "epoch": 0.74, "learning_rate": 3.7727729057784866e-05, "loss": 0.7629, "step": 66500 }, { "epoch": 0.74, "learning_rate": 3.7726806330646354e-05, "loss": 0.7013, "step": 66505 }, { "epoch": 0.74, "learning_rate": 3.772588360350784e-05, "loss": 0.6671, "step": 66510 }, { "epoch": 0.74, "learning_rate": 3.772496087636933e-05, "loss": 0.7312, "step": 66515 }, { "epoch": 0.74, "learning_rate": 3.772403814923082e-05, "loss": 0.7346, "step": 66520 }, { "epoch": 0.74, "learning_rate": 3.7723115422092305e-05, "loss": 0.7662, "step": 66525 }, { "epoch": 0.74, "learning_rate": 3.7722192694953786e-05, "loss": 0.7878, "step": 66530 }, { "epoch": 0.74, "learning_rate": 3.772126996781528e-05, "loss": 0.7214, "step": 66535 }, { "epoch": 0.74, "learning_rate": 3.772034724067677e-05, "loss": 0.7555, "step": 66540 }, { "epoch": 0.74, "learning_rate": 3.771942451353826e-05, "loss": 0.7317, "step": 66545 }, { "epoch": 0.74, "learning_rate": 3.771850178639974e-05, "loss": 0.7649, "step": 66550 }, { "epoch": 0.74, "learning_rate": 3.771757905926123e-05, "loss": 0.7196, "step": 66555 }, { "epoch": 0.74, "learning_rate": 3.771665633212272e-05, "loss": 0.7386, "step": 66560 }, { "epoch": 0.74, "learning_rate": 3.77157336049842e-05, "loss": 0.6789, "step": 66565 }, { "epoch": 0.74, "learning_rate": 3.771481087784569e-05, "loss": 0.6968, "step": 66570 }, { "epoch": 0.74, "learning_rate": 3.771388815070718e-05, "loss": 0.6682, "step": 66575 }, { "epoch": 0.74, "learning_rate": 3.771296542356867e-05, "loss": 0.7079, "step": 66580 }, { "epoch": 0.74, "learning_rate": 3.771204269643015e-05, "loss": 0.6866, "step": 66585 }, { "epoch": 0.74, "learning_rate": 3.771111996929164e-05, "loss": 0.73, "step": 66590 }, { "epoch": 0.74, "learning_rate": 3.771019724215313e-05, "loss": 0.7164, "step": 66595 }, { "epoch": 0.74, "learning_rate": 3.770927451501462e-05, "loss": 0.7726, "step": 66600 }, { "epoch": 0.74, "learning_rate": 3.7708351787876104e-05, "loss": 0.7636, "step": 66605 }, { "epoch": 0.74, "learning_rate": 3.770742906073759e-05, "loss": 0.7372, "step": 66610 }, { "epoch": 0.74, "learning_rate": 3.770650633359908e-05, "loss": 0.6721, "step": 66615 }, { "epoch": 0.74, "learning_rate": 3.770558360646057e-05, "loss": 0.74, "step": 66620 }, { "epoch": 0.74, "learning_rate": 3.7704660879322056e-05, "loss": 0.6807, "step": 66625 }, { "epoch": 0.74, "learning_rate": 3.7703738152183543e-05, "loss": 0.711, "step": 66630 }, { "epoch": 0.74, "learning_rate": 3.770281542504503e-05, "loss": 0.7475, "step": 66635 }, { "epoch": 0.74, "learning_rate": 3.770189269790652e-05, "loss": 0.6983, "step": 66640 }, { "epoch": 0.74, "learning_rate": 3.770096997076801e-05, "loss": 0.742, "step": 66645 }, { "epoch": 0.74, "learning_rate": 3.7700047243629495e-05, "loss": 0.7296, "step": 66650 }, { "epoch": 0.74, "learning_rate": 3.769912451649098e-05, "loss": 0.7673, "step": 66655 }, { "epoch": 0.74, "learning_rate": 3.7698201789352464e-05, "loss": 0.6811, "step": 66660 }, { "epoch": 0.74, "learning_rate": 3.769727906221396e-05, "loss": 0.7342, "step": 66665 }, { "epoch": 0.74, "learning_rate": 3.7696356335075446e-05, "loss": 0.7372, "step": 66670 }, { "epoch": 0.74, "learning_rate": 3.7695433607936934e-05, "loss": 0.719, "step": 66675 }, { "epoch": 0.74, "learning_rate": 3.7694510880798415e-05, "loss": 0.7336, "step": 66680 }, { "epoch": 0.74, "learning_rate": 3.769358815365991e-05, "loss": 0.7298, "step": 66685 }, { "epoch": 0.74, "learning_rate": 3.76926654265214e-05, "loss": 0.7289, "step": 66690 }, { "epoch": 0.74, "learning_rate": 3.769174269938288e-05, "loss": 0.6832, "step": 66695 }, { "epoch": 0.74, "learning_rate": 3.7690819972244367e-05, "loss": 0.7472, "step": 66700 }, { "epoch": 0.74, "learning_rate": 3.768989724510586e-05, "loss": 0.7156, "step": 66705 }, { "epoch": 0.74, "learning_rate": 3.768897451796735e-05, "loss": 0.697, "step": 66710 }, { "epoch": 0.74, "learning_rate": 3.768805179082883e-05, "loss": 0.7253, "step": 66715 }, { "epoch": 0.74, "learning_rate": 3.768712906369032e-05, "loss": 0.7634, "step": 66720 }, { "epoch": 0.74, "learning_rate": 3.7686206336551806e-05, "loss": 0.7425, "step": 66725 }, { "epoch": 0.74, "learning_rate": 3.7685283609413294e-05, "loss": 0.7432, "step": 66730 }, { "epoch": 0.74, "learning_rate": 3.768436088227478e-05, "loss": 0.7388, "step": 66735 }, { "epoch": 0.74, "learning_rate": 3.768343815513627e-05, "loss": 0.6929, "step": 66740 }, { "epoch": 0.74, "learning_rate": 3.768251542799776e-05, "loss": 0.8052, "step": 66745 }, { "epoch": 0.74, "learning_rate": 3.7681592700859245e-05, "loss": 0.737, "step": 66750 }, { "epoch": 0.74, "learning_rate": 3.768066997372073e-05, "loss": 0.7363, "step": 66755 }, { "epoch": 0.74, "learning_rate": 3.767974724658222e-05, "loss": 0.6914, "step": 66760 }, { "epoch": 0.74, "learning_rate": 3.767882451944371e-05, "loss": 0.7364, "step": 66765 }, { "epoch": 0.74, "learning_rate": 3.7677901792305196e-05, "loss": 0.7071, "step": 66770 }, { "epoch": 0.74, "learning_rate": 3.7676979065166684e-05, "loss": 0.7691, "step": 66775 }, { "epoch": 0.74, "learning_rate": 3.767605633802817e-05, "loss": 0.7418, "step": 66780 }, { "epoch": 0.74, "learning_rate": 3.767513361088966e-05, "loss": 0.6909, "step": 66785 }, { "epoch": 0.74, "learning_rate": 3.767421088375115e-05, "loss": 0.7582, "step": 66790 }, { "epoch": 0.74, "learning_rate": 3.7673288156612636e-05, "loss": 0.7496, "step": 66795 }, { "epoch": 0.74, "learning_rate": 3.7672365429474124e-05, "loss": 0.6719, "step": 66800 }, { "epoch": 0.74, "learning_rate": 3.7671442702335605e-05, "loss": 0.7435, "step": 66805 }, { "epoch": 0.74, "learning_rate": 3.767051997519709e-05, "loss": 0.7541, "step": 66810 }, { "epoch": 0.74, "learning_rate": 3.766959724805859e-05, "loss": 0.6936, "step": 66815 }, { "epoch": 0.74, "learning_rate": 3.7668674520920075e-05, "loss": 0.7484, "step": 66820 }, { "epoch": 0.74, "learning_rate": 3.7667751793781556e-05, "loss": 0.7451, "step": 66825 }, { "epoch": 0.74, "learning_rate": 3.7666829066643044e-05, "loss": 0.6885, "step": 66830 }, { "epoch": 0.74, "learning_rate": 3.766590633950454e-05, "loss": 0.7011, "step": 66835 }, { "epoch": 0.74, "learning_rate": 3.766498361236602e-05, "loss": 0.7063, "step": 66840 }, { "epoch": 0.74, "learning_rate": 3.766406088522751e-05, "loss": 0.7442, "step": 66845 }, { "epoch": 0.74, "learning_rate": 3.7663138158088995e-05, "loss": 0.6884, "step": 66850 }, { "epoch": 0.74, "learning_rate": 3.766221543095049e-05, "loss": 0.6972, "step": 66855 }, { "epoch": 0.74, "learning_rate": 3.766129270381197e-05, "loss": 0.6606, "step": 66860 }, { "epoch": 0.74, "learning_rate": 3.766036997667346e-05, "loss": 0.661, "step": 66865 }, { "epoch": 0.74, "learning_rate": 3.7659447249534947e-05, "loss": 0.6831, "step": 66870 }, { "epoch": 0.74, "learning_rate": 3.7658524522396434e-05, "loss": 0.7053, "step": 66875 }, { "epoch": 0.74, "learning_rate": 3.765760179525792e-05, "loss": 0.7178, "step": 66880 }, { "epoch": 0.74, "learning_rate": 3.765667906811941e-05, "loss": 0.7975, "step": 66885 }, { "epoch": 0.74, "learning_rate": 3.76557563409809e-05, "loss": 0.7295, "step": 66890 }, { "epoch": 0.74, "learning_rate": 3.7654833613842386e-05, "loss": 0.7126, "step": 66895 }, { "epoch": 0.74, "learning_rate": 3.7653910886703874e-05, "loss": 0.7063, "step": 66900 }, { "epoch": 0.74, "learning_rate": 3.765298815956536e-05, "loss": 0.7201, "step": 66905 }, { "epoch": 0.74, "learning_rate": 3.765206543242685e-05, "loss": 0.7219, "step": 66910 }, { "epoch": 0.74, "learning_rate": 3.765114270528833e-05, "loss": 0.7389, "step": 66915 }, { "epoch": 0.74, "learning_rate": 3.7650219978149825e-05, "loss": 0.7389, "step": 66920 }, { "epoch": 0.74, "learning_rate": 3.764929725101131e-05, "loss": 0.7383, "step": 66925 }, { "epoch": 0.74, "learning_rate": 3.76483745238728e-05, "loss": 0.6608, "step": 66930 }, { "epoch": 0.74, "learning_rate": 3.764745179673428e-05, "loss": 0.6813, "step": 66935 }, { "epoch": 0.74, "learning_rate": 3.7646529069595776e-05, "loss": 0.692, "step": 66940 }, { "epoch": 0.74, "learning_rate": 3.7645606342457264e-05, "loss": 0.7159, "step": 66945 }, { "epoch": 0.74, "learning_rate": 3.7644683615318745e-05, "loss": 0.7391, "step": 66950 }, { "epoch": 0.74, "learning_rate": 3.764376088818023e-05, "loss": 0.7192, "step": 66955 }, { "epoch": 0.74, "learning_rate": 3.764283816104172e-05, "loss": 0.7245, "step": 66960 }, { "epoch": 0.74, "learning_rate": 3.7641915433903216e-05, "loss": 0.6558, "step": 66965 }, { "epoch": 0.74, "learning_rate": 3.76409927067647e-05, "loss": 0.6952, "step": 66970 }, { "epoch": 0.74, "learning_rate": 3.7640069979626185e-05, "loss": 0.7953, "step": 66975 }, { "epoch": 0.74, "learning_rate": 3.763914725248767e-05, "loss": 0.7002, "step": 66980 }, { "epoch": 0.74, "learning_rate": 3.763822452534917e-05, "loss": 0.7429, "step": 66985 }, { "epoch": 0.74, "learning_rate": 3.763730179821065e-05, "loss": 0.664, "step": 66990 }, { "epoch": 0.74, "learning_rate": 3.7636379071072136e-05, "loss": 0.7505, "step": 66995 }, { "epoch": 0.74, "learning_rate": 3.7635456343933624e-05, "loss": 0.6643, "step": 67000 }, { "epoch": 0.74, "eval_loss": 0.6854579448699951, "eval_runtime": 70.2951, "eval_samples_per_second": 28.451, "eval_steps_per_second": 14.226, "step": 67000 }, { "epoch": 0.74, "learning_rate": 3.763453361679511e-05, "loss": 0.7075, "step": 67005 }, { "epoch": 0.74, "learning_rate": 3.76336108896566e-05, "loss": 0.7104, "step": 67010 }, { "epoch": 0.74, "learning_rate": 3.763268816251809e-05, "loss": 0.7542, "step": 67015 }, { "epoch": 0.74, "learning_rate": 3.7631765435379575e-05, "loss": 0.6383, "step": 67020 }, { "epoch": 0.74, "learning_rate": 3.7630842708241056e-05, "loss": 0.6892, "step": 67025 }, { "epoch": 0.74, "learning_rate": 3.762991998110255e-05, "loss": 0.7043, "step": 67030 }, { "epoch": 0.74, "learning_rate": 3.762899725396404e-05, "loss": 0.6846, "step": 67035 }, { "epoch": 0.74, "learning_rate": 3.762807452682553e-05, "loss": 0.7607, "step": 67040 }, { "epoch": 0.74, "learning_rate": 3.762715179968701e-05, "loss": 0.6499, "step": 67045 }, { "epoch": 0.74, "learning_rate": 3.76262290725485e-05, "loss": 0.698, "step": 67050 }, { "epoch": 0.74, "learning_rate": 3.762530634540999e-05, "loss": 0.73, "step": 67055 }, { "epoch": 0.74, "learning_rate": 3.762438361827148e-05, "loss": 0.7082, "step": 67060 }, { "epoch": 0.74, "learning_rate": 3.762346089113296e-05, "loss": 0.7114, "step": 67065 }, { "epoch": 0.74, "learning_rate": 3.7622538163994454e-05, "loss": 0.6676, "step": 67070 }, { "epoch": 0.74, "learning_rate": 3.762161543685594e-05, "loss": 0.7108, "step": 67075 }, { "epoch": 0.74, "learning_rate": 3.762069270971742e-05, "loss": 0.7171, "step": 67080 }, { "epoch": 0.74, "learning_rate": 3.761976998257891e-05, "loss": 0.7031, "step": 67085 }, { "epoch": 0.74, "learning_rate": 3.7618847255440405e-05, "loss": 0.6471, "step": 67090 }, { "epoch": 0.74, "learning_rate": 3.761792452830189e-05, "loss": 0.739, "step": 67095 }, { "epoch": 0.74, "learning_rate": 3.7617001801163374e-05, "loss": 0.7269, "step": 67100 }, { "epoch": 0.74, "learning_rate": 3.761607907402486e-05, "loss": 0.6486, "step": 67105 }, { "epoch": 0.74, "learning_rate": 3.761515634688635e-05, "loss": 0.7529, "step": 67110 }, { "epoch": 0.74, "learning_rate": 3.761423361974784e-05, "loss": 0.7234, "step": 67115 }, { "epoch": 0.74, "learning_rate": 3.7613310892609325e-05, "loss": 0.7215, "step": 67120 }, { "epoch": 0.74, "learning_rate": 3.761238816547081e-05, "loss": 0.6697, "step": 67125 }, { "epoch": 0.74, "learning_rate": 3.76114654383323e-05, "loss": 0.6789, "step": 67130 }, { "epoch": 0.74, "learning_rate": 3.761054271119379e-05, "loss": 0.7119, "step": 67135 }, { "epoch": 0.74, "learning_rate": 3.760961998405528e-05, "loss": 0.6595, "step": 67140 }, { "epoch": 0.74, "learning_rate": 3.7608697256916765e-05, "loss": 0.6872, "step": 67145 }, { "epoch": 0.74, "learning_rate": 3.760777452977825e-05, "loss": 0.7242, "step": 67150 }, { "epoch": 0.74, "learning_rate": 3.760685180263974e-05, "loss": 0.7457, "step": 67155 }, { "epoch": 0.74, "learning_rate": 3.760592907550123e-05, "loss": 0.7035, "step": 67160 }, { "epoch": 0.74, "learning_rate": 3.7605006348362716e-05, "loss": 0.7231, "step": 67165 }, { "epoch": 0.74, "learning_rate": 3.7604083621224204e-05, "loss": 0.7262, "step": 67170 }, { "epoch": 0.74, "learning_rate": 3.7603160894085685e-05, "loss": 0.7188, "step": 67175 }, { "epoch": 0.74, "learning_rate": 3.760223816694718e-05, "loss": 0.6698, "step": 67180 }, { "epoch": 0.74, "learning_rate": 3.760131543980867e-05, "loss": 0.711, "step": 67185 }, { "epoch": 0.74, "learning_rate": 3.760039271267015e-05, "loss": 0.6999, "step": 67190 }, { "epoch": 0.74, "learning_rate": 3.7599469985531636e-05, "loss": 0.6726, "step": 67195 }, { "epoch": 0.74, "learning_rate": 3.759854725839313e-05, "loss": 0.7284, "step": 67200 }, { "epoch": 0.74, "learning_rate": 3.759762453125462e-05, "loss": 0.7194, "step": 67205 }, { "epoch": 0.74, "learning_rate": 3.75967018041161e-05, "loss": 0.6731, "step": 67210 }, { "epoch": 0.74, "learning_rate": 3.759577907697759e-05, "loss": 0.6615, "step": 67215 }, { "epoch": 0.74, "learning_rate": 3.759485634983908e-05, "loss": 0.7175, "step": 67220 }, { "epoch": 0.74, "learning_rate": 3.7593933622700564e-05, "loss": 0.6699, "step": 67225 }, { "epoch": 0.74, "learning_rate": 3.759301089556205e-05, "loss": 0.7272, "step": 67230 }, { "epoch": 0.74, "learning_rate": 3.759208816842354e-05, "loss": 0.7544, "step": 67235 }, { "epoch": 0.74, "learning_rate": 3.7591165441285034e-05, "loss": 0.7682, "step": 67240 }, { "epoch": 0.74, "learning_rate": 3.7590242714146515e-05, "loss": 0.7175, "step": 67245 }, { "epoch": 0.74, "learning_rate": 3.7589319987008e-05, "loss": 0.6616, "step": 67250 }, { "epoch": 0.74, "learning_rate": 3.758839725986949e-05, "loss": 0.7032, "step": 67255 }, { "epoch": 0.74, "learning_rate": 3.758747453273098e-05, "loss": 0.7077, "step": 67260 }, { "epoch": 0.74, "learning_rate": 3.7586551805592466e-05, "loss": 0.6788, "step": 67265 }, { "epoch": 0.74, "learning_rate": 3.7585629078453954e-05, "loss": 0.8163, "step": 67270 }, { "epoch": 0.74, "learning_rate": 3.758470635131544e-05, "loss": 0.7002, "step": 67275 }, { "epoch": 0.74, "learning_rate": 3.758378362417693e-05, "loss": 0.6907, "step": 67280 }, { "epoch": 0.75, "learning_rate": 3.758286089703842e-05, "loss": 0.6921, "step": 67285 }, { "epoch": 0.75, "learning_rate": 3.7581938169899906e-05, "loss": 0.6874, "step": 67290 }, { "epoch": 0.75, "learning_rate": 3.7581015442761393e-05, "loss": 0.7187, "step": 67295 }, { "epoch": 0.75, "learning_rate": 3.7580092715622874e-05, "loss": 0.7291, "step": 67300 }, { "epoch": 0.75, "learning_rate": 3.757916998848437e-05, "loss": 0.7219, "step": 67305 }, { "epoch": 0.75, "learning_rate": 3.757824726134586e-05, "loss": 0.7078, "step": 67310 }, { "epoch": 0.75, "learning_rate": 3.7577324534207345e-05, "loss": 0.7571, "step": 67315 }, { "epoch": 0.75, "learning_rate": 3.7576401807068826e-05, "loss": 0.6912, "step": 67320 }, { "epoch": 0.75, "learning_rate": 3.7575479079930314e-05, "loss": 0.6579, "step": 67325 }, { "epoch": 0.75, "learning_rate": 3.757455635279181e-05, "loss": 0.7236, "step": 67330 }, { "epoch": 0.75, "learning_rate": 3.757363362565329e-05, "loss": 0.7533, "step": 67335 }, { "epoch": 0.75, "learning_rate": 3.757271089851478e-05, "loss": 0.6943, "step": 67340 }, { "epoch": 0.75, "learning_rate": 3.7571788171376265e-05, "loss": 0.7294, "step": 67345 }, { "epoch": 0.75, "learning_rate": 3.757086544423776e-05, "loss": 0.7499, "step": 67350 }, { "epoch": 0.75, "learning_rate": 3.756994271709924e-05, "loss": 0.7074, "step": 67355 }, { "epoch": 0.75, "learning_rate": 3.756901998996073e-05, "loss": 0.7034, "step": 67360 }, { "epoch": 0.75, "learning_rate": 3.7568097262822217e-05, "loss": 0.7123, "step": 67365 }, { "epoch": 0.75, "learning_rate": 3.756717453568371e-05, "loss": 0.7599, "step": 67370 }, { "epoch": 0.75, "learning_rate": 3.756625180854519e-05, "loss": 0.6843, "step": 67375 }, { "epoch": 0.75, "learning_rate": 3.756532908140668e-05, "loss": 0.7288, "step": 67380 }, { "epoch": 0.75, "learning_rate": 3.756440635426817e-05, "loss": 0.7504, "step": 67385 }, { "epoch": 0.75, "learning_rate": 3.7563483627129656e-05, "loss": 0.7115, "step": 67390 }, { "epoch": 0.75, "learning_rate": 3.7562560899991144e-05, "loss": 0.7023, "step": 67395 }, { "epoch": 0.75, "learning_rate": 3.756163817285263e-05, "loss": 0.7683, "step": 67400 }, { "epoch": 0.75, "learning_rate": 3.756071544571412e-05, "loss": 0.7146, "step": 67405 }, { "epoch": 0.75, "learning_rate": 3.755979271857561e-05, "loss": 0.7268, "step": 67410 }, { "epoch": 0.75, "learning_rate": 3.7558869991437095e-05, "loss": 0.6955, "step": 67415 }, { "epoch": 0.75, "learning_rate": 3.755794726429858e-05, "loss": 0.7636, "step": 67420 }, { "epoch": 0.75, "learning_rate": 3.755702453716007e-05, "loss": 0.7566, "step": 67425 }, { "epoch": 0.75, "learning_rate": 3.755610181002155e-05, "loss": 0.7288, "step": 67430 }, { "epoch": 0.75, "learning_rate": 3.7555179082883046e-05, "loss": 0.7156, "step": 67435 }, { "epoch": 0.75, "learning_rate": 3.7554256355744534e-05, "loss": 0.6729, "step": 67440 }, { "epoch": 0.75, "learning_rate": 3.755333362860602e-05, "loss": 0.7179, "step": 67445 }, { "epoch": 0.75, "learning_rate": 3.75524109014675e-05, "loss": 0.7477, "step": 67450 }, { "epoch": 0.75, "learning_rate": 3.7551488174329e-05, "loss": 0.7255, "step": 67455 }, { "epoch": 0.75, "learning_rate": 3.7550565447190486e-05, "loss": 0.7305, "step": 67460 }, { "epoch": 0.75, "learning_rate": 3.754964272005197e-05, "loss": 0.7435, "step": 67465 }, { "epoch": 0.75, "learning_rate": 3.7548719992913455e-05, "loss": 0.7038, "step": 67470 }, { "epoch": 0.75, "learning_rate": 3.754779726577495e-05, "loss": 0.739, "step": 67475 }, { "epoch": 0.75, "learning_rate": 3.754687453863644e-05, "loss": 0.8039, "step": 67480 }, { "epoch": 0.75, "learning_rate": 3.754595181149792e-05, "loss": 0.6996, "step": 67485 }, { "epoch": 0.75, "learning_rate": 3.7545029084359406e-05, "loss": 0.7618, "step": 67490 }, { "epoch": 0.75, "learning_rate": 3.7544106357220894e-05, "loss": 0.7774, "step": 67495 }, { "epoch": 0.75, "learning_rate": 3.754318363008238e-05, "loss": 0.7065, "step": 67500 }, { "epoch": 0.75, "learning_rate": 3.754226090294387e-05, "loss": 0.7425, "step": 67505 }, { "epoch": 0.75, "learning_rate": 3.754133817580536e-05, "loss": 0.7184, "step": 67510 }, { "epoch": 0.75, "learning_rate": 3.7540415448666845e-05, "loss": 0.6787, "step": 67515 }, { "epoch": 0.75, "learning_rate": 3.753949272152833e-05, "loss": 0.6894, "step": 67520 }, { "epoch": 0.75, "learning_rate": 3.753856999438982e-05, "loss": 0.7049, "step": 67525 }, { "epoch": 0.75, "learning_rate": 3.753764726725131e-05, "loss": 0.7182, "step": 67530 }, { "epoch": 0.75, "learning_rate": 3.7536724540112797e-05, "loss": 0.7368, "step": 67535 }, { "epoch": 0.75, "learning_rate": 3.7535801812974284e-05, "loss": 0.6753, "step": 67540 }, { "epoch": 0.75, "learning_rate": 3.753487908583577e-05, "loss": 0.7231, "step": 67545 }, { "epoch": 0.75, "learning_rate": 3.753395635869726e-05, "loss": 0.6883, "step": 67550 }, { "epoch": 0.75, "learning_rate": 3.753303363155875e-05, "loss": 0.7243, "step": 67555 }, { "epoch": 0.75, "learning_rate": 3.753211090442023e-05, "loss": 0.7696, "step": 67560 }, { "epoch": 0.75, "learning_rate": 3.7531188177281724e-05, "loss": 0.6447, "step": 67565 }, { "epoch": 0.75, "learning_rate": 3.753026545014321e-05, "loss": 0.7156, "step": 67570 }, { "epoch": 0.75, "learning_rate": 3.752934272300469e-05, "loss": 0.7681, "step": 67575 }, { "epoch": 0.75, "learning_rate": 3.752841999586618e-05, "loss": 0.7053, "step": 67580 }, { "epoch": 0.75, "learning_rate": 3.7527497268727675e-05, "loss": 0.7401, "step": 67585 }, { "epoch": 0.75, "learning_rate": 3.752657454158916e-05, "loss": 0.7312, "step": 67590 }, { "epoch": 0.75, "learning_rate": 3.7525651814450644e-05, "loss": 0.7984, "step": 67595 }, { "epoch": 0.75, "learning_rate": 3.752472908731213e-05, "loss": 0.6966, "step": 67600 }, { "epoch": 0.75, "learning_rate": 3.7523806360173626e-05, "loss": 0.756, "step": 67605 }, { "epoch": 0.75, "learning_rate": 3.752288363303511e-05, "loss": 0.6885, "step": 67610 }, { "epoch": 0.75, "learning_rate": 3.7521960905896595e-05, "loss": 0.7454, "step": 67615 }, { "epoch": 0.75, "learning_rate": 3.752103817875808e-05, "loss": 0.7224, "step": 67620 }, { "epoch": 0.75, "learning_rate": 3.752011545161958e-05, "loss": 0.7318, "step": 67625 }, { "epoch": 0.75, "learning_rate": 3.751919272448106e-05, "loss": 0.7064, "step": 67630 }, { "epoch": 0.75, "learning_rate": 3.751826999734255e-05, "loss": 0.7381, "step": 67635 }, { "epoch": 0.75, "learning_rate": 3.7517347270204035e-05, "loss": 0.6581, "step": 67640 }, { "epoch": 0.75, "learning_rate": 3.751642454306552e-05, "loss": 0.7289, "step": 67645 }, { "epoch": 0.75, "learning_rate": 3.751550181592701e-05, "loss": 0.7188, "step": 67650 }, { "epoch": 0.75, "learning_rate": 3.75145790887885e-05, "loss": 0.6785, "step": 67655 }, { "epoch": 0.75, "learning_rate": 3.7513656361649986e-05, "loss": 0.7182, "step": 67660 }, { "epoch": 0.75, "learning_rate": 3.7512733634511474e-05, "loss": 0.7161, "step": 67665 }, { "epoch": 0.75, "learning_rate": 3.751181090737296e-05, "loss": 0.6808, "step": 67670 }, { "epoch": 0.75, "learning_rate": 3.751088818023445e-05, "loss": 0.6888, "step": 67675 }, { "epoch": 0.75, "learning_rate": 3.750996545309594e-05, "loss": 0.6862, "step": 67680 }, { "epoch": 0.75, "learning_rate": 3.750904272595742e-05, "loss": 0.7588, "step": 67685 }, { "epoch": 0.75, "learning_rate": 3.750811999881891e-05, "loss": 0.6917, "step": 67690 }, { "epoch": 0.75, "learning_rate": 3.75071972716804e-05, "loss": 0.628, "step": 67695 }, { "epoch": 0.75, "learning_rate": 3.750627454454189e-05, "loss": 0.6967, "step": 67700 }, { "epoch": 0.75, "learning_rate": 3.750535181740337e-05, "loss": 0.7406, "step": 67705 }, { "epoch": 0.75, "learning_rate": 3.750442909026486e-05, "loss": 0.7183, "step": 67710 }, { "epoch": 0.75, "learning_rate": 3.750350636312635e-05, "loss": 0.7204, "step": 67715 }, { "epoch": 0.75, "learning_rate": 3.750258363598784e-05, "loss": 0.7415, "step": 67720 }, { "epoch": 0.75, "learning_rate": 3.750166090884932e-05, "loss": 0.7569, "step": 67725 }, { "epoch": 0.75, "learning_rate": 3.750073818171081e-05, "loss": 0.755, "step": 67730 }, { "epoch": 0.75, "learning_rate": 3.7499815454572304e-05, "loss": 0.6879, "step": 67735 }, { "epoch": 0.75, "learning_rate": 3.7498892727433785e-05, "loss": 0.6598, "step": 67740 }, { "epoch": 0.75, "learning_rate": 3.749797000029527e-05, "loss": 0.7296, "step": 67745 }, { "epoch": 0.75, "learning_rate": 3.749704727315676e-05, "loss": 0.7419, "step": 67750 }, { "epoch": 0.75, "learning_rate": 3.7496124546018255e-05, "loss": 0.6987, "step": 67755 }, { "epoch": 0.75, "learning_rate": 3.7495201818879736e-05, "loss": 0.7356, "step": 67760 }, { "epoch": 0.75, "learning_rate": 3.7494279091741224e-05, "loss": 0.7409, "step": 67765 }, { "epoch": 0.75, "learning_rate": 3.749335636460271e-05, "loss": 0.6393, "step": 67770 }, { "epoch": 0.75, "learning_rate": 3.74924336374642e-05, "loss": 0.7243, "step": 67775 }, { "epoch": 0.75, "learning_rate": 3.749151091032569e-05, "loss": 0.732, "step": 67780 }, { "epoch": 0.75, "learning_rate": 3.7490588183187175e-05, "loss": 0.6805, "step": 67785 }, { "epoch": 0.75, "learning_rate": 3.748966545604866e-05, "loss": 0.7472, "step": 67790 }, { "epoch": 0.75, "learning_rate": 3.748874272891015e-05, "loss": 0.7571, "step": 67795 }, { "epoch": 0.75, "learning_rate": 3.748782000177164e-05, "loss": 0.6989, "step": 67800 }, { "epoch": 0.75, "learning_rate": 3.748689727463313e-05, "loss": 0.6615, "step": 67805 }, { "epoch": 0.75, "learning_rate": 3.7485974547494615e-05, "loss": 0.6854, "step": 67810 }, { "epoch": 0.75, "learning_rate": 3.7485051820356096e-05, "loss": 0.7522, "step": 67815 }, { "epoch": 0.75, "learning_rate": 3.748412909321759e-05, "loss": 0.7402, "step": 67820 }, { "epoch": 0.75, "learning_rate": 3.748320636607908e-05, "loss": 0.6987, "step": 67825 }, { "epoch": 0.75, "learning_rate": 3.7482283638940566e-05, "loss": 0.7562, "step": 67830 }, { "epoch": 0.75, "learning_rate": 3.748136091180205e-05, "loss": 0.7126, "step": 67835 }, { "epoch": 0.75, "learning_rate": 3.748043818466354e-05, "loss": 0.7331, "step": 67840 }, { "epoch": 0.75, "learning_rate": 3.747951545752503e-05, "loss": 0.7748, "step": 67845 }, { "epoch": 0.75, "learning_rate": 3.747859273038651e-05, "loss": 0.6746, "step": 67850 }, { "epoch": 0.75, "learning_rate": 3.7477670003248e-05, "loss": 0.7044, "step": 67855 }, { "epoch": 0.75, "learning_rate": 3.7476747276109486e-05, "loss": 0.7355, "step": 67860 }, { "epoch": 0.75, "learning_rate": 3.747582454897098e-05, "loss": 0.8163, "step": 67865 }, { "epoch": 0.75, "learning_rate": 3.747490182183246e-05, "loss": 0.7346, "step": 67870 }, { "epoch": 0.75, "learning_rate": 3.747397909469395e-05, "loss": 0.7221, "step": 67875 }, { "epoch": 0.75, "learning_rate": 3.747305636755544e-05, "loss": 0.6586, "step": 67880 }, { "epoch": 0.75, "learning_rate": 3.7472133640416926e-05, "loss": 0.7191, "step": 67885 }, { "epoch": 0.75, "learning_rate": 3.7471210913278414e-05, "loss": 0.6921, "step": 67890 }, { "epoch": 0.75, "learning_rate": 3.74702881861399e-05, "loss": 0.7221, "step": 67895 }, { "epoch": 0.75, "learning_rate": 3.746936545900139e-05, "loss": 0.7026, "step": 67900 }, { "epoch": 0.75, "learning_rate": 3.746844273186288e-05, "loss": 0.7404, "step": 67905 }, { "epoch": 0.75, "learning_rate": 3.7467520004724365e-05, "loss": 0.6689, "step": 67910 }, { "epoch": 0.75, "learning_rate": 3.746659727758585e-05, "loss": 0.7301, "step": 67915 }, { "epoch": 0.75, "learning_rate": 3.746567455044734e-05, "loss": 0.7093, "step": 67920 }, { "epoch": 0.75, "learning_rate": 3.746475182330883e-05, "loss": 0.7073, "step": 67925 }, { "epoch": 0.75, "learning_rate": 3.7463829096170316e-05, "loss": 0.7092, "step": 67930 }, { "epoch": 0.75, "learning_rate": 3.7462906369031804e-05, "loss": 0.7093, "step": 67935 }, { "epoch": 0.75, "learning_rate": 3.746198364189329e-05, "loss": 0.7292, "step": 67940 }, { "epoch": 0.75, "learning_rate": 3.746106091475477e-05, "loss": 0.699, "step": 67945 }, { "epoch": 0.75, "learning_rate": 3.746013818761627e-05, "loss": 0.7271, "step": 67950 }, { "epoch": 0.75, "learning_rate": 3.7459215460477756e-05, "loss": 0.7368, "step": 67955 }, { "epoch": 0.75, "learning_rate": 3.745829273333924e-05, "loss": 0.7236, "step": 67960 }, { "epoch": 0.75, "learning_rate": 3.7457370006200724e-05, "loss": 0.7593, "step": 67965 }, { "epoch": 0.75, "learning_rate": 3.745644727906222e-05, "loss": 0.6272, "step": 67970 }, { "epoch": 0.75, "learning_rate": 3.745552455192371e-05, "loss": 0.7511, "step": 67975 }, { "epoch": 0.75, "learning_rate": 3.745460182478519e-05, "loss": 0.6973, "step": 67980 }, { "epoch": 0.75, "learning_rate": 3.7453679097646676e-05, "loss": 0.736, "step": 67985 }, { "epoch": 0.75, "learning_rate": 3.745275637050817e-05, "loss": 0.7333, "step": 67990 }, { "epoch": 0.75, "learning_rate": 3.745183364336965e-05, "loss": 0.7188, "step": 67995 }, { "epoch": 0.75, "learning_rate": 3.745091091623114e-05, "loss": 0.6433, "step": 68000 }, { "epoch": 0.75, "eval_loss": 0.7035318613052368, "eval_runtime": 69.9781, "eval_samples_per_second": 28.58, "eval_steps_per_second": 14.29, "step": 68000 }, { "epoch": 0.75, "learning_rate": 3.744998818909263e-05, "loss": 0.7204, "step": 68005 }, { "epoch": 0.75, "learning_rate": 3.7449065461954115e-05, "loss": 0.6299, "step": 68010 }, { "epoch": 0.75, "learning_rate": 3.74481427348156e-05, "loss": 0.7074, "step": 68015 }, { "epoch": 0.75, "learning_rate": 3.744722000767709e-05, "loss": 0.6888, "step": 68020 }, { "epoch": 0.75, "learning_rate": 3.744629728053858e-05, "loss": 0.713, "step": 68025 }, { "epoch": 0.75, "learning_rate": 3.7445374553400067e-05, "loss": 0.6585, "step": 68030 }, { "epoch": 0.75, "learning_rate": 3.7444451826261554e-05, "loss": 0.7181, "step": 68035 }, { "epoch": 0.75, "learning_rate": 3.744352909912304e-05, "loss": 0.7059, "step": 68040 }, { "epoch": 0.75, "learning_rate": 3.744260637198453e-05, "loss": 0.7285, "step": 68045 }, { "epoch": 0.75, "learning_rate": 3.744168364484602e-05, "loss": 0.6721, "step": 68050 }, { "epoch": 0.75, "learning_rate": 3.7440760917707506e-05, "loss": 0.7383, "step": 68055 }, { "epoch": 0.75, "learning_rate": 3.7439838190568994e-05, "loss": 0.6975, "step": 68060 }, { "epoch": 0.75, "learning_rate": 3.743891546343048e-05, "loss": 0.6507, "step": 68065 }, { "epoch": 0.75, "learning_rate": 3.743799273629196e-05, "loss": 0.6742, "step": 68070 }, { "epoch": 0.75, "learning_rate": 3.743707000915346e-05, "loss": 0.7412, "step": 68075 }, { "epoch": 0.75, "learning_rate": 3.7436147282014945e-05, "loss": 0.7685, "step": 68080 }, { "epoch": 0.75, "learning_rate": 3.743522455487643e-05, "loss": 0.6922, "step": 68085 }, { "epoch": 0.75, "learning_rate": 3.7434301827737914e-05, "loss": 0.7063, "step": 68090 }, { "epoch": 0.75, "learning_rate": 3.74333791005994e-05, "loss": 0.73, "step": 68095 }, { "epoch": 0.75, "learning_rate": 3.7432456373460896e-05, "loss": 0.6834, "step": 68100 }, { "epoch": 0.75, "learning_rate": 3.7431533646322384e-05, "loss": 0.6682, "step": 68105 }, { "epoch": 0.75, "learning_rate": 3.7430610919183865e-05, "loss": 0.6553, "step": 68110 }, { "epoch": 0.75, "learning_rate": 3.742968819204535e-05, "loss": 0.7385, "step": 68115 }, { "epoch": 0.75, "learning_rate": 3.742876546490685e-05, "loss": 0.7078, "step": 68120 }, { "epoch": 0.75, "learning_rate": 3.742784273776833e-05, "loss": 0.7068, "step": 68125 }, { "epoch": 0.75, "learning_rate": 3.742692001062982e-05, "loss": 0.6843, "step": 68130 }, { "epoch": 0.75, "learning_rate": 3.7425997283491305e-05, "loss": 0.7577, "step": 68135 }, { "epoch": 0.75, "learning_rate": 3.74250745563528e-05, "loss": 0.6961, "step": 68140 }, { "epoch": 0.75, "learning_rate": 3.742415182921428e-05, "loss": 0.7059, "step": 68145 }, { "epoch": 0.75, "learning_rate": 3.742322910207577e-05, "loss": 0.7429, "step": 68150 }, { "epoch": 0.75, "learning_rate": 3.7422306374937256e-05, "loss": 0.6434, "step": 68155 }, { "epoch": 0.75, "learning_rate": 3.7421383647798744e-05, "loss": 0.7372, "step": 68160 }, { "epoch": 0.75, "learning_rate": 3.742046092066023e-05, "loss": 0.6765, "step": 68165 }, { "epoch": 0.75, "learning_rate": 3.741953819352172e-05, "loss": 0.722, "step": 68170 }, { "epoch": 0.75, "learning_rate": 3.741861546638321e-05, "loss": 0.7009, "step": 68175 }, { "epoch": 0.75, "learning_rate": 3.7417692739244695e-05, "loss": 0.7128, "step": 68180 }, { "epoch": 0.75, "learning_rate": 3.741677001210618e-05, "loss": 0.7385, "step": 68185 }, { "epoch": 0.76, "learning_rate": 3.741584728496767e-05, "loss": 0.6628, "step": 68190 }, { "epoch": 0.76, "learning_rate": 3.741492455782916e-05, "loss": 0.6947, "step": 68195 }, { "epoch": 0.76, "learning_rate": 3.741400183069064e-05, "loss": 0.7032, "step": 68200 }, { "epoch": 0.76, "learning_rate": 3.7413079103552134e-05, "loss": 0.765, "step": 68205 }, { "epoch": 0.76, "learning_rate": 3.741215637641362e-05, "loss": 0.7094, "step": 68210 }, { "epoch": 0.76, "learning_rate": 3.741123364927511e-05, "loss": 0.7386, "step": 68215 }, { "epoch": 0.76, "learning_rate": 3.741031092213659e-05, "loss": 0.6954, "step": 68220 }, { "epoch": 0.76, "learning_rate": 3.7409388194998086e-05, "loss": 0.7175, "step": 68225 }, { "epoch": 0.76, "learning_rate": 3.7408465467859574e-05, "loss": 0.6776, "step": 68230 }, { "epoch": 0.76, "learning_rate": 3.7407542740721055e-05, "loss": 0.6656, "step": 68235 }, { "epoch": 0.76, "learning_rate": 3.740662001358254e-05, "loss": 0.7061, "step": 68240 }, { "epoch": 0.76, "learning_rate": 3.740569728644403e-05, "loss": 0.6931, "step": 68245 }, { "epoch": 0.76, "learning_rate": 3.7404774559305525e-05, "loss": 0.7671, "step": 68250 }, { "epoch": 0.76, "learning_rate": 3.7403851832167006e-05, "loss": 0.6834, "step": 68255 }, { "epoch": 0.76, "learning_rate": 3.7402929105028494e-05, "loss": 0.7485, "step": 68260 }, { "epoch": 0.76, "learning_rate": 3.740200637788998e-05, "loss": 0.7619, "step": 68265 }, { "epoch": 0.76, "learning_rate": 3.740108365075147e-05, "loss": 0.7407, "step": 68270 }, { "epoch": 0.76, "learning_rate": 3.740016092361296e-05, "loss": 0.7413, "step": 68275 }, { "epoch": 0.76, "learning_rate": 3.7399238196474445e-05, "loss": 0.7002, "step": 68280 }, { "epoch": 0.76, "learning_rate": 3.739831546933593e-05, "loss": 0.6662, "step": 68285 }, { "epoch": 0.76, "learning_rate": 3.739739274219742e-05, "loss": 0.7588, "step": 68290 }, { "epoch": 0.76, "learning_rate": 3.739647001505891e-05, "loss": 0.7621, "step": 68295 }, { "epoch": 0.76, "learning_rate": 3.73955472879204e-05, "loss": 0.7806, "step": 68300 }, { "epoch": 0.76, "learning_rate": 3.7394624560781885e-05, "loss": 0.736, "step": 68305 }, { "epoch": 0.76, "learning_rate": 3.739370183364337e-05, "loss": 0.7745, "step": 68310 }, { "epoch": 0.76, "learning_rate": 3.739277910650486e-05, "loss": 0.7556, "step": 68315 }, { "epoch": 0.76, "learning_rate": 3.739185637936635e-05, "loss": 0.6245, "step": 68320 }, { "epoch": 0.76, "learning_rate": 3.7390933652227836e-05, "loss": 0.7081, "step": 68325 }, { "epoch": 0.76, "learning_rate": 3.739001092508932e-05, "loss": 0.6968, "step": 68330 }, { "epoch": 0.76, "learning_rate": 3.738908819795081e-05, "loss": 0.7069, "step": 68335 }, { "epoch": 0.76, "learning_rate": 3.73881654708123e-05, "loss": 0.7773, "step": 68340 }, { "epoch": 0.76, "learning_rate": 3.738724274367378e-05, "loss": 0.6658, "step": 68345 }, { "epoch": 0.76, "learning_rate": 3.738632001653527e-05, "loss": 0.7324, "step": 68350 }, { "epoch": 0.76, "learning_rate": 3.738539728939676e-05, "loss": 0.681, "step": 68355 }, { "epoch": 0.76, "learning_rate": 3.738447456225825e-05, "loss": 0.7067, "step": 68360 }, { "epoch": 0.76, "learning_rate": 3.738355183511973e-05, "loss": 0.7431, "step": 68365 }, { "epoch": 0.76, "learning_rate": 3.738262910798122e-05, "loss": 0.7415, "step": 68370 }, { "epoch": 0.76, "learning_rate": 3.7381706380842715e-05, "loss": 0.6954, "step": 68375 }, { "epoch": 0.76, "learning_rate": 3.7380783653704196e-05, "loss": 0.6897, "step": 68380 }, { "epoch": 0.76, "learning_rate": 3.7379860926565683e-05, "loss": 0.7517, "step": 68385 }, { "epoch": 0.76, "learning_rate": 3.737893819942717e-05, "loss": 0.6914, "step": 68390 }, { "epoch": 0.76, "learning_rate": 3.737801547228866e-05, "loss": 0.6954, "step": 68395 }, { "epoch": 0.76, "learning_rate": 3.737709274515015e-05, "loss": 0.6688, "step": 68400 }, { "epoch": 0.76, "learning_rate": 3.7376170018011635e-05, "loss": 0.7131, "step": 68405 }, { "epoch": 0.76, "learning_rate": 3.737524729087312e-05, "loss": 0.7231, "step": 68410 }, { "epoch": 0.76, "learning_rate": 3.737432456373461e-05, "loss": 0.6405, "step": 68415 }, { "epoch": 0.76, "learning_rate": 3.73734018365961e-05, "loss": 0.6552, "step": 68420 }, { "epoch": 0.76, "learning_rate": 3.7372479109457586e-05, "loss": 0.6964, "step": 68425 }, { "epoch": 0.76, "learning_rate": 3.7371556382319074e-05, "loss": 0.7484, "step": 68430 }, { "epoch": 0.76, "learning_rate": 3.737063365518056e-05, "loss": 0.7621, "step": 68435 }, { "epoch": 0.76, "learning_rate": 3.736971092804205e-05, "loss": 0.7242, "step": 68440 }, { "epoch": 0.76, "learning_rate": 3.736878820090354e-05, "loss": 0.7143, "step": 68445 }, { "epoch": 0.76, "learning_rate": 3.7367865473765025e-05, "loss": 0.6853, "step": 68450 }, { "epoch": 0.76, "learning_rate": 3.7366942746626507e-05, "loss": 0.7594, "step": 68455 }, { "epoch": 0.76, "learning_rate": 3.7366020019488e-05, "loss": 0.6612, "step": 68460 }, { "epoch": 0.76, "learning_rate": 3.736509729234949e-05, "loss": 0.7107, "step": 68465 }, { "epoch": 0.76, "learning_rate": 3.736417456521098e-05, "loss": 0.6766, "step": 68470 }, { "epoch": 0.76, "learning_rate": 3.736325183807246e-05, "loss": 0.6935, "step": 68475 }, { "epoch": 0.76, "learning_rate": 3.7362329110933946e-05, "loss": 0.6785, "step": 68480 }, { "epoch": 0.76, "learning_rate": 3.736140638379544e-05, "loss": 0.748, "step": 68485 }, { "epoch": 0.76, "learning_rate": 3.736048365665693e-05, "loss": 0.7332, "step": 68490 }, { "epoch": 0.76, "learning_rate": 3.735956092951841e-05, "loss": 0.7051, "step": 68495 }, { "epoch": 0.76, "learning_rate": 3.73586382023799e-05, "loss": 0.7499, "step": 68500 }, { "epoch": 0.76, "learning_rate": 3.735771547524139e-05, "loss": 0.6988, "step": 68505 }, { "epoch": 0.76, "learning_rate": 3.735679274810287e-05, "loss": 0.7428, "step": 68510 }, { "epoch": 0.76, "learning_rate": 3.735587002096436e-05, "loss": 0.6949, "step": 68515 }, { "epoch": 0.76, "learning_rate": 3.735494729382585e-05, "loss": 0.6744, "step": 68520 }, { "epoch": 0.76, "learning_rate": 3.735402456668734e-05, "loss": 0.7043, "step": 68525 }, { "epoch": 0.76, "learning_rate": 3.7353101839548824e-05, "loss": 0.6345, "step": 68530 }, { "epoch": 0.76, "learning_rate": 3.735217911241031e-05, "loss": 0.6644, "step": 68535 }, { "epoch": 0.76, "learning_rate": 3.73512563852718e-05, "loss": 0.7778, "step": 68540 }, { "epoch": 0.76, "learning_rate": 3.735033365813329e-05, "loss": 0.6925, "step": 68545 }, { "epoch": 0.76, "learning_rate": 3.7349410930994776e-05, "loss": 0.6362, "step": 68550 }, { "epoch": 0.76, "learning_rate": 3.7348488203856264e-05, "loss": 0.6909, "step": 68555 }, { "epoch": 0.76, "learning_rate": 3.734756547671775e-05, "loss": 0.714, "step": 68560 }, { "epoch": 0.76, "learning_rate": 3.734664274957924e-05, "loss": 0.6725, "step": 68565 }, { "epoch": 0.76, "learning_rate": 3.734572002244073e-05, "loss": 0.7402, "step": 68570 }, { "epoch": 0.76, "learning_rate": 3.7344797295302215e-05, "loss": 0.6753, "step": 68575 }, { "epoch": 0.76, "learning_rate": 3.73438745681637e-05, "loss": 0.7018, "step": 68580 }, { "epoch": 0.76, "learning_rate": 3.7342951841025184e-05, "loss": 0.7685, "step": 68585 }, { "epoch": 0.76, "learning_rate": 3.734202911388668e-05, "loss": 0.8026, "step": 68590 }, { "epoch": 0.76, "learning_rate": 3.7341106386748166e-05, "loss": 0.7537, "step": 68595 }, { "epoch": 0.76, "learning_rate": 3.7340183659609654e-05, "loss": 0.6565, "step": 68600 }, { "epoch": 0.76, "learning_rate": 3.7339260932471135e-05, "loss": 0.6993, "step": 68605 }, { "epoch": 0.76, "learning_rate": 3.733833820533263e-05, "loss": 0.6749, "step": 68610 }, { "epoch": 0.76, "learning_rate": 3.733741547819412e-05, "loss": 0.6903, "step": 68615 }, { "epoch": 0.76, "learning_rate": 3.73364927510556e-05, "loss": 0.7236, "step": 68620 }, { "epoch": 0.76, "learning_rate": 3.733557002391709e-05, "loss": 0.633, "step": 68625 }, { "epoch": 0.76, "learning_rate": 3.7334647296778574e-05, "loss": 0.7525, "step": 68630 }, { "epoch": 0.76, "learning_rate": 3.733372456964007e-05, "loss": 0.7145, "step": 68635 }, { "epoch": 0.76, "learning_rate": 3.733280184250155e-05, "loss": 0.6806, "step": 68640 }, { "epoch": 0.76, "learning_rate": 3.733187911536304e-05, "loss": 0.6795, "step": 68645 }, { "epoch": 0.76, "learning_rate": 3.7330956388224526e-05, "loss": 0.7837, "step": 68650 }, { "epoch": 0.76, "learning_rate": 3.7330033661086014e-05, "loss": 0.7529, "step": 68655 }, { "epoch": 0.76, "learning_rate": 3.73291109339475e-05, "loss": 0.7553, "step": 68660 }, { "epoch": 0.76, "learning_rate": 3.732818820680899e-05, "loss": 0.7195, "step": 68665 }, { "epoch": 0.76, "learning_rate": 3.732726547967048e-05, "loss": 0.6973, "step": 68670 }, { "epoch": 0.76, "learning_rate": 3.7326342752531965e-05, "loss": 0.6671, "step": 68675 }, { "epoch": 0.76, "learning_rate": 3.732542002539345e-05, "loss": 0.6834, "step": 68680 }, { "epoch": 0.76, "learning_rate": 3.732449729825494e-05, "loss": 0.7019, "step": 68685 }, { "epoch": 0.76, "learning_rate": 3.732357457111643e-05, "loss": 0.694, "step": 68690 }, { "epoch": 0.76, "learning_rate": 3.732265184397791e-05, "loss": 0.7373, "step": 68695 }, { "epoch": 0.76, "learning_rate": 3.7321729116839404e-05, "loss": 0.7165, "step": 68700 }, { "epoch": 0.76, "learning_rate": 3.732080638970089e-05, "loss": 0.7422, "step": 68705 }, { "epoch": 0.76, "learning_rate": 3.731988366256238e-05, "loss": 0.6795, "step": 68710 }, { "epoch": 0.76, "learning_rate": 3.731896093542386e-05, "loss": 0.7147, "step": 68715 }, { "epoch": 0.76, "learning_rate": 3.7318038208285356e-05, "loss": 0.7522, "step": 68720 }, { "epoch": 0.76, "learning_rate": 3.7317115481146844e-05, "loss": 0.7398, "step": 68725 }, { "epoch": 0.76, "learning_rate": 3.7316192754008325e-05, "loss": 0.6764, "step": 68730 }, { "epoch": 0.76, "learning_rate": 3.731527002686981e-05, "loss": 0.7729, "step": 68735 }, { "epoch": 0.76, "learning_rate": 3.731434729973131e-05, "loss": 0.7331, "step": 68740 }, { "epoch": 0.76, "learning_rate": 3.7313424572592795e-05, "loss": 0.7374, "step": 68745 }, { "epoch": 0.76, "learning_rate": 3.7312501845454276e-05, "loss": 0.7349, "step": 68750 }, { "epoch": 0.76, "learning_rate": 3.7311579118315764e-05, "loss": 0.7476, "step": 68755 }, { "epoch": 0.76, "learning_rate": 3.731065639117726e-05, "loss": 0.6731, "step": 68760 }, { "epoch": 0.76, "learning_rate": 3.730973366403874e-05, "loss": 0.7155, "step": 68765 }, { "epoch": 0.76, "learning_rate": 3.730881093690023e-05, "loss": 0.6519, "step": 68770 }, { "epoch": 0.76, "learning_rate": 3.7307888209761715e-05, "loss": 0.6672, "step": 68775 }, { "epoch": 0.76, "learning_rate": 3.73069654826232e-05, "loss": 0.7186, "step": 68780 }, { "epoch": 0.76, "learning_rate": 3.730604275548469e-05, "loss": 0.7061, "step": 68785 }, { "epoch": 0.76, "learning_rate": 3.730512002834618e-05, "loss": 0.6639, "step": 68790 }, { "epoch": 0.76, "learning_rate": 3.730419730120767e-05, "loss": 0.722, "step": 68795 }, { "epoch": 0.76, "learning_rate": 3.7303274574069155e-05, "loss": 0.7403, "step": 68800 }, { "epoch": 0.76, "learning_rate": 3.730235184693064e-05, "loss": 0.7063, "step": 68805 }, { "epoch": 0.76, "learning_rate": 3.730142911979213e-05, "loss": 0.7369, "step": 68810 }, { "epoch": 0.76, "learning_rate": 3.730050639265362e-05, "loss": 0.7407, "step": 68815 }, { "epoch": 0.76, "learning_rate": 3.7299583665515106e-05, "loss": 0.7067, "step": 68820 }, { "epoch": 0.76, "learning_rate": 3.7298660938376594e-05, "loss": 0.7244, "step": 68825 }, { "epoch": 0.76, "learning_rate": 3.729773821123808e-05, "loss": 0.7507, "step": 68830 }, { "epoch": 0.76, "learning_rate": 3.729681548409957e-05, "loss": 0.7526, "step": 68835 }, { "epoch": 0.76, "learning_rate": 3.729589275696105e-05, "loss": 0.753, "step": 68840 }, { "epoch": 0.76, "learning_rate": 3.729497002982254e-05, "loss": 0.7357, "step": 68845 }, { "epoch": 0.76, "learning_rate": 3.729404730268403e-05, "loss": 0.7417, "step": 68850 }, { "epoch": 0.76, "learning_rate": 3.729312457554552e-05, "loss": 0.6819, "step": 68855 }, { "epoch": 0.76, "learning_rate": 3.7292201848407e-05, "loss": 0.6638, "step": 68860 }, { "epoch": 0.76, "learning_rate": 3.729127912126849e-05, "loss": 0.6954, "step": 68865 }, { "epoch": 0.76, "learning_rate": 3.7290356394129984e-05, "loss": 0.6864, "step": 68870 }, { "epoch": 0.76, "learning_rate": 3.728943366699147e-05, "loss": 0.7155, "step": 68875 }, { "epoch": 0.76, "learning_rate": 3.728851093985295e-05, "loss": 0.741, "step": 68880 }, { "epoch": 0.76, "learning_rate": 3.728758821271444e-05, "loss": 0.6812, "step": 68885 }, { "epoch": 0.76, "learning_rate": 3.7286665485575936e-05, "loss": 0.7138, "step": 68890 }, { "epoch": 0.76, "learning_rate": 3.728574275843742e-05, "loss": 0.7325, "step": 68895 }, { "epoch": 0.76, "learning_rate": 3.7284820031298905e-05, "loss": 0.7389, "step": 68900 }, { "epoch": 0.76, "learning_rate": 3.728389730416039e-05, "loss": 0.7396, "step": 68905 }, { "epoch": 0.76, "learning_rate": 3.728297457702189e-05, "loss": 0.6982, "step": 68910 }, { "epoch": 0.76, "learning_rate": 3.728205184988337e-05, "loss": 0.6745, "step": 68915 }, { "epoch": 0.76, "learning_rate": 3.7281129122744856e-05, "loss": 0.743, "step": 68920 }, { "epoch": 0.76, "learning_rate": 3.7280206395606344e-05, "loss": 0.729, "step": 68925 }, { "epoch": 0.76, "learning_rate": 3.727928366846783e-05, "loss": 0.723, "step": 68930 }, { "epoch": 0.76, "learning_rate": 3.727836094132932e-05, "loss": 0.7341, "step": 68935 }, { "epoch": 0.76, "learning_rate": 3.727743821419081e-05, "loss": 0.7318, "step": 68940 }, { "epoch": 0.76, "learning_rate": 3.7276515487052295e-05, "loss": 0.7399, "step": 68945 }, { "epoch": 0.76, "learning_rate": 3.727559275991378e-05, "loss": 0.7241, "step": 68950 }, { "epoch": 0.76, "learning_rate": 3.727467003277527e-05, "loss": 0.6728, "step": 68955 }, { "epoch": 0.76, "learning_rate": 3.727374730563676e-05, "loss": 0.7589, "step": 68960 }, { "epoch": 0.76, "learning_rate": 3.727282457849825e-05, "loss": 0.7195, "step": 68965 }, { "epoch": 0.76, "learning_rate": 3.727190185135973e-05, "loss": 0.7534, "step": 68970 }, { "epoch": 0.76, "learning_rate": 3.727097912422122e-05, "loss": 0.7542, "step": 68975 }, { "epoch": 0.76, "learning_rate": 3.727005639708271e-05, "loss": 0.7352, "step": 68980 }, { "epoch": 0.76, "learning_rate": 3.72691336699442e-05, "loss": 0.7471, "step": 68985 }, { "epoch": 0.76, "learning_rate": 3.726821094280568e-05, "loss": 0.7372, "step": 68990 }, { "epoch": 0.76, "learning_rate": 3.726728821566717e-05, "loss": 0.6866, "step": 68995 }, { "epoch": 0.76, "learning_rate": 3.726636548852866e-05, "loss": 0.7595, "step": 69000 }, { "epoch": 0.76, "eval_loss": 0.7008190751075745, "eval_runtime": 70.1212, "eval_samples_per_second": 28.522, "eval_steps_per_second": 14.261, "step": 69000 }, { "epoch": 0.76, "learning_rate": 3.726544276139014e-05, "loss": 0.7458, "step": 69005 }, { "epoch": 0.76, "learning_rate": 3.726452003425163e-05, "loss": 0.6329, "step": 69010 }, { "epoch": 0.76, "learning_rate": 3.726359730711312e-05, "loss": 0.7296, "step": 69015 }, { "epoch": 0.76, "learning_rate": 3.726267457997461e-05, "loss": 0.7531, "step": 69020 }, { "epoch": 0.76, "learning_rate": 3.7261751852836094e-05, "loss": 0.712, "step": 69025 }, { "epoch": 0.76, "learning_rate": 3.726082912569758e-05, "loss": 0.7499, "step": 69030 }, { "epoch": 0.76, "learning_rate": 3.725990639855907e-05, "loss": 0.7196, "step": 69035 }, { "epoch": 0.76, "learning_rate": 3.725898367142056e-05, "loss": 0.701, "step": 69040 }, { "epoch": 0.76, "learning_rate": 3.7258060944282046e-05, "loss": 0.7174, "step": 69045 }, { "epoch": 0.76, "learning_rate": 3.7257138217143533e-05, "loss": 0.6554, "step": 69050 }, { "epoch": 0.76, "learning_rate": 3.725621549000502e-05, "loss": 0.6699, "step": 69055 }, { "epoch": 0.76, "learning_rate": 3.725529276286651e-05, "loss": 0.7028, "step": 69060 }, { "epoch": 0.76, "learning_rate": 3.7254370035728e-05, "loss": 0.7137, "step": 69065 }, { "epoch": 0.76, "learning_rate": 3.7253447308589485e-05, "loss": 0.6797, "step": 69070 }, { "epoch": 0.76, "learning_rate": 3.725252458145097e-05, "loss": 0.6786, "step": 69075 }, { "epoch": 0.76, "learning_rate": 3.7251601854312454e-05, "loss": 0.6731, "step": 69080 }, { "epoch": 0.76, "learning_rate": 3.725067912717395e-05, "loss": 0.701, "step": 69085 }, { "epoch": 0.77, "learning_rate": 3.7249756400035436e-05, "loss": 0.7409, "step": 69090 }, { "epoch": 0.77, "learning_rate": 3.7248833672896924e-05, "loss": 0.7186, "step": 69095 }, { "epoch": 0.77, "learning_rate": 3.7247910945758405e-05, "loss": 0.7319, "step": 69100 }, { "epoch": 0.77, "learning_rate": 3.72469882186199e-05, "loss": 0.7408, "step": 69105 }, { "epoch": 0.77, "learning_rate": 3.724606549148139e-05, "loss": 0.7556, "step": 69110 }, { "epoch": 0.77, "learning_rate": 3.724514276434287e-05, "loss": 0.7328, "step": 69115 }, { "epoch": 0.77, "learning_rate": 3.7244220037204357e-05, "loss": 0.8064, "step": 69120 }, { "epoch": 0.77, "learning_rate": 3.724329731006585e-05, "loss": 0.7827, "step": 69125 }, { "epoch": 0.77, "learning_rate": 3.724237458292734e-05, "loss": 0.7082, "step": 69130 }, { "epoch": 0.77, "learning_rate": 3.724145185578882e-05, "loss": 0.7834, "step": 69135 }, { "epoch": 0.77, "learning_rate": 3.724052912865031e-05, "loss": 0.7006, "step": 69140 }, { "epoch": 0.77, "learning_rate": 3.72396064015118e-05, "loss": 0.7165, "step": 69145 }, { "epoch": 0.77, "learning_rate": 3.7238683674373284e-05, "loss": 0.7175, "step": 69150 }, { "epoch": 0.77, "learning_rate": 3.723776094723477e-05, "loss": 0.6948, "step": 69155 }, { "epoch": 0.77, "learning_rate": 3.723683822009626e-05, "loss": 0.6898, "step": 69160 }, { "epoch": 0.77, "learning_rate": 3.723591549295775e-05, "loss": 0.6867, "step": 69165 }, { "epoch": 0.77, "learning_rate": 3.7234992765819235e-05, "loss": 0.7146, "step": 69170 }, { "epoch": 0.77, "learning_rate": 3.723407003868072e-05, "loss": 0.7658, "step": 69175 }, { "epoch": 0.77, "learning_rate": 3.723314731154221e-05, "loss": 0.7205, "step": 69180 }, { "epoch": 0.77, "learning_rate": 3.72322245844037e-05, "loss": 0.6728, "step": 69185 }, { "epoch": 0.77, "learning_rate": 3.7231301857265186e-05, "loss": 0.7321, "step": 69190 }, { "epoch": 0.77, "learning_rate": 3.7230379130126674e-05, "loss": 0.7875, "step": 69195 }, { "epoch": 0.77, "learning_rate": 3.722945640298816e-05, "loss": 0.7599, "step": 69200 }, { "epoch": 0.77, "learning_rate": 3.722853367584965e-05, "loss": 0.6794, "step": 69205 }, { "epoch": 0.77, "learning_rate": 3.722761094871114e-05, "loss": 0.7339, "step": 69210 }, { "epoch": 0.77, "learning_rate": 3.7226688221572626e-05, "loss": 0.6849, "step": 69215 }, { "epoch": 0.77, "learning_rate": 3.7225765494434114e-05, "loss": 0.7367, "step": 69220 }, { "epoch": 0.77, "learning_rate": 3.7224842767295595e-05, "loss": 0.7563, "step": 69225 }, { "epoch": 0.77, "learning_rate": 3.722392004015708e-05, "loss": 0.6814, "step": 69230 }, { "epoch": 0.77, "learning_rate": 3.722299731301858e-05, "loss": 0.6954, "step": 69235 }, { "epoch": 0.77, "learning_rate": 3.7222074585880065e-05, "loss": 0.6718, "step": 69240 }, { "epoch": 0.77, "learning_rate": 3.7221151858741546e-05, "loss": 0.7028, "step": 69245 }, { "epoch": 0.77, "learning_rate": 3.7220229131603034e-05, "loss": 0.6838, "step": 69250 }, { "epoch": 0.77, "learning_rate": 3.721930640446453e-05, "loss": 0.716, "step": 69255 }, { "epoch": 0.77, "learning_rate": 3.7218383677326016e-05, "loss": 0.7515, "step": 69260 }, { "epoch": 0.77, "learning_rate": 3.72174609501875e-05, "loss": 0.7334, "step": 69265 }, { "epoch": 0.77, "learning_rate": 3.7216538223048985e-05, "loss": 0.7299, "step": 69270 }, { "epoch": 0.77, "learning_rate": 3.721561549591048e-05, "loss": 0.6662, "step": 69275 }, { "epoch": 0.77, "learning_rate": 3.721469276877196e-05, "loss": 0.713, "step": 69280 }, { "epoch": 0.77, "learning_rate": 3.721377004163345e-05, "loss": 0.6699, "step": 69285 }, { "epoch": 0.77, "learning_rate": 3.721284731449494e-05, "loss": 0.7421, "step": 69290 }, { "epoch": 0.77, "learning_rate": 3.721192458735643e-05, "loss": 0.6892, "step": 69295 }, { "epoch": 0.77, "learning_rate": 3.721100186021791e-05, "loss": 0.6671, "step": 69300 }, { "epoch": 0.77, "learning_rate": 3.72100791330794e-05, "loss": 0.6614, "step": 69305 }, { "epoch": 0.77, "learning_rate": 3.720915640594089e-05, "loss": 0.7035, "step": 69310 }, { "epoch": 0.77, "learning_rate": 3.7208233678802376e-05, "loss": 0.7296, "step": 69315 }, { "epoch": 0.77, "learning_rate": 3.7207310951663864e-05, "loss": 0.7693, "step": 69320 }, { "epoch": 0.77, "learning_rate": 3.720638822452535e-05, "loss": 0.7015, "step": 69325 }, { "epoch": 0.77, "learning_rate": 3.720546549738684e-05, "loss": 0.6514, "step": 69330 }, { "epoch": 0.77, "learning_rate": 3.720454277024833e-05, "loss": 0.7036, "step": 69335 }, { "epoch": 0.77, "learning_rate": 3.7203620043109815e-05, "loss": 0.7579, "step": 69340 }, { "epoch": 0.77, "learning_rate": 3.72026973159713e-05, "loss": 0.7495, "step": 69345 }, { "epoch": 0.77, "learning_rate": 3.720177458883279e-05, "loss": 0.6924, "step": 69350 }, { "epoch": 0.77, "learning_rate": 3.720085186169427e-05, "loss": 0.6972, "step": 69355 }, { "epoch": 0.77, "learning_rate": 3.7199929134555766e-05, "loss": 0.7464, "step": 69360 }, { "epoch": 0.77, "learning_rate": 3.7199006407417254e-05, "loss": 0.6973, "step": 69365 }, { "epoch": 0.77, "learning_rate": 3.719808368027874e-05, "loss": 0.6734, "step": 69370 }, { "epoch": 0.77, "learning_rate": 3.719716095314022e-05, "loss": 0.6987, "step": 69375 }, { "epoch": 0.77, "learning_rate": 3.719623822600171e-05, "loss": 0.7326, "step": 69380 }, { "epoch": 0.77, "learning_rate": 3.7195315498863206e-05, "loss": 0.7296, "step": 69385 }, { "epoch": 0.77, "learning_rate": 3.719439277172469e-05, "loss": 0.741, "step": 69390 }, { "epoch": 0.77, "learning_rate": 3.7193470044586175e-05, "loss": 0.7257, "step": 69395 }, { "epoch": 0.77, "learning_rate": 3.719254731744766e-05, "loss": 0.723, "step": 69400 }, { "epoch": 0.77, "learning_rate": 3.719162459030916e-05, "loss": 0.726, "step": 69405 }, { "epoch": 0.77, "learning_rate": 3.719070186317064e-05, "loss": 0.7338, "step": 69410 }, { "epoch": 0.77, "learning_rate": 3.7189779136032126e-05, "loss": 0.7435, "step": 69415 }, { "epoch": 0.77, "learning_rate": 3.7188856408893614e-05, "loss": 0.7068, "step": 69420 }, { "epoch": 0.77, "learning_rate": 3.71879336817551e-05, "loss": 0.6975, "step": 69425 }, { "epoch": 0.77, "learning_rate": 3.718701095461659e-05, "loss": 0.7597, "step": 69430 }, { "epoch": 0.77, "learning_rate": 3.718608822747808e-05, "loss": 0.6937, "step": 69435 }, { "epoch": 0.77, "learning_rate": 3.7185165500339565e-05, "loss": 0.6875, "step": 69440 }, { "epoch": 0.77, "learning_rate": 3.718424277320105e-05, "loss": 0.7616, "step": 69445 }, { "epoch": 0.77, "learning_rate": 3.718332004606254e-05, "loss": 0.6853, "step": 69450 }, { "epoch": 0.77, "learning_rate": 3.718239731892403e-05, "loss": 0.6985, "step": 69455 }, { "epoch": 0.77, "learning_rate": 3.718147459178552e-05, "loss": 0.715, "step": 69460 }, { "epoch": 0.77, "learning_rate": 3.7180551864647e-05, "loss": 0.7439, "step": 69465 }, { "epoch": 0.77, "learning_rate": 3.717962913750849e-05, "loss": 0.7251, "step": 69470 }, { "epoch": 0.77, "learning_rate": 3.717870641036998e-05, "loss": 0.695, "step": 69475 }, { "epoch": 0.77, "learning_rate": 3.717778368323147e-05, "loss": 0.7441, "step": 69480 }, { "epoch": 0.77, "learning_rate": 3.717686095609295e-05, "loss": 0.7042, "step": 69485 }, { "epoch": 0.77, "learning_rate": 3.7175938228954444e-05, "loss": 0.7388, "step": 69490 }, { "epoch": 0.77, "learning_rate": 3.717501550181593e-05, "loss": 0.6897, "step": 69495 }, { "epoch": 0.77, "learning_rate": 3.717409277467741e-05, "loss": 0.6908, "step": 69500 }, { "epoch": 0.77, "learning_rate": 3.71731700475389e-05, "loss": 0.6843, "step": 69505 }, { "epoch": 0.77, "learning_rate": 3.7172247320400395e-05, "loss": 0.7631, "step": 69510 }, { "epoch": 0.77, "learning_rate": 3.717132459326188e-05, "loss": 0.6652, "step": 69515 }, { "epoch": 0.77, "learning_rate": 3.7170401866123364e-05, "loss": 0.7108, "step": 69520 }, { "epoch": 0.77, "learning_rate": 3.716947913898485e-05, "loss": 0.7278, "step": 69525 }, { "epoch": 0.77, "learning_rate": 3.716855641184634e-05, "loss": 0.6997, "step": 69530 }, { "epoch": 0.77, "learning_rate": 3.716763368470783e-05, "loss": 0.7642, "step": 69535 }, { "epoch": 0.77, "learning_rate": 3.7166710957569315e-05, "loss": 0.7407, "step": 69540 }, { "epoch": 0.77, "learning_rate": 3.71657882304308e-05, "loss": 0.7304, "step": 69545 }, { "epoch": 0.77, "learning_rate": 3.716486550329229e-05, "loss": 0.7333, "step": 69550 }, { "epoch": 0.77, "learning_rate": 3.716394277615378e-05, "loss": 0.6482, "step": 69555 }, { "epoch": 0.77, "learning_rate": 3.716302004901527e-05, "loss": 0.689, "step": 69560 }, { "epoch": 0.77, "learning_rate": 3.7162097321876755e-05, "loss": 0.7098, "step": 69565 }, { "epoch": 0.77, "learning_rate": 3.716117459473824e-05, "loss": 0.6739, "step": 69570 }, { "epoch": 0.77, "learning_rate": 3.716025186759973e-05, "loss": 0.6825, "step": 69575 }, { "epoch": 0.77, "learning_rate": 3.715932914046122e-05, "loss": 0.7142, "step": 69580 }, { "epoch": 0.77, "learning_rate": 3.7158406413322706e-05, "loss": 0.68, "step": 69585 }, { "epoch": 0.77, "learning_rate": 3.7157483686184194e-05, "loss": 0.7065, "step": 69590 }, { "epoch": 0.77, "learning_rate": 3.715656095904568e-05, "loss": 0.7418, "step": 69595 }, { "epoch": 0.77, "learning_rate": 3.715563823190717e-05, "loss": 0.7689, "step": 69600 }, { "epoch": 0.77, "learning_rate": 3.715471550476866e-05, "loss": 0.7356, "step": 69605 }, { "epoch": 0.77, "learning_rate": 3.715379277763014e-05, "loss": 0.6892, "step": 69610 }, { "epoch": 0.77, "learning_rate": 3.7152870050491626e-05, "loss": 0.7166, "step": 69615 }, { "epoch": 0.77, "learning_rate": 3.715194732335312e-05, "loss": 0.7408, "step": 69620 }, { "epoch": 0.77, "learning_rate": 3.715102459621461e-05, "loss": 0.7199, "step": 69625 }, { "epoch": 0.77, "learning_rate": 3.715010186907609e-05, "loss": 0.7717, "step": 69630 }, { "epoch": 0.77, "learning_rate": 3.714917914193758e-05, "loss": 0.7295, "step": 69635 }, { "epoch": 0.77, "learning_rate": 3.714825641479907e-05, "loss": 0.7535, "step": 69640 }, { "epoch": 0.77, "learning_rate": 3.714733368766056e-05, "loss": 0.7293, "step": 69645 }, { "epoch": 0.77, "learning_rate": 3.714641096052204e-05, "loss": 0.7098, "step": 69650 }, { "epoch": 0.77, "learning_rate": 3.714548823338353e-05, "loss": 0.6423, "step": 69655 }, { "epoch": 0.77, "learning_rate": 3.7144565506245024e-05, "loss": 0.7427, "step": 69660 }, { "epoch": 0.77, "learning_rate": 3.7143642779106505e-05, "loss": 0.7049, "step": 69665 }, { "epoch": 0.77, "learning_rate": 3.714272005196799e-05, "loss": 0.754, "step": 69670 }, { "epoch": 0.77, "learning_rate": 3.714179732482948e-05, "loss": 0.7317, "step": 69675 }, { "epoch": 0.77, "learning_rate": 3.714087459769097e-05, "loss": 0.6633, "step": 69680 }, { "epoch": 0.77, "learning_rate": 3.7139951870552456e-05, "loss": 0.7199, "step": 69685 }, { "epoch": 0.77, "learning_rate": 3.7139029143413944e-05, "loss": 0.6887, "step": 69690 }, { "epoch": 0.77, "learning_rate": 3.713810641627543e-05, "loss": 0.7827, "step": 69695 }, { "epoch": 0.77, "learning_rate": 3.713718368913692e-05, "loss": 0.7194, "step": 69700 }, { "epoch": 0.77, "learning_rate": 3.713626096199841e-05, "loss": 0.6842, "step": 69705 }, { "epoch": 0.77, "learning_rate": 3.7135338234859896e-05, "loss": 0.7096, "step": 69710 }, { "epoch": 0.77, "learning_rate": 3.7134415507721383e-05, "loss": 0.7054, "step": 69715 }, { "epoch": 0.77, "learning_rate": 3.713349278058287e-05, "loss": 0.7328, "step": 69720 }, { "epoch": 0.77, "learning_rate": 3.713257005344436e-05, "loss": 0.6866, "step": 69725 }, { "epoch": 0.77, "learning_rate": 3.713164732630585e-05, "loss": 0.7244, "step": 69730 }, { "epoch": 0.77, "learning_rate": 3.7130724599167335e-05, "loss": 0.701, "step": 69735 }, { "epoch": 0.77, "learning_rate": 3.7129801872028816e-05, "loss": 0.6846, "step": 69740 }, { "epoch": 0.77, "learning_rate": 3.712887914489031e-05, "loss": 0.6432, "step": 69745 }, { "epoch": 0.77, "learning_rate": 3.71279564177518e-05, "loss": 0.7131, "step": 69750 }, { "epoch": 0.77, "learning_rate": 3.7127033690613286e-05, "loss": 0.7163, "step": 69755 }, { "epoch": 0.77, "learning_rate": 3.712611096347477e-05, "loss": 0.6989, "step": 69760 }, { "epoch": 0.77, "learning_rate": 3.7125188236336255e-05, "loss": 0.7354, "step": 69765 }, { "epoch": 0.77, "learning_rate": 3.712426550919775e-05, "loss": 0.7421, "step": 69770 }, { "epoch": 0.77, "learning_rate": 3.712334278205923e-05, "loss": 0.6784, "step": 69775 }, { "epoch": 0.77, "learning_rate": 3.712242005492072e-05, "loss": 0.6842, "step": 69780 }, { "epoch": 0.77, "learning_rate": 3.7121497327782207e-05, "loss": 0.728, "step": 69785 }, { "epoch": 0.77, "learning_rate": 3.71205746006437e-05, "loss": 0.6797, "step": 69790 }, { "epoch": 0.77, "learning_rate": 3.711965187350518e-05, "loss": 0.7017, "step": 69795 }, { "epoch": 0.77, "learning_rate": 3.711872914636667e-05, "loss": 0.6926, "step": 69800 }, { "epoch": 0.77, "learning_rate": 3.711780641922816e-05, "loss": 0.7426, "step": 69805 }, { "epoch": 0.77, "learning_rate": 3.7116883692089646e-05, "loss": 0.6896, "step": 69810 }, { "epoch": 0.77, "learning_rate": 3.7115960964951134e-05, "loss": 0.7276, "step": 69815 }, { "epoch": 0.77, "learning_rate": 3.711503823781262e-05, "loss": 0.7284, "step": 69820 }, { "epoch": 0.77, "learning_rate": 3.711411551067411e-05, "loss": 0.6812, "step": 69825 }, { "epoch": 0.77, "learning_rate": 3.71131927835356e-05, "loss": 0.6982, "step": 69830 }, { "epoch": 0.77, "learning_rate": 3.7112270056397085e-05, "loss": 0.6974, "step": 69835 }, { "epoch": 0.77, "learning_rate": 3.711134732925857e-05, "loss": 0.7235, "step": 69840 }, { "epoch": 0.77, "learning_rate": 3.711042460212006e-05, "loss": 0.7064, "step": 69845 }, { "epoch": 0.77, "learning_rate": 3.710950187498154e-05, "loss": 0.7494, "step": 69850 }, { "epoch": 0.77, "learning_rate": 3.7108579147843036e-05, "loss": 0.6997, "step": 69855 }, { "epoch": 0.77, "learning_rate": 3.7107656420704524e-05, "loss": 0.7123, "step": 69860 }, { "epoch": 0.77, "learning_rate": 3.710673369356601e-05, "loss": 0.7012, "step": 69865 }, { "epoch": 0.77, "learning_rate": 3.710581096642749e-05, "loss": 0.7335, "step": 69870 }, { "epoch": 0.77, "learning_rate": 3.710488823928899e-05, "loss": 0.7178, "step": 69875 }, { "epoch": 0.77, "learning_rate": 3.7103965512150476e-05, "loss": 0.7175, "step": 69880 }, { "epoch": 0.77, "learning_rate": 3.710304278501196e-05, "loss": 0.6848, "step": 69885 }, { "epoch": 0.77, "learning_rate": 3.7102120057873445e-05, "loss": 0.6909, "step": 69890 }, { "epoch": 0.77, "learning_rate": 3.710119733073494e-05, "loss": 0.6776, "step": 69895 }, { "epoch": 0.77, "learning_rate": 3.710027460359643e-05, "loss": 0.7189, "step": 69900 }, { "epoch": 0.77, "learning_rate": 3.709935187645791e-05, "loss": 0.68, "step": 69905 }, { "epoch": 0.77, "learning_rate": 3.7098429149319396e-05, "loss": 0.7879, "step": 69910 }, { "epoch": 0.77, "learning_rate": 3.7097506422180884e-05, "loss": 0.6595, "step": 69915 }, { "epoch": 0.77, "learning_rate": 3.709658369504238e-05, "loss": 0.739, "step": 69920 }, { "epoch": 0.77, "learning_rate": 3.709566096790386e-05, "loss": 0.7229, "step": 69925 }, { "epoch": 0.77, "learning_rate": 3.709473824076535e-05, "loss": 0.7102, "step": 69930 }, { "epoch": 0.77, "learning_rate": 3.7093815513626835e-05, "loss": 0.6506, "step": 69935 }, { "epoch": 0.77, "learning_rate": 3.709289278648832e-05, "loss": 0.7035, "step": 69940 }, { "epoch": 0.77, "learning_rate": 3.709197005934981e-05, "loss": 0.7064, "step": 69945 }, { "epoch": 0.77, "learning_rate": 3.70910473322113e-05, "loss": 0.7233, "step": 69950 }, { "epoch": 0.77, "learning_rate": 3.7090124605072787e-05, "loss": 0.7771, "step": 69955 }, { "epoch": 0.77, "learning_rate": 3.7089201877934274e-05, "loss": 0.6808, "step": 69960 }, { "epoch": 0.77, "learning_rate": 3.708827915079576e-05, "loss": 0.7324, "step": 69965 }, { "epoch": 0.77, "learning_rate": 3.708735642365725e-05, "loss": 0.7458, "step": 69970 }, { "epoch": 0.77, "learning_rate": 3.708643369651874e-05, "loss": 0.7014, "step": 69975 }, { "epoch": 0.77, "learning_rate": 3.7085510969380226e-05, "loss": 0.7428, "step": 69980 }, { "epoch": 0.77, "learning_rate": 3.7084588242241714e-05, "loss": 0.7276, "step": 69985 }, { "epoch": 0.77, "learning_rate": 3.70836655151032e-05, "loss": 0.6847, "step": 69990 }, { "epoch": 0.78, "learning_rate": 3.708274278796469e-05, "loss": 0.746, "step": 69995 }, { "epoch": 0.78, "learning_rate": 3.708182006082617e-05, "loss": 0.7214, "step": 70000 }, { "epoch": 0.78, "eval_loss": 0.6618467569351196, "eval_runtime": 69.912, "eval_samples_per_second": 28.607, "eval_steps_per_second": 14.304, "step": 70000 }, { "epoch": 0.78, "learning_rate": 3.7080897333687665e-05, "loss": 0.757, "step": 70005 }, { "epoch": 0.78, "learning_rate": 3.707997460654915e-05, "loss": 0.6601, "step": 70010 }, { "epoch": 0.78, "learning_rate": 3.7079051879410634e-05, "loss": 0.6951, "step": 70015 }, { "epoch": 0.78, "learning_rate": 3.707812915227212e-05, "loss": 0.6912, "step": 70020 }, { "epoch": 0.78, "learning_rate": 3.7077206425133616e-05, "loss": 0.7379, "step": 70025 }, { "epoch": 0.78, "learning_rate": 3.7076283697995104e-05, "loss": 0.7594, "step": 70030 }, { "epoch": 0.78, "learning_rate": 3.7075360970856585e-05, "loss": 0.6924, "step": 70035 }, { "epoch": 0.78, "learning_rate": 3.707443824371807e-05, "loss": 0.792, "step": 70040 }, { "epoch": 0.78, "learning_rate": 3.707351551657957e-05, "loss": 0.7159, "step": 70045 }, { "epoch": 0.78, "learning_rate": 3.707259278944105e-05, "loss": 0.7486, "step": 70050 }, { "epoch": 0.78, "learning_rate": 3.707167006230254e-05, "loss": 0.7321, "step": 70055 }, { "epoch": 0.78, "learning_rate": 3.7070747335164025e-05, "loss": 0.7101, "step": 70060 }, { "epoch": 0.78, "learning_rate": 3.706982460802551e-05, "loss": 0.7014, "step": 70065 }, { "epoch": 0.78, "learning_rate": 3.7068901880887e-05, "loss": 0.648, "step": 70070 }, { "epoch": 0.78, "learning_rate": 3.706797915374849e-05, "loss": 0.6952, "step": 70075 }, { "epoch": 0.78, "learning_rate": 3.7067056426609976e-05, "loss": 0.685, "step": 70080 }, { "epoch": 0.78, "learning_rate": 3.7066133699471464e-05, "loss": 0.7164, "step": 70085 }, { "epoch": 0.78, "learning_rate": 3.706521097233295e-05, "loss": 0.6703, "step": 70090 }, { "epoch": 0.78, "learning_rate": 3.706428824519444e-05, "loss": 0.7203, "step": 70095 }, { "epoch": 0.78, "learning_rate": 3.706336551805593e-05, "loss": 0.668, "step": 70100 }, { "epoch": 0.78, "learning_rate": 3.7062442790917415e-05, "loss": 0.6968, "step": 70105 }, { "epoch": 0.78, "learning_rate": 3.70615200637789e-05, "loss": 0.7908, "step": 70110 }, { "epoch": 0.78, "learning_rate": 3.706059733664039e-05, "loss": 0.7272, "step": 70115 }, { "epoch": 0.78, "learning_rate": 3.705967460950188e-05, "loss": 0.691, "step": 70120 }, { "epoch": 0.78, "learning_rate": 3.705875188236336e-05, "loss": 0.68, "step": 70125 }, { "epoch": 0.78, "learning_rate": 3.7057829155224855e-05, "loss": 0.6869, "step": 70130 }, { "epoch": 0.78, "learning_rate": 3.705690642808634e-05, "loss": 0.7331, "step": 70135 }, { "epoch": 0.78, "learning_rate": 3.705598370094783e-05, "loss": 0.6813, "step": 70140 }, { "epoch": 0.78, "learning_rate": 3.705506097380931e-05, "loss": 0.6635, "step": 70145 }, { "epoch": 0.78, "learning_rate": 3.70541382466708e-05, "loss": 0.7157, "step": 70150 }, { "epoch": 0.78, "learning_rate": 3.7053215519532294e-05, "loss": 0.707, "step": 70155 }, { "epoch": 0.78, "learning_rate": 3.7052292792393775e-05, "loss": 0.7485, "step": 70160 }, { "epoch": 0.78, "learning_rate": 3.705137006525526e-05, "loss": 0.696, "step": 70165 }, { "epoch": 0.78, "learning_rate": 3.705044733811675e-05, "loss": 0.7366, "step": 70170 }, { "epoch": 0.78, "learning_rate": 3.7049524610978245e-05, "loss": 0.7865, "step": 70175 }, { "epoch": 0.78, "learning_rate": 3.7048601883839726e-05, "loss": 0.6604, "step": 70180 }, { "epoch": 0.78, "learning_rate": 3.7047679156701214e-05, "loss": 0.7199, "step": 70185 }, { "epoch": 0.78, "learning_rate": 3.70467564295627e-05, "loss": 0.725, "step": 70190 }, { "epoch": 0.78, "learning_rate": 3.704583370242419e-05, "loss": 0.7094, "step": 70195 }, { "epoch": 0.78, "learning_rate": 3.704491097528568e-05, "loss": 0.7333, "step": 70200 }, { "epoch": 0.78, "learning_rate": 3.7043988248147165e-05, "loss": 0.6997, "step": 70205 }, { "epoch": 0.78, "learning_rate": 3.704306552100865e-05, "loss": 0.6838, "step": 70210 }, { "epoch": 0.78, "learning_rate": 3.704214279387014e-05, "loss": 0.7078, "step": 70215 }, { "epoch": 0.78, "learning_rate": 3.704122006673163e-05, "loss": 0.6718, "step": 70220 }, { "epoch": 0.78, "learning_rate": 3.704029733959312e-05, "loss": 0.7527, "step": 70225 }, { "epoch": 0.78, "learning_rate": 3.7039374612454605e-05, "loss": 0.7219, "step": 70230 }, { "epoch": 0.78, "learning_rate": 3.7038451885316086e-05, "loss": 0.712, "step": 70235 }, { "epoch": 0.78, "learning_rate": 3.703752915817758e-05, "loss": 0.7425, "step": 70240 }, { "epoch": 0.78, "learning_rate": 3.703660643103907e-05, "loss": 0.726, "step": 70245 }, { "epoch": 0.78, "learning_rate": 3.7035683703900556e-05, "loss": 0.6867, "step": 70250 }, { "epoch": 0.78, "learning_rate": 3.703476097676204e-05, "loss": 0.7285, "step": 70255 }, { "epoch": 0.78, "learning_rate": 3.703383824962353e-05, "loss": 0.7018, "step": 70260 }, { "epoch": 0.78, "learning_rate": 3.703291552248502e-05, "loss": 0.7455, "step": 70265 }, { "epoch": 0.78, "learning_rate": 3.70319927953465e-05, "loss": 0.7022, "step": 70270 }, { "epoch": 0.78, "learning_rate": 3.703107006820799e-05, "loss": 0.6776, "step": 70275 }, { "epoch": 0.78, "learning_rate": 3.703014734106948e-05, "loss": 0.6578, "step": 70280 }, { "epoch": 0.78, "learning_rate": 3.702922461393097e-05, "loss": 0.7334, "step": 70285 }, { "epoch": 0.78, "learning_rate": 3.702830188679245e-05, "loss": 0.7007, "step": 70290 }, { "epoch": 0.78, "learning_rate": 3.702737915965394e-05, "loss": 0.6791, "step": 70295 }, { "epoch": 0.78, "learning_rate": 3.702645643251543e-05, "loss": 0.7061, "step": 70300 }, { "epoch": 0.78, "learning_rate": 3.702553370537692e-05, "loss": 0.6744, "step": 70305 }, { "epoch": 0.78, "learning_rate": 3.7024610978238404e-05, "loss": 0.6814, "step": 70310 }, { "epoch": 0.78, "learning_rate": 3.702368825109989e-05, "loss": 0.6582, "step": 70315 }, { "epoch": 0.78, "learning_rate": 3.702276552396138e-05, "loss": 0.7484, "step": 70320 }, { "epoch": 0.78, "learning_rate": 3.702184279682287e-05, "loss": 0.7015, "step": 70325 }, { "epoch": 0.78, "learning_rate": 3.7020920069684355e-05, "loss": 0.768, "step": 70330 }, { "epoch": 0.78, "learning_rate": 3.701999734254584e-05, "loss": 0.6678, "step": 70335 }, { "epoch": 0.78, "learning_rate": 3.701907461540733e-05, "loss": 0.6824, "step": 70340 }, { "epoch": 0.78, "learning_rate": 3.701815188826882e-05, "loss": 0.7244, "step": 70345 }, { "epoch": 0.78, "learning_rate": 3.7017229161130306e-05, "loss": 0.7189, "step": 70350 }, { "epoch": 0.78, "learning_rate": 3.7016306433991794e-05, "loss": 0.7051, "step": 70355 }, { "epoch": 0.78, "learning_rate": 3.701538370685328e-05, "loss": 0.7265, "step": 70360 }, { "epoch": 0.78, "learning_rate": 3.701446097971476e-05, "loss": 0.7092, "step": 70365 }, { "epoch": 0.78, "learning_rate": 3.701353825257626e-05, "loss": 0.6677, "step": 70370 }, { "epoch": 0.78, "learning_rate": 3.7012615525437746e-05, "loss": 0.6727, "step": 70375 }, { "epoch": 0.78, "learning_rate": 3.7011692798299233e-05, "loss": 0.6862, "step": 70380 }, { "epoch": 0.78, "learning_rate": 3.7010770071160714e-05, "loss": 0.6832, "step": 70385 }, { "epoch": 0.78, "learning_rate": 3.700984734402221e-05, "loss": 0.7194, "step": 70390 }, { "epoch": 0.78, "learning_rate": 3.70089246168837e-05, "loss": 0.7604, "step": 70395 }, { "epoch": 0.78, "learning_rate": 3.700800188974518e-05, "loss": 0.7391, "step": 70400 }, { "epoch": 0.78, "learning_rate": 3.7007079162606666e-05, "loss": 0.7041, "step": 70405 }, { "epoch": 0.78, "learning_rate": 3.700615643546816e-05, "loss": 0.6785, "step": 70410 }, { "epoch": 0.78, "learning_rate": 3.700523370832965e-05, "loss": 0.7453, "step": 70415 }, { "epoch": 0.78, "learning_rate": 3.700431098119113e-05, "loss": 0.698, "step": 70420 }, { "epoch": 0.78, "learning_rate": 3.700338825405262e-05, "loss": 0.7554, "step": 70425 }, { "epoch": 0.78, "learning_rate": 3.700246552691411e-05, "loss": 0.7734, "step": 70430 }, { "epoch": 0.78, "learning_rate": 3.700154279977559e-05, "loss": 0.729, "step": 70435 }, { "epoch": 0.78, "learning_rate": 3.700062007263708e-05, "loss": 0.6974, "step": 70440 }, { "epoch": 0.78, "learning_rate": 3.699969734549857e-05, "loss": 0.7049, "step": 70445 }, { "epoch": 0.78, "learning_rate": 3.6998774618360057e-05, "loss": 0.7072, "step": 70450 }, { "epoch": 0.78, "learning_rate": 3.6997851891221544e-05, "loss": 0.6789, "step": 70455 }, { "epoch": 0.78, "learning_rate": 3.699692916408303e-05, "loss": 0.7188, "step": 70460 }, { "epoch": 0.78, "learning_rate": 3.699600643694452e-05, "loss": 0.6991, "step": 70465 }, { "epoch": 0.78, "learning_rate": 3.699508370980601e-05, "loss": 0.7118, "step": 70470 }, { "epoch": 0.78, "learning_rate": 3.6994160982667496e-05, "loss": 0.6734, "step": 70475 }, { "epoch": 0.78, "learning_rate": 3.6993238255528984e-05, "loss": 0.6771, "step": 70480 }, { "epoch": 0.78, "learning_rate": 3.699231552839047e-05, "loss": 0.6605, "step": 70485 }, { "epoch": 0.78, "learning_rate": 3.699139280125196e-05, "loss": 0.731, "step": 70490 }, { "epoch": 0.78, "learning_rate": 3.699047007411345e-05, "loss": 0.7558, "step": 70495 }, { "epoch": 0.78, "learning_rate": 3.6989547346974935e-05, "loss": 0.7189, "step": 70500 }, { "epoch": 0.78, "learning_rate": 3.698862461983642e-05, "loss": 0.6774, "step": 70505 }, { "epoch": 0.78, "learning_rate": 3.6987701892697904e-05, "loss": 0.701, "step": 70510 }, { "epoch": 0.78, "learning_rate": 3.698677916555939e-05, "loss": 0.681, "step": 70515 }, { "epoch": 0.78, "learning_rate": 3.6985856438420886e-05, "loss": 0.7315, "step": 70520 }, { "epoch": 0.78, "learning_rate": 3.6984933711282374e-05, "loss": 0.7382, "step": 70525 }, { "epoch": 0.78, "learning_rate": 3.6984010984143855e-05, "loss": 0.6828, "step": 70530 }, { "epoch": 0.78, "learning_rate": 3.698308825700534e-05, "loss": 0.7813, "step": 70535 }, { "epoch": 0.78, "learning_rate": 3.698216552986684e-05, "loss": 0.7299, "step": 70540 }, { "epoch": 0.78, "learning_rate": 3.698124280272832e-05, "loss": 0.6795, "step": 70545 }, { "epoch": 0.78, "learning_rate": 3.698032007558981e-05, "loss": 0.6981, "step": 70550 }, { "epoch": 0.78, "learning_rate": 3.6979397348451295e-05, "loss": 0.7432, "step": 70555 }, { "epoch": 0.78, "learning_rate": 3.697847462131279e-05, "loss": 0.7125, "step": 70560 }, { "epoch": 0.78, "learning_rate": 3.697755189417427e-05, "loss": 0.6239, "step": 70565 }, { "epoch": 0.78, "learning_rate": 3.697662916703576e-05, "loss": 0.7593, "step": 70570 }, { "epoch": 0.78, "learning_rate": 3.6975706439897246e-05, "loss": 0.6981, "step": 70575 }, { "epoch": 0.78, "learning_rate": 3.6974783712758734e-05, "loss": 0.7189, "step": 70580 }, { "epoch": 0.78, "learning_rate": 3.697386098562022e-05, "loss": 0.7497, "step": 70585 }, { "epoch": 0.78, "learning_rate": 3.697293825848171e-05, "loss": 0.7443, "step": 70590 }, { "epoch": 0.78, "learning_rate": 3.69720155313432e-05, "loss": 0.7085, "step": 70595 }, { "epoch": 0.78, "learning_rate": 3.6971092804204685e-05, "loss": 0.7683, "step": 70600 }, { "epoch": 0.78, "learning_rate": 3.697017007706617e-05, "loss": 0.7098, "step": 70605 }, { "epoch": 0.78, "learning_rate": 3.696924734992766e-05, "loss": 0.7253, "step": 70610 }, { "epoch": 0.78, "learning_rate": 3.696832462278915e-05, "loss": 0.7071, "step": 70615 }, { "epoch": 0.78, "learning_rate": 3.696740189565063e-05, "loss": 0.7529, "step": 70620 }, { "epoch": 0.78, "learning_rate": 3.6966479168512124e-05, "loss": 0.7201, "step": 70625 }, { "epoch": 0.78, "learning_rate": 3.696555644137361e-05, "loss": 0.7142, "step": 70630 }, { "epoch": 0.78, "learning_rate": 3.69646337142351e-05, "loss": 0.7172, "step": 70635 }, { "epoch": 0.78, "learning_rate": 3.696371098709658e-05, "loss": 0.7179, "step": 70640 }, { "epoch": 0.78, "learning_rate": 3.6962788259958076e-05, "loss": 0.7538, "step": 70645 }, { "epoch": 0.78, "learning_rate": 3.6961865532819564e-05, "loss": 0.6545, "step": 70650 }, { "epoch": 0.78, "learning_rate": 3.6960942805681045e-05, "loss": 0.8096, "step": 70655 }, { "epoch": 0.78, "learning_rate": 3.696002007854253e-05, "loss": 0.6714, "step": 70660 }, { "epoch": 0.78, "learning_rate": 3.695909735140402e-05, "loss": 0.7198, "step": 70665 }, { "epoch": 0.78, "learning_rate": 3.6958174624265515e-05, "loss": 0.7222, "step": 70670 }, { "epoch": 0.78, "learning_rate": 3.6957251897126996e-05, "loss": 0.7303, "step": 70675 }, { "epoch": 0.78, "learning_rate": 3.6956329169988484e-05, "loss": 0.7493, "step": 70680 }, { "epoch": 0.78, "learning_rate": 3.695540644284997e-05, "loss": 0.7053, "step": 70685 }, { "epoch": 0.78, "learning_rate": 3.6954483715711466e-05, "loss": 0.7269, "step": 70690 }, { "epoch": 0.78, "learning_rate": 3.695356098857295e-05, "loss": 0.6965, "step": 70695 }, { "epoch": 0.78, "learning_rate": 3.6952638261434435e-05, "loss": 0.6958, "step": 70700 }, { "epoch": 0.78, "learning_rate": 3.695171553429592e-05, "loss": 0.725, "step": 70705 }, { "epoch": 0.78, "learning_rate": 3.695079280715741e-05, "loss": 0.747, "step": 70710 }, { "epoch": 0.78, "learning_rate": 3.69498700800189e-05, "loss": 0.7104, "step": 70715 }, { "epoch": 0.78, "learning_rate": 3.694894735288039e-05, "loss": 0.7021, "step": 70720 }, { "epoch": 0.78, "learning_rate": 3.6948024625741875e-05, "loss": 0.6856, "step": 70725 }, { "epoch": 0.78, "learning_rate": 3.694710189860336e-05, "loss": 0.6624, "step": 70730 }, { "epoch": 0.78, "learning_rate": 3.694617917146485e-05, "loss": 0.6973, "step": 70735 }, { "epoch": 0.78, "learning_rate": 3.694525644432634e-05, "loss": 0.6996, "step": 70740 }, { "epoch": 0.78, "learning_rate": 3.6944333717187826e-05, "loss": 0.6358, "step": 70745 }, { "epoch": 0.78, "learning_rate": 3.694341099004931e-05, "loss": 0.7522, "step": 70750 }, { "epoch": 0.78, "learning_rate": 3.69424882629108e-05, "loss": 0.681, "step": 70755 }, { "epoch": 0.78, "learning_rate": 3.694156553577229e-05, "loss": 0.7674, "step": 70760 }, { "epoch": 0.78, "learning_rate": 3.694064280863378e-05, "loss": 0.7327, "step": 70765 }, { "epoch": 0.78, "learning_rate": 3.693972008149526e-05, "loss": 0.7087, "step": 70770 }, { "epoch": 0.78, "learning_rate": 3.693879735435675e-05, "loss": 0.6775, "step": 70775 }, { "epoch": 0.78, "learning_rate": 3.693787462721824e-05, "loss": 0.7125, "step": 70780 }, { "epoch": 0.78, "learning_rate": 3.693695190007972e-05, "loss": 0.7151, "step": 70785 }, { "epoch": 0.78, "learning_rate": 3.693602917294121e-05, "loss": 0.752, "step": 70790 }, { "epoch": 0.78, "learning_rate": 3.6935106445802705e-05, "loss": 0.7298, "step": 70795 }, { "epoch": 0.78, "learning_rate": 3.693418371866419e-05, "loss": 0.7234, "step": 70800 }, { "epoch": 0.78, "learning_rate": 3.6933260991525673e-05, "loss": 0.6572, "step": 70805 }, { "epoch": 0.78, "learning_rate": 3.693233826438716e-05, "loss": 0.6909, "step": 70810 }, { "epoch": 0.78, "learning_rate": 3.6931415537248656e-05, "loss": 0.691, "step": 70815 }, { "epoch": 0.78, "learning_rate": 3.693049281011014e-05, "loss": 0.7404, "step": 70820 }, { "epoch": 0.78, "learning_rate": 3.6929570082971625e-05, "loss": 0.7273, "step": 70825 }, { "epoch": 0.78, "learning_rate": 3.692864735583311e-05, "loss": 0.7269, "step": 70830 }, { "epoch": 0.78, "learning_rate": 3.69277246286946e-05, "loss": 0.7477, "step": 70835 }, { "epoch": 0.78, "learning_rate": 3.692680190155609e-05, "loss": 0.7142, "step": 70840 }, { "epoch": 0.78, "learning_rate": 3.6925879174417576e-05, "loss": 0.6956, "step": 70845 }, { "epoch": 0.78, "learning_rate": 3.6924956447279064e-05, "loss": 0.7781, "step": 70850 }, { "epoch": 0.78, "learning_rate": 3.692403372014055e-05, "loss": 0.685, "step": 70855 }, { "epoch": 0.78, "learning_rate": 3.692311099300204e-05, "loss": 0.6859, "step": 70860 }, { "epoch": 0.78, "learning_rate": 3.692218826586353e-05, "loss": 0.7494, "step": 70865 }, { "epoch": 0.78, "learning_rate": 3.6921265538725015e-05, "loss": 0.6773, "step": 70870 }, { "epoch": 0.78, "learning_rate": 3.69203428115865e-05, "loss": 0.65, "step": 70875 }, { "epoch": 0.78, "learning_rate": 3.691942008444799e-05, "loss": 0.6854, "step": 70880 }, { "epoch": 0.78, "learning_rate": 3.691849735730948e-05, "loss": 0.6674, "step": 70885 }, { "epoch": 0.78, "learning_rate": 3.691757463017097e-05, "loss": 0.7316, "step": 70890 }, { "epoch": 0.78, "learning_rate": 3.691665190303245e-05, "loss": 0.6815, "step": 70895 }, { "epoch": 0.79, "learning_rate": 3.6915729175893936e-05, "loss": 0.7098, "step": 70900 }, { "epoch": 0.79, "learning_rate": 3.691480644875543e-05, "loss": 0.6923, "step": 70905 }, { "epoch": 0.79, "learning_rate": 3.691388372161692e-05, "loss": 0.6829, "step": 70910 }, { "epoch": 0.79, "learning_rate": 3.69129609944784e-05, "loss": 0.6724, "step": 70915 }, { "epoch": 0.79, "learning_rate": 3.691203826733989e-05, "loss": 0.7111, "step": 70920 }, { "epoch": 0.79, "learning_rate": 3.691111554020138e-05, "loss": 0.716, "step": 70925 }, { "epoch": 0.79, "learning_rate": 3.691019281306286e-05, "loss": 0.7412, "step": 70930 }, { "epoch": 0.79, "learning_rate": 3.690927008592435e-05, "loss": 0.6775, "step": 70935 }, { "epoch": 0.79, "learning_rate": 3.690834735878584e-05, "loss": 0.7515, "step": 70940 }, { "epoch": 0.79, "learning_rate": 3.690742463164733e-05, "loss": 0.7602, "step": 70945 }, { "epoch": 0.79, "learning_rate": 3.6906501904508814e-05, "loss": 0.6873, "step": 70950 }, { "epoch": 0.79, "learning_rate": 3.69055791773703e-05, "loss": 0.7481, "step": 70955 }, { "epoch": 0.79, "learning_rate": 3.690465645023179e-05, "loss": 0.6394, "step": 70960 }, { "epoch": 0.79, "learning_rate": 3.690373372309328e-05, "loss": 0.6722, "step": 70965 }, { "epoch": 0.79, "learning_rate": 3.6902810995954766e-05, "loss": 0.665, "step": 70970 }, { "epoch": 0.79, "learning_rate": 3.6901888268816254e-05, "loss": 0.7015, "step": 70975 }, { "epoch": 0.79, "learning_rate": 3.690096554167774e-05, "loss": 0.7324, "step": 70980 }, { "epoch": 0.79, "learning_rate": 3.690004281453923e-05, "loss": 0.7102, "step": 70985 }, { "epoch": 0.79, "learning_rate": 3.689912008740072e-05, "loss": 0.7076, "step": 70990 }, { "epoch": 0.79, "learning_rate": 3.6898197360262205e-05, "loss": 0.6606, "step": 70995 }, { "epoch": 0.79, "learning_rate": 3.689727463312369e-05, "loss": 0.7111, "step": 71000 }, { "epoch": 0.79, "eval_loss": 0.6850002408027649, "eval_runtime": 69.5835, "eval_samples_per_second": 28.742, "eval_steps_per_second": 14.371, "step": 71000 }, { "epoch": 0.79, "learning_rate": 3.6896351905985174e-05, "loss": 0.722, "step": 71005 }, { "epoch": 0.79, "learning_rate": 3.689542917884667e-05, "loss": 0.7023, "step": 71010 }, { "epoch": 0.79, "learning_rate": 3.6894506451708156e-05, "loss": 0.757, "step": 71015 }, { "epoch": 0.79, "learning_rate": 3.6893583724569644e-05, "loss": 0.7295, "step": 71020 }, { "epoch": 0.79, "learning_rate": 3.6892660997431125e-05, "loss": 0.7109, "step": 71025 }, { "epoch": 0.79, "learning_rate": 3.689173827029262e-05, "loss": 0.7533, "step": 71030 }, { "epoch": 0.79, "learning_rate": 3.689081554315411e-05, "loss": 0.7351, "step": 71035 }, { "epoch": 0.79, "learning_rate": 3.688989281601559e-05, "loss": 0.7487, "step": 71040 }, { "epoch": 0.79, "learning_rate": 3.688897008887708e-05, "loss": 0.7105, "step": 71045 }, { "epoch": 0.79, "learning_rate": 3.6888047361738564e-05, "loss": 0.6913, "step": 71050 }, { "epoch": 0.79, "learning_rate": 3.688712463460006e-05, "loss": 0.7479, "step": 71055 }, { "epoch": 0.79, "learning_rate": 3.688620190746154e-05, "loss": 0.6782, "step": 71060 }, { "epoch": 0.79, "learning_rate": 3.688527918032303e-05, "loss": 0.72, "step": 71065 }, { "epoch": 0.79, "learning_rate": 3.6884356453184516e-05, "loss": 0.7651, "step": 71070 }, { "epoch": 0.79, "learning_rate": 3.688343372604601e-05, "loss": 0.6654, "step": 71075 }, { "epoch": 0.79, "learning_rate": 3.688251099890749e-05, "loss": 0.7276, "step": 71080 }, { "epoch": 0.79, "learning_rate": 3.688158827176898e-05, "loss": 0.7968, "step": 71085 }, { "epoch": 0.79, "learning_rate": 3.688066554463047e-05, "loss": 0.7223, "step": 71090 }, { "epoch": 0.79, "learning_rate": 3.6879742817491955e-05, "loss": 0.6894, "step": 71095 }, { "epoch": 0.79, "learning_rate": 3.687882009035344e-05, "loss": 0.7421, "step": 71100 }, { "epoch": 0.79, "learning_rate": 3.687789736321493e-05, "loss": 0.6705, "step": 71105 }, { "epoch": 0.79, "learning_rate": 3.687697463607642e-05, "loss": 0.6863, "step": 71110 }, { "epoch": 0.79, "learning_rate": 3.6876051908937907e-05, "loss": 0.6635, "step": 71115 }, { "epoch": 0.79, "learning_rate": 3.6875129181799394e-05, "loss": 0.7337, "step": 71120 }, { "epoch": 0.79, "learning_rate": 3.687420645466088e-05, "loss": 0.7161, "step": 71125 }, { "epoch": 0.79, "learning_rate": 3.687328372752237e-05, "loss": 0.7232, "step": 71130 }, { "epoch": 0.79, "learning_rate": 3.687236100038385e-05, "loss": 0.7481, "step": 71135 }, { "epoch": 0.79, "learning_rate": 3.6871438273245346e-05, "loss": 0.7384, "step": 71140 }, { "epoch": 0.79, "learning_rate": 3.6870515546106834e-05, "loss": 0.7449, "step": 71145 }, { "epoch": 0.79, "learning_rate": 3.686959281896832e-05, "loss": 0.7189, "step": 71150 }, { "epoch": 0.79, "learning_rate": 3.68686700918298e-05, "loss": 0.7293, "step": 71155 }, { "epoch": 0.79, "learning_rate": 3.68677473646913e-05, "loss": 0.6914, "step": 71160 }, { "epoch": 0.79, "learning_rate": 3.6866824637552785e-05, "loss": 0.7147, "step": 71165 }, { "epoch": 0.79, "learning_rate": 3.6865901910414266e-05, "loss": 0.7323, "step": 71170 }, { "epoch": 0.79, "learning_rate": 3.6864979183275754e-05, "loss": 0.7488, "step": 71175 }, { "epoch": 0.79, "learning_rate": 3.686405645613725e-05, "loss": 0.6905, "step": 71180 }, { "epoch": 0.79, "learning_rate": 3.6863133728998736e-05, "loss": 0.6838, "step": 71185 }, { "epoch": 0.79, "learning_rate": 3.686221100186022e-05, "loss": 0.7274, "step": 71190 }, { "epoch": 0.79, "learning_rate": 3.6861288274721705e-05, "loss": 0.7081, "step": 71195 }, { "epoch": 0.79, "learning_rate": 3.686036554758319e-05, "loss": 0.682, "step": 71200 }, { "epoch": 0.79, "learning_rate": 3.685944282044468e-05, "loss": 0.7073, "step": 71205 }, { "epoch": 0.79, "learning_rate": 3.685852009330617e-05, "loss": 0.7422, "step": 71210 }, { "epoch": 0.79, "learning_rate": 3.685759736616766e-05, "loss": 0.7494, "step": 71215 }, { "epoch": 0.79, "learning_rate": 3.6856674639029145e-05, "loss": 0.696, "step": 71220 }, { "epoch": 0.79, "learning_rate": 3.685575191189063e-05, "loss": 0.6742, "step": 71225 }, { "epoch": 0.79, "learning_rate": 3.685482918475212e-05, "loss": 0.7012, "step": 71230 }, { "epoch": 0.79, "learning_rate": 3.685390645761361e-05, "loss": 0.7736, "step": 71235 }, { "epoch": 0.79, "learning_rate": 3.6852983730475096e-05, "loss": 0.7315, "step": 71240 }, { "epoch": 0.79, "learning_rate": 3.6852061003336584e-05, "loss": 0.7116, "step": 71245 }, { "epoch": 0.79, "learning_rate": 3.685113827619807e-05, "loss": 0.7257, "step": 71250 }, { "epoch": 0.79, "learning_rate": 3.685021554905956e-05, "loss": 0.7941, "step": 71255 }, { "epoch": 0.79, "learning_rate": 3.684929282192105e-05, "loss": 0.7928, "step": 71260 }, { "epoch": 0.79, "learning_rate": 3.6848370094782535e-05, "loss": 0.6608, "step": 71265 }, { "epoch": 0.79, "learning_rate": 3.684744736764402e-05, "loss": 0.7001, "step": 71270 }, { "epoch": 0.79, "learning_rate": 3.684652464050551e-05, "loss": 0.6948, "step": 71275 }, { "epoch": 0.79, "learning_rate": 3.684560191336699e-05, "loss": 0.6754, "step": 71280 }, { "epoch": 0.79, "learning_rate": 3.684467918622848e-05, "loss": 0.6421, "step": 71285 }, { "epoch": 0.79, "learning_rate": 3.6843756459089974e-05, "loss": 0.6978, "step": 71290 }, { "epoch": 0.79, "learning_rate": 3.684283373195146e-05, "loss": 0.7822, "step": 71295 }, { "epoch": 0.79, "learning_rate": 3.684191100481294e-05, "loss": 0.6857, "step": 71300 }, { "epoch": 0.79, "learning_rate": 3.684098827767443e-05, "loss": 0.6807, "step": 71305 }, { "epoch": 0.79, "learning_rate": 3.6840065550535926e-05, "loss": 0.7411, "step": 71310 }, { "epoch": 0.79, "learning_rate": 3.683914282339741e-05, "loss": 0.7252, "step": 71315 }, { "epoch": 0.79, "learning_rate": 3.6838220096258895e-05, "loss": 0.7663, "step": 71320 }, { "epoch": 0.79, "learning_rate": 3.683729736912038e-05, "loss": 0.7027, "step": 71325 }, { "epoch": 0.79, "learning_rate": 3.683637464198188e-05, "loss": 0.7194, "step": 71330 }, { "epoch": 0.79, "learning_rate": 3.683545191484336e-05, "loss": 0.688, "step": 71335 }, { "epoch": 0.79, "learning_rate": 3.6834529187704846e-05, "loss": 0.701, "step": 71340 }, { "epoch": 0.79, "learning_rate": 3.6833606460566334e-05, "loss": 0.7413, "step": 71345 }, { "epoch": 0.79, "learning_rate": 3.683268373342782e-05, "loss": 0.6896, "step": 71350 }, { "epoch": 0.79, "learning_rate": 3.683176100628931e-05, "loss": 0.6691, "step": 71355 }, { "epoch": 0.79, "learning_rate": 3.68308382791508e-05, "loss": 0.6684, "step": 71360 }, { "epoch": 0.79, "learning_rate": 3.6829915552012285e-05, "loss": 0.6707, "step": 71365 }, { "epoch": 0.79, "learning_rate": 3.682899282487377e-05, "loss": 0.6982, "step": 71370 }, { "epoch": 0.79, "learning_rate": 3.682807009773526e-05, "loss": 0.6458, "step": 71375 }, { "epoch": 0.79, "learning_rate": 3.682714737059675e-05, "loss": 0.6839, "step": 71380 }, { "epoch": 0.79, "learning_rate": 3.682622464345824e-05, "loss": 0.7704, "step": 71385 }, { "epoch": 0.79, "learning_rate": 3.682530191631972e-05, "loss": 0.7099, "step": 71390 }, { "epoch": 0.79, "learning_rate": 3.682437918918121e-05, "loss": 0.7407, "step": 71395 }, { "epoch": 0.79, "learning_rate": 3.68234564620427e-05, "loss": 0.715, "step": 71400 }, { "epoch": 0.79, "learning_rate": 3.682253373490419e-05, "loss": 0.7265, "step": 71405 }, { "epoch": 0.79, "learning_rate": 3.682161100776567e-05, "loss": 0.7356, "step": 71410 }, { "epoch": 0.79, "learning_rate": 3.6820688280627164e-05, "loss": 0.7139, "step": 71415 }, { "epoch": 0.79, "learning_rate": 3.681976555348865e-05, "loss": 0.7076, "step": 71420 }, { "epoch": 0.79, "learning_rate": 3.681884282635013e-05, "loss": 0.6477, "step": 71425 }, { "epoch": 0.79, "learning_rate": 3.681792009921162e-05, "loss": 0.7003, "step": 71430 }, { "epoch": 0.79, "learning_rate": 3.681699737207311e-05, "loss": 0.7755, "step": 71435 }, { "epoch": 0.79, "learning_rate": 3.68160746449346e-05, "loss": 0.699, "step": 71440 }, { "epoch": 0.79, "learning_rate": 3.6815151917796084e-05, "loss": 0.6914, "step": 71445 }, { "epoch": 0.79, "learning_rate": 3.681422919065757e-05, "loss": 0.7073, "step": 71450 }, { "epoch": 0.79, "learning_rate": 3.681330646351906e-05, "loss": 0.6735, "step": 71455 }, { "epoch": 0.79, "learning_rate": 3.6812383736380555e-05, "loss": 0.6959, "step": 71460 }, { "epoch": 0.79, "learning_rate": 3.6811461009242036e-05, "loss": 0.7072, "step": 71465 }, { "epoch": 0.79, "learning_rate": 3.6810538282103523e-05, "loss": 0.7123, "step": 71470 }, { "epoch": 0.79, "learning_rate": 3.680961555496501e-05, "loss": 0.7226, "step": 71475 }, { "epoch": 0.79, "learning_rate": 3.68086928278265e-05, "loss": 0.678, "step": 71480 }, { "epoch": 0.79, "learning_rate": 3.680777010068799e-05, "loss": 0.6858, "step": 71485 }, { "epoch": 0.79, "learning_rate": 3.6806847373549475e-05, "loss": 0.7492, "step": 71490 }, { "epoch": 0.79, "learning_rate": 3.680592464641096e-05, "loss": 0.7178, "step": 71495 }, { "epoch": 0.79, "learning_rate": 3.6805001919272444e-05, "loss": 0.7774, "step": 71500 }, { "epoch": 0.79, "learning_rate": 3.680407919213394e-05, "loss": 0.7191, "step": 71505 }, { "epoch": 0.79, "learning_rate": 3.6803156464995426e-05, "loss": 0.7119, "step": 71510 }, { "epoch": 0.79, "learning_rate": 3.6802233737856914e-05, "loss": 0.7265, "step": 71515 }, { "epoch": 0.79, "learning_rate": 3.6801311010718395e-05, "loss": 0.7031, "step": 71520 }, { "epoch": 0.79, "learning_rate": 3.680038828357989e-05, "loss": 0.7345, "step": 71525 }, { "epoch": 0.79, "learning_rate": 3.679946555644138e-05, "loss": 0.7086, "step": 71530 }, { "epoch": 0.79, "learning_rate": 3.6798542829302865e-05, "loss": 0.683, "step": 71535 }, { "epoch": 0.79, "learning_rate": 3.6797620102164347e-05, "loss": 0.6813, "step": 71540 }, { "epoch": 0.79, "learning_rate": 3.679669737502584e-05, "loss": 0.7018, "step": 71545 }, { "epoch": 0.79, "learning_rate": 3.679577464788733e-05, "loss": 0.7555, "step": 71550 }, { "epoch": 0.79, "learning_rate": 3.679485192074881e-05, "loss": 0.7648, "step": 71555 }, { "epoch": 0.79, "learning_rate": 3.67939291936103e-05, "loss": 0.6705, "step": 71560 }, { "epoch": 0.79, "learning_rate": 3.679300646647179e-05, "loss": 0.6884, "step": 71565 }, { "epoch": 0.79, "learning_rate": 3.679208373933328e-05, "loss": 0.7308, "step": 71570 }, { "epoch": 0.79, "learning_rate": 3.679116101219476e-05, "loss": 0.6232, "step": 71575 }, { "epoch": 0.79, "learning_rate": 3.679023828505625e-05, "loss": 0.705, "step": 71580 }, { "epoch": 0.79, "learning_rate": 3.678931555791774e-05, "loss": 0.7125, "step": 71585 }, { "epoch": 0.79, "learning_rate": 3.6788392830779225e-05, "loss": 0.7122, "step": 71590 }, { "epoch": 0.79, "learning_rate": 3.678747010364071e-05, "loss": 0.7163, "step": 71595 }, { "epoch": 0.79, "learning_rate": 3.67865473765022e-05, "loss": 0.7527, "step": 71600 }, { "epoch": 0.79, "learning_rate": 3.678562464936369e-05, "loss": 0.7317, "step": 71605 }, { "epoch": 0.79, "learning_rate": 3.6784701922225176e-05, "loss": 0.6808, "step": 71610 }, { "epoch": 0.79, "learning_rate": 3.6783779195086664e-05, "loss": 0.6627, "step": 71615 }, { "epoch": 0.79, "learning_rate": 3.678285646794815e-05, "loss": 0.7373, "step": 71620 }, { "epoch": 0.79, "learning_rate": 3.678193374080964e-05, "loss": 0.7365, "step": 71625 }, { "epoch": 0.79, "learning_rate": 3.678101101367113e-05, "loss": 0.7076, "step": 71630 }, { "epoch": 0.79, "learning_rate": 3.6780088286532616e-05, "loss": 0.7244, "step": 71635 }, { "epoch": 0.79, "learning_rate": 3.6779165559394104e-05, "loss": 0.7204, "step": 71640 }, { "epoch": 0.79, "learning_rate": 3.677824283225559e-05, "loss": 0.7136, "step": 71645 }, { "epoch": 0.79, "learning_rate": 3.677732010511708e-05, "loss": 0.7938, "step": 71650 }, { "epoch": 0.79, "learning_rate": 3.677639737797857e-05, "loss": 0.7447, "step": 71655 }, { "epoch": 0.79, "learning_rate": 3.6775474650840055e-05, "loss": 0.6955, "step": 71660 }, { "epoch": 0.79, "learning_rate": 3.6774551923701536e-05, "loss": 0.706, "step": 71665 }, { "epoch": 0.79, "learning_rate": 3.6773629196563024e-05, "loss": 0.7318, "step": 71670 }, { "epoch": 0.79, "learning_rate": 3.677270646942452e-05, "loss": 0.7208, "step": 71675 }, { "epoch": 0.79, "learning_rate": 3.6771783742286006e-05, "loss": 0.6584, "step": 71680 }, { "epoch": 0.79, "learning_rate": 3.677086101514749e-05, "loss": 0.6979, "step": 71685 }, { "epoch": 0.79, "learning_rate": 3.6769938288008975e-05, "loss": 0.7068, "step": 71690 }, { "epoch": 0.79, "learning_rate": 3.676901556087047e-05, "loss": 0.7181, "step": 71695 }, { "epoch": 0.79, "learning_rate": 3.676809283373195e-05, "loss": 0.6928, "step": 71700 }, { "epoch": 0.79, "learning_rate": 3.676717010659344e-05, "loss": 0.7505, "step": 71705 }, { "epoch": 0.79, "learning_rate": 3.676624737945493e-05, "loss": 0.7559, "step": 71710 }, { "epoch": 0.79, "learning_rate": 3.676532465231642e-05, "loss": 0.7011, "step": 71715 }, { "epoch": 0.79, "learning_rate": 3.67644019251779e-05, "loss": 0.7255, "step": 71720 }, { "epoch": 0.79, "learning_rate": 3.676347919803939e-05, "loss": 0.6885, "step": 71725 }, { "epoch": 0.79, "learning_rate": 3.676255647090088e-05, "loss": 0.7562, "step": 71730 }, { "epoch": 0.79, "learning_rate": 3.6761633743762366e-05, "loss": 0.6831, "step": 71735 }, { "epoch": 0.79, "learning_rate": 3.6760711016623854e-05, "loss": 0.7063, "step": 71740 }, { "epoch": 0.79, "learning_rate": 3.675978828948534e-05, "loss": 0.7384, "step": 71745 }, { "epoch": 0.79, "learning_rate": 3.675886556234683e-05, "loss": 0.7562, "step": 71750 }, { "epoch": 0.79, "learning_rate": 3.675794283520832e-05, "loss": 0.629, "step": 71755 }, { "epoch": 0.79, "learning_rate": 3.6757020108069805e-05, "loss": 0.7134, "step": 71760 }, { "epoch": 0.79, "learning_rate": 3.675609738093129e-05, "loss": 0.6791, "step": 71765 }, { "epoch": 0.79, "learning_rate": 3.675517465379278e-05, "loss": 0.6726, "step": 71770 }, { "epoch": 0.79, "learning_rate": 3.675425192665426e-05, "loss": 0.7027, "step": 71775 }, { "epoch": 0.79, "learning_rate": 3.6753329199515757e-05, "loss": 0.6856, "step": 71780 }, { "epoch": 0.79, "learning_rate": 3.6752406472377244e-05, "loss": 0.7168, "step": 71785 }, { "epoch": 0.79, "learning_rate": 3.675148374523873e-05, "loss": 0.702, "step": 71790 }, { "epoch": 0.79, "learning_rate": 3.675056101810021e-05, "loss": 0.7286, "step": 71795 }, { "epoch": 0.8, "learning_rate": 3.674963829096171e-05, "loss": 0.6979, "step": 71800 }, { "epoch": 0.8, "learning_rate": 3.6748715563823196e-05, "loss": 0.6771, "step": 71805 }, { "epoch": 0.8, "learning_rate": 3.674779283668468e-05, "loss": 0.6633, "step": 71810 }, { "epoch": 0.8, "learning_rate": 3.6746870109546165e-05, "loss": 0.7278, "step": 71815 }, { "epoch": 0.8, "learning_rate": 3.674594738240765e-05, "loss": 0.7127, "step": 71820 }, { "epoch": 0.8, "learning_rate": 3.674502465526915e-05, "loss": 0.7653, "step": 71825 }, { "epoch": 0.8, "learning_rate": 3.674410192813063e-05, "loss": 0.7005, "step": 71830 }, { "epoch": 0.8, "learning_rate": 3.6743179200992116e-05, "loss": 0.6691, "step": 71835 }, { "epoch": 0.8, "learning_rate": 3.6742256473853604e-05, "loss": 0.7281, "step": 71840 }, { "epoch": 0.8, "learning_rate": 3.67413337467151e-05, "loss": 0.7196, "step": 71845 }, { "epoch": 0.8, "learning_rate": 3.674041101957658e-05, "loss": 0.7362, "step": 71850 }, { "epoch": 0.8, "learning_rate": 3.673948829243807e-05, "loss": 0.7085, "step": 71855 }, { "epoch": 0.8, "learning_rate": 3.6738565565299555e-05, "loss": 0.7002, "step": 71860 }, { "epoch": 0.8, "learning_rate": 3.673764283816104e-05, "loss": 0.71, "step": 71865 }, { "epoch": 0.8, "learning_rate": 3.673672011102253e-05, "loss": 0.728, "step": 71870 }, { "epoch": 0.8, "learning_rate": 3.673579738388402e-05, "loss": 0.6693, "step": 71875 }, { "epoch": 0.8, "learning_rate": 3.673487465674551e-05, "loss": 0.6774, "step": 71880 }, { "epoch": 0.8, "learning_rate": 3.673395192960699e-05, "loss": 0.7284, "step": 71885 }, { "epoch": 0.8, "learning_rate": 3.673302920246848e-05, "loss": 0.8072, "step": 71890 }, { "epoch": 0.8, "learning_rate": 3.673210647532997e-05, "loss": 0.7038, "step": 71895 }, { "epoch": 0.8, "learning_rate": 3.673118374819146e-05, "loss": 0.754, "step": 71900 }, { "epoch": 0.8, "learning_rate": 3.673026102105294e-05, "loss": 0.7151, "step": 71905 }, { "epoch": 0.8, "learning_rate": 3.6729338293914434e-05, "loss": 0.6554, "step": 71910 }, { "epoch": 0.8, "learning_rate": 3.672841556677592e-05, "loss": 0.6582, "step": 71915 }, { "epoch": 0.8, "learning_rate": 3.672749283963741e-05, "loss": 0.7557, "step": 71920 }, { "epoch": 0.8, "learning_rate": 3.672657011249889e-05, "loss": 0.7198, "step": 71925 }, { "epoch": 0.8, "learning_rate": 3.6725647385360385e-05, "loss": 0.7114, "step": 71930 }, { "epoch": 0.8, "learning_rate": 3.672472465822187e-05, "loss": 0.6916, "step": 71935 }, { "epoch": 0.8, "learning_rate": 3.6723801931083354e-05, "loss": 0.7361, "step": 71940 }, { "epoch": 0.8, "learning_rate": 3.672287920394484e-05, "loss": 0.7302, "step": 71945 }, { "epoch": 0.8, "learning_rate": 3.6721956476806337e-05, "loss": 0.6943, "step": 71950 }, { "epoch": 0.8, "learning_rate": 3.6721033749667824e-05, "loss": 0.7524, "step": 71955 }, { "epoch": 0.8, "learning_rate": 3.6720111022529306e-05, "loss": 0.6919, "step": 71960 }, { "epoch": 0.8, "learning_rate": 3.671918829539079e-05, "loss": 0.6868, "step": 71965 }, { "epoch": 0.8, "learning_rate": 3.671826556825228e-05, "loss": 0.6992, "step": 71970 }, { "epoch": 0.8, "learning_rate": 3.671734284111377e-05, "loss": 0.6704, "step": 71975 }, { "epoch": 0.8, "learning_rate": 3.671642011397526e-05, "loss": 0.7367, "step": 71980 }, { "epoch": 0.8, "learning_rate": 3.6715497386836745e-05, "loss": 0.6739, "step": 71985 }, { "epoch": 0.8, "learning_rate": 3.671457465969823e-05, "loss": 0.7112, "step": 71990 }, { "epoch": 0.8, "learning_rate": 3.671365193255972e-05, "loss": 0.6994, "step": 71995 }, { "epoch": 0.8, "learning_rate": 3.671272920542121e-05, "loss": 0.7375, "step": 72000 }, { "epoch": 0.8, "eval_loss": 0.6909182071685791, "eval_runtime": 69.4793, "eval_samples_per_second": 28.786, "eval_steps_per_second": 14.393, "step": 72000 }, { "epoch": 0.8, "learning_rate": 3.6711806478282696e-05, "loss": 0.7071, "step": 72005 }, { "epoch": 0.8, "learning_rate": 3.6710883751144184e-05, "loss": 0.7957, "step": 72010 }, { "epoch": 0.8, "learning_rate": 3.670996102400567e-05, "loss": 0.7357, "step": 72015 }, { "epoch": 0.8, "learning_rate": 3.670903829686716e-05, "loss": 0.6887, "step": 72020 }, { "epoch": 0.8, "learning_rate": 3.670811556972865e-05, "loss": 0.7403, "step": 72025 }, { "epoch": 0.8, "learning_rate": 3.6707192842590135e-05, "loss": 0.7423, "step": 72030 }, { "epoch": 0.8, "learning_rate": 3.6706270115451616e-05, "loss": 0.6861, "step": 72035 }, { "epoch": 0.8, "learning_rate": 3.670534738831311e-05, "loss": 0.672, "step": 72040 }, { "epoch": 0.8, "learning_rate": 3.67044246611746e-05, "loss": 0.7339, "step": 72045 }, { "epoch": 0.8, "learning_rate": 3.670350193403608e-05, "loss": 0.6906, "step": 72050 }, { "epoch": 0.8, "learning_rate": 3.670257920689757e-05, "loss": 0.722, "step": 72055 }, { "epoch": 0.8, "learning_rate": 3.670165647975906e-05, "loss": 0.724, "step": 72060 }, { "epoch": 0.8, "learning_rate": 3.670073375262055e-05, "loss": 0.6607, "step": 72065 }, { "epoch": 0.8, "learning_rate": 3.669981102548203e-05, "loss": 0.7057, "step": 72070 }, { "epoch": 0.8, "learning_rate": 3.669888829834352e-05, "loss": 0.7351, "step": 72075 }, { "epoch": 0.8, "learning_rate": 3.6697965571205014e-05, "loss": 0.7317, "step": 72080 }, { "epoch": 0.8, "learning_rate": 3.6697042844066495e-05, "loss": 0.6574, "step": 72085 }, { "epoch": 0.8, "learning_rate": 3.669612011692798e-05, "loss": 0.6598, "step": 72090 }, { "epoch": 0.8, "learning_rate": 3.669519738978947e-05, "loss": 0.7289, "step": 72095 }, { "epoch": 0.8, "learning_rate": 3.6694274662650965e-05, "loss": 0.6919, "step": 72100 }, { "epoch": 0.8, "learning_rate": 3.6693351935512446e-05, "loss": 0.6861, "step": 72105 }, { "epoch": 0.8, "learning_rate": 3.6692429208373934e-05, "loss": 0.7327, "step": 72110 }, { "epoch": 0.8, "learning_rate": 3.669150648123542e-05, "loss": 0.7229, "step": 72115 }, { "epoch": 0.8, "learning_rate": 3.669058375409691e-05, "loss": 0.7667, "step": 72120 }, { "epoch": 0.8, "learning_rate": 3.66896610269584e-05, "loss": 0.6936, "step": 72125 }, { "epoch": 0.8, "learning_rate": 3.6688738299819886e-05, "loss": 0.7062, "step": 72130 }, { "epoch": 0.8, "learning_rate": 3.6687815572681373e-05, "loss": 0.6885, "step": 72135 }, { "epoch": 0.8, "learning_rate": 3.668689284554286e-05, "loss": 0.6776, "step": 72140 }, { "epoch": 0.8, "learning_rate": 3.668597011840435e-05, "loss": 0.746, "step": 72145 }, { "epoch": 0.8, "learning_rate": 3.668504739126584e-05, "loss": 0.7343, "step": 72150 }, { "epoch": 0.8, "learning_rate": 3.6684124664127325e-05, "loss": 0.7693, "step": 72155 }, { "epoch": 0.8, "learning_rate": 3.6683201936988806e-05, "loss": 0.7577, "step": 72160 }, { "epoch": 0.8, "learning_rate": 3.66822792098503e-05, "loss": 0.715, "step": 72165 }, { "epoch": 0.8, "learning_rate": 3.668135648271179e-05, "loss": 0.6894, "step": 72170 }, { "epoch": 0.8, "learning_rate": 3.6680433755573276e-05, "loss": 0.7096, "step": 72175 }, { "epoch": 0.8, "learning_rate": 3.667951102843476e-05, "loss": 0.7224, "step": 72180 }, { "epoch": 0.8, "learning_rate": 3.6678588301296245e-05, "loss": 0.7582, "step": 72185 }, { "epoch": 0.8, "learning_rate": 3.667766557415774e-05, "loss": 0.685, "step": 72190 }, { "epoch": 0.8, "learning_rate": 3.667674284701923e-05, "loss": 0.6888, "step": 72195 }, { "epoch": 0.8, "learning_rate": 3.667582011988071e-05, "loss": 0.7256, "step": 72200 }, { "epoch": 0.8, "learning_rate": 3.6674897392742197e-05, "loss": 0.7375, "step": 72205 }, { "epoch": 0.8, "learning_rate": 3.667397466560369e-05, "loss": 0.755, "step": 72210 }, { "epoch": 0.8, "learning_rate": 3.667305193846517e-05, "loss": 0.7377, "step": 72215 }, { "epoch": 0.8, "learning_rate": 3.667212921132666e-05, "loss": 0.7065, "step": 72220 }, { "epoch": 0.8, "learning_rate": 3.667120648418815e-05, "loss": 0.668, "step": 72225 }, { "epoch": 0.8, "learning_rate": 3.667028375704964e-05, "loss": 0.7079, "step": 72230 }, { "epoch": 0.8, "learning_rate": 3.6669361029911124e-05, "loss": 0.6541, "step": 72235 }, { "epoch": 0.8, "learning_rate": 3.666843830277261e-05, "loss": 0.6708, "step": 72240 }, { "epoch": 0.8, "learning_rate": 3.66675155756341e-05, "loss": 0.7075, "step": 72245 }, { "epoch": 0.8, "learning_rate": 3.666659284849559e-05, "loss": 0.6803, "step": 72250 }, { "epoch": 0.8, "learning_rate": 3.6665670121357075e-05, "loss": 0.6929, "step": 72255 }, { "epoch": 0.8, "learning_rate": 3.666474739421856e-05, "loss": 0.6388, "step": 72260 }, { "epoch": 0.8, "learning_rate": 3.666382466708005e-05, "loss": 0.6755, "step": 72265 }, { "epoch": 0.8, "learning_rate": 3.666290193994154e-05, "loss": 0.651, "step": 72270 }, { "epoch": 0.8, "learning_rate": 3.6661979212803026e-05, "loss": 0.6532, "step": 72275 }, { "epoch": 0.8, "learning_rate": 3.6661056485664514e-05, "loss": 0.7401, "step": 72280 }, { "epoch": 0.8, "learning_rate": 3.6660133758526e-05, "loss": 0.7314, "step": 72285 }, { "epoch": 0.8, "learning_rate": 3.665921103138748e-05, "loss": 0.7042, "step": 72290 }, { "epoch": 0.8, "learning_rate": 3.665828830424898e-05, "loss": 0.6967, "step": 72295 }, { "epoch": 0.8, "learning_rate": 3.6657365577110466e-05, "loss": 0.6987, "step": 72300 }, { "epoch": 0.8, "learning_rate": 3.6656442849971954e-05, "loss": 0.7009, "step": 72305 }, { "epoch": 0.8, "learning_rate": 3.6655520122833435e-05, "loss": 0.7318, "step": 72310 }, { "epoch": 0.8, "learning_rate": 3.665459739569493e-05, "loss": 0.7114, "step": 72315 }, { "epoch": 0.8, "learning_rate": 3.665367466855642e-05, "loss": 0.7128, "step": 72320 }, { "epoch": 0.8, "learning_rate": 3.66527519414179e-05, "loss": 0.7707, "step": 72325 }, { "epoch": 0.8, "learning_rate": 3.6651829214279386e-05, "loss": 0.6708, "step": 72330 }, { "epoch": 0.8, "learning_rate": 3.6650906487140874e-05, "loss": 0.6969, "step": 72335 }, { "epoch": 0.8, "learning_rate": 3.664998376000237e-05, "loss": 0.7339, "step": 72340 }, { "epoch": 0.8, "learning_rate": 3.664906103286385e-05, "loss": 0.7074, "step": 72345 }, { "epoch": 0.8, "learning_rate": 3.664813830572534e-05, "loss": 0.6996, "step": 72350 }, { "epoch": 0.8, "learning_rate": 3.6647215578586825e-05, "loss": 0.7234, "step": 72355 }, { "epoch": 0.8, "learning_rate": 3.664629285144831e-05, "loss": 0.6523, "step": 72360 }, { "epoch": 0.8, "learning_rate": 3.66453701243098e-05, "loss": 0.7217, "step": 72365 }, { "epoch": 0.8, "learning_rate": 3.664444739717129e-05, "loss": 0.7012, "step": 72370 }, { "epoch": 0.8, "learning_rate": 3.664352467003278e-05, "loss": 0.7407, "step": 72375 }, { "epoch": 0.8, "learning_rate": 3.6642601942894264e-05, "loss": 0.7042, "step": 72380 }, { "epoch": 0.8, "learning_rate": 3.664167921575575e-05, "loss": 0.6465, "step": 72385 }, { "epoch": 0.8, "learning_rate": 3.664075648861724e-05, "loss": 0.6826, "step": 72390 }, { "epoch": 0.8, "learning_rate": 3.663983376147873e-05, "loss": 0.6858, "step": 72395 }, { "epoch": 0.8, "learning_rate": 3.6638911034340216e-05, "loss": 0.7572, "step": 72400 }, { "epoch": 0.8, "learning_rate": 3.6637988307201704e-05, "loss": 0.7256, "step": 72405 }, { "epoch": 0.8, "learning_rate": 3.663706558006319e-05, "loss": 0.7282, "step": 72410 }, { "epoch": 0.8, "learning_rate": 3.663614285292468e-05, "loss": 0.7647, "step": 72415 }, { "epoch": 0.8, "learning_rate": 3.663522012578616e-05, "loss": 0.7199, "step": 72420 }, { "epoch": 0.8, "learning_rate": 3.6634297398647655e-05, "loss": 0.7463, "step": 72425 }, { "epoch": 0.8, "learning_rate": 3.663337467150914e-05, "loss": 0.6677, "step": 72430 }, { "epoch": 0.8, "learning_rate": 3.6632451944370624e-05, "loss": 0.7125, "step": 72435 }, { "epoch": 0.8, "learning_rate": 3.663152921723211e-05, "loss": 0.7202, "step": 72440 }, { "epoch": 0.8, "learning_rate": 3.6630606490093607e-05, "loss": 0.6929, "step": 72445 }, { "epoch": 0.8, "learning_rate": 3.6629683762955094e-05, "loss": 0.7099, "step": 72450 }, { "epoch": 0.8, "learning_rate": 3.6628761035816575e-05, "loss": 0.6938, "step": 72455 }, { "epoch": 0.8, "learning_rate": 3.662783830867806e-05, "loss": 0.6691, "step": 72460 }, { "epoch": 0.8, "learning_rate": 3.662691558153956e-05, "loss": 0.701, "step": 72465 }, { "epoch": 0.8, "learning_rate": 3.662599285440104e-05, "loss": 0.6516, "step": 72470 }, { "epoch": 0.8, "learning_rate": 3.662507012726253e-05, "loss": 0.6466, "step": 72475 }, { "epoch": 0.8, "learning_rate": 3.6624147400124015e-05, "loss": 0.6421, "step": 72480 }, { "epoch": 0.8, "learning_rate": 3.662322467298551e-05, "loss": 0.7047, "step": 72485 }, { "epoch": 0.8, "learning_rate": 3.662230194584699e-05, "loss": 0.7191, "step": 72490 }, { "epoch": 0.8, "learning_rate": 3.662137921870848e-05, "loss": 0.6857, "step": 72495 }, { "epoch": 0.8, "learning_rate": 3.6620456491569966e-05, "loss": 0.6689, "step": 72500 }, { "epoch": 0.8, "learning_rate": 3.6619533764431454e-05, "loss": 0.7109, "step": 72505 }, { "epoch": 0.8, "learning_rate": 3.661861103729294e-05, "loss": 0.746, "step": 72510 }, { "epoch": 0.8, "learning_rate": 3.661768831015443e-05, "loss": 0.6928, "step": 72515 }, { "epoch": 0.8, "learning_rate": 3.661676558301592e-05, "loss": 0.7472, "step": 72520 }, { "epoch": 0.8, "learning_rate": 3.6615842855877405e-05, "loss": 0.6923, "step": 72525 }, { "epoch": 0.8, "learning_rate": 3.661492012873889e-05, "loss": 0.721, "step": 72530 }, { "epoch": 0.8, "learning_rate": 3.661399740160038e-05, "loss": 0.691, "step": 72535 }, { "epoch": 0.8, "learning_rate": 3.661307467446187e-05, "loss": 0.6613, "step": 72540 }, { "epoch": 0.8, "learning_rate": 3.661215194732335e-05, "loss": 0.7558, "step": 72545 }, { "epoch": 0.8, "learning_rate": 3.6611229220184845e-05, "loss": 0.675, "step": 72550 }, { "epoch": 0.8, "learning_rate": 3.661030649304633e-05, "loss": 0.7095, "step": 72555 }, { "epoch": 0.8, "learning_rate": 3.660938376590782e-05, "loss": 0.6949, "step": 72560 }, { "epoch": 0.8, "learning_rate": 3.66084610387693e-05, "loss": 0.7018, "step": 72565 }, { "epoch": 0.8, "learning_rate": 3.660753831163079e-05, "loss": 0.7615, "step": 72570 }, { "epoch": 0.8, "learning_rate": 3.6606615584492284e-05, "loss": 0.6681, "step": 72575 }, { "epoch": 0.8, "learning_rate": 3.660569285735377e-05, "loss": 0.7198, "step": 72580 }, { "epoch": 0.8, "learning_rate": 3.660477013021525e-05, "loss": 0.6721, "step": 72585 }, { "epoch": 0.8, "learning_rate": 3.660384740307674e-05, "loss": 0.6949, "step": 72590 }, { "epoch": 0.8, "learning_rate": 3.6602924675938235e-05, "loss": 0.7162, "step": 72595 }, { "epoch": 0.8, "learning_rate": 3.6602001948799716e-05, "loss": 0.6688, "step": 72600 }, { "epoch": 0.8, "learning_rate": 3.6601079221661204e-05, "loss": 0.7291, "step": 72605 }, { "epoch": 0.8, "learning_rate": 3.660015649452269e-05, "loss": 0.6124, "step": 72610 }, { "epoch": 0.8, "learning_rate": 3.6599233767384187e-05, "loss": 0.7535, "step": 72615 }, { "epoch": 0.8, "learning_rate": 3.659831104024567e-05, "loss": 0.667, "step": 72620 }, { "epoch": 0.8, "learning_rate": 3.6597388313107156e-05, "loss": 0.6533, "step": 72625 }, { "epoch": 0.8, "learning_rate": 3.659646558596864e-05, "loss": 0.7196, "step": 72630 }, { "epoch": 0.8, "learning_rate": 3.659554285883013e-05, "loss": 0.7614, "step": 72635 }, { "epoch": 0.8, "learning_rate": 3.659462013169162e-05, "loss": 0.7684, "step": 72640 }, { "epoch": 0.8, "learning_rate": 3.659369740455311e-05, "loss": 0.6848, "step": 72645 }, { "epoch": 0.8, "learning_rate": 3.6592774677414595e-05, "loss": 0.7717, "step": 72650 }, { "epoch": 0.8, "learning_rate": 3.659185195027608e-05, "loss": 0.7286, "step": 72655 }, { "epoch": 0.8, "learning_rate": 3.659092922313757e-05, "loss": 0.7015, "step": 72660 }, { "epoch": 0.8, "learning_rate": 3.659000649599906e-05, "loss": 0.7035, "step": 72665 }, { "epoch": 0.8, "learning_rate": 3.6589083768860546e-05, "loss": 0.7151, "step": 72670 }, { "epoch": 0.8, "learning_rate": 3.658816104172203e-05, "loss": 0.7355, "step": 72675 }, { "epoch": 0.8, "learning_rate": 3.658723831458352e-05, "loss": 0.6966, "step": 72680 }, { "epoch": 0.8, "learning_rate": 3.658631558744501e-05, "loss": 0.6952, "step": 72685 }, { "epoch": 0.8, "learning_rate": 3.65853928603065e-05, "loss": 0.7618, "step": 72690 }, { "epoch": 0.8, "learning_rate": 3.658447013316798e-05, "loss": 0.77, "step": 72695 }, { "epoch": 0.8, "learning_rate": 3.658354740602947e-05, "loss": 0.7654, "step": 72700 }, { "epoch": 0.81, "learning_rate": 3.658262467889096e-05, "loss": 0.6727, "step": 72705 }, { "epoch": 0.81, "learning_rate": 3.658170195175244e-05, "loss": 0.6398, "step": 72710 }, { "epoch": 0.81, "learning_rate": 3.658077922461393e-05, "loss": 0.7083, "step": 72715 }, { "epoch": 0.81, "learning_rate": 3.657985649747542e-05, "loss": 0.7548, "step": 72720 }, { "epoch": 0.81, "learning_rate": 3.657893377033691e-05, "loss": 0.7358, "step": 72725 }, { "epoch": 0.81, "learning_rate": 3.6578011043198394e-05, "loss": 0.6859, "step": 72730 }, { "epoch": 0.81, "learning_rate": 3.657708831605988e-05, "loss": 0.7207, "step": 72735 }, { "epoch": 0.81, "learning_rate": 3.657616558892137e-05, "loss": 0.6749, "step": 72740 }, { "epoch": 0.81, "learning_rate": 3.657524286178286e-05, "loss": 0.681, "step": 72745 }, { "epoch": 0.81, "learning_rate": 3.6574320134644345e-05, "loss": 0.6752, "step": 72750 }, { "epoch": 0.81, "learning_rate": 3.657339740750583e-05, "loss": 0.6577, "step": 72755 }, { "epoch": 0.81, "learning_rate": 3.657247468036732e-05, "loss": 0.6976, "step": 72760 }, { "epoch": 0.81, "learning_rate": 3.657155195322881e-05, "loss": 0.7516, "step": 72765 }, { "epoch": 0.81, "learning_rate": 3.6570629226090296e-05, "loss": 0.6807, "step": 72770 }, { "epoch": 0.81, "learning_rate": 3.6569706498951784e-05, "loss": 0.6947, "step": 72775 }, { "epoch": 0.81, "learning_rate": 3.656878377181327e-05, "loss": 0.7452, "step": 72780 }, { "epoch": 0.81, "learning_rate": 3.656786104467476e-05, "loss": 0.6259, "step": 72785 }, { "epoch": 0.81, "learning_rate": 3.656693831753625e-05, "loss": 0.7379, "step": 72790 }, { "epoch": 0.81, "learning_rate": 3.6566015590397736e-05, "loss": 0.7458, "step": 72795 }, { "epoch": 0.81, "learning_rate": 3.6565092863259223e-05, "loss": 0.7127, "step": 72800 }, { "epoch": 0.81, "learning_rate": 3.6564170136120705e-05, "loss": 0.6637, "step": 72805 }, { "epoch": 0.81, "learning_rate": 3.65632474089822e-05, "loss": 0.755, "step": 72810 }, { "epoch": 0.81, "learning_rate": 3.656232468184369e-05, "loss": 0.7161, "step": 72815 }, { "epoch": 0.81, "learning_rate": 3.656140195470517e-05, "loss": 0.6896, "step": 72820 }, { "epoch": 0.81, "learning_rate": 3.6560479227566656e-05, "loss": 0.6811, "step": 72825 }, { "epoch": 0.81, "learning_rate": 3.655955650042815e-05, "loss": 0.7145, "step": 72830 }, { "epoch": 0.81, "learning_rate": 3.655863377328964e-05, "loss": 0.6474, "step": 72835 }, { "epoch": 0.81, "learning_rate": 3.655771104615112e-05, "loss": 0.749, "step": 72840 }, { "epoch": 0.81, "learning_rate": 3.655678831901261e-05, "loss": 0.6447, "step": 72845 }, { "epoch": 0.81, "learning_rate": 3.65558655918741e-05, "loss": 0.6514, "step": 72850 }, { "epoch": 0.81, "learning_rate": 3.655494286473558e-05, "loss": 0.6813, "step": 72855 }, { "epoch": 0.81, "learning_rate": 3.655402013759707e-05, "loss": 0.6894, "step": 72860 }, { "epoch": 0.81, "learning_rate": 3.655309741045856e-05, "loss": 0.6571, "step": 72865 }, { "epoch": 0.81, "learning_rate": 3.6552174683320047e-05, "loss": 0.6592, "step": 72870 }, { "epoch": 0.81, "learning_rate": 3.6551251956181534e-05, "loss": 0.6194, "step": 72875 }, { "epoch": 0.81, "learning_rate": 3.655032922904302e-05, "loss": 0.719, "step": 72880 }, { "epoch": 0.81, "learning_rate": 3.654940650190451e-05, "loss": 0.6391, "step": 72885 }, { "epoch": 0.81, "learning_rate": 3.6548483774766e-05, "loss": 0.6794, "step": 72890 }, { "epoch": 0.81, "learning_rate": 3.6547561047627486e-05, "loss": 0.745, "step": 72895 }, { "epoch": 0.81, "learning_rate": 3.6546638320488974e-05, "loss": 0.6847, "step": 72900 }, { "epoch": 0.81, "learning_rate": 3.654571559335046e-05, "loss": 0.688, "step": 72905 }, { "epoch": 0.81, "learning_rate": 3.654479286621195e-05, "loss": 0.7157, "step": 72910 }, { "epoch": 0.81, "learning_rate": 3.654387013907344e-05, "loss": 0.6786, "step": 72915 }, { "epoch": 0.81, "learning_rate": 3.6542947411934925e-05, "loss": 0.6791, "step": 72920 }, { "epoch": 0.81, "learning_rate": 3.654202468479641e-05, "loss": 0.6224, "step": 72925 }, { "epoch": 0.81, "learning_rate": 3.6541101957657894e-05, "loss": 0.7323, "step": 72930 }, { "epoch": 0.81, "learning_rate": 3.654017923051939e-05, "loss": 0.6956, "step": 72935 }, { "epoch": 0.81, "learning_rate": 3.6539256503380876e-05, "loss": 0.7478, "step": 72940 }, { "epoch": 0.81, "learning_rate": 3.6538333776242364e-05, "loss": 0.7646, "step": 72945 }, { "epoch": 0.81, "learning_rate": 3.6537411049103845e-05, "loss": 0.7167, "step": 72950 }, { "epoch": 0.81, "learning_rate": 3.653648832196533e-05, "loss": 0.6781, "step": 72955 }, { "epoch": 0.81, "learning_rate": 3.653556559482683e-05, "loss": 0.7596, "step": 72960 }, { "epoch": 0.81, "learning_rate": 3.6534642867688316e-05, "loss": 0.6705, "step": 72965 }, { "epoch": 0.81, "learning_rate": 3.65337201405498e-05, "loss": 0.7589, "step": 72970 }, { "epoch": 0.81, "learning_rate": 3.6532797413411285e-05, "loss": 0.7441, "step": 72975 }, { "epoch": 0.81, "learning_rate": 3.653187468627278e-05, "loss": 0.7002, "step": 72980 }, { "epoch": 0.81, "learning_rate": 3.653095195913426e-05, "loss": 0.743, "step": 72985 }, { "epoch": 0.81, "learning_rate": 3.653002923199575e-05, "loss": 0.7161, "step": 72990 }, { "epoch": 0.81, "learning_rate": 3.6529106504857236e-05, "loss": 0.7275, "step": 72995 }, { "epoch": 0.81, "learning_rate": 3.652818377771873e-05, "loss": 0.6779, "step": 73000 }, { "epoch": 0.81, "eval_loss": 0.7042044401168823, "eval_runtime": 69.3375, "eval_samples_per_second": 28.844, "eval_steps_per_second": 14.422, "step": 73000 }, { "epoch": 0.81, "learning_rate": 3.652726105058021e-05, "loss": 0.7374, "step": 73005 }, { "epoch": 0.81, "learning_rate": 3.65263383234417e-05, "loss": 0.7409, "step": 73010 }, { "epoch": 0.81, "learning_rate": 3.652541559630319e-05, "loss": 0.6992, "step": 73015 }, { "epoch": 0.81, "learning_rate": 3.6524492869164675e-05, "loss": 0.6744, "step": 73020 }, { "epoch": 0.81, "learning_rate": 3.652357014202616e-05, "loss": 0.7361, "step": 73025 }, { "epoch": 0.81, "learning_rate": 3.652264741488765e-05, "loss": 0.6698, "step": 73030 }, { "epoch": 0.81, "learning_rate": 3.652172468774914e-05, "loss": 0.7366, "step": 73035 }, { "epoch": 0.81, "learning_rate": 3.652080196061063e-05, "loss": 0.7071, "step": 73040 }, { "epoch": 0.81, "learning_rate": 3.6519879233472114e-05, "loss": 0.6913, "step": 73045 }, { "epoch": 0.81, "learning_rate": 3.65189565063336e-05, "loss": 0.7238, "step": 73050 }, { "epoch": 0.81, "learning_rate": 3.651803377919509e-05, "loss": 0.6879, "step": 73055 }, { "epoch": 0.81, "learning_rate": 3.651711105205657e-05, "loss": 0.666, "step": 73060 }, { "epoch": 0.81, "learning_rate": 3.6516188324918066e-05, "loss": 0.6738, "step": 73065 }, { "epoch": 0.81, "learning_rate": 3.6515265597779554e-05, "loss": 0.6937, "step": 73070 }, { "epoch": 0.81, "learning_rate": 3.651434287064104e-05, "loss": 0.7802, "step": 73075 }, { "epoch": 0.81, "learning_rate": 3.651342014350252e-05, "loss": 0.6686, "step": 73080 }, { "epoch": 0.81, "learning_rate": 3.651249741636402e-05, "loss": 0.7513, "step": 73085 }, { "epoch": 0.81, "learning_rate": 3.6511574689225505e-05, "loss": 0.6881, "step": 73090 }, { "epoch": 0.81, "learning_rate": 3.6510651962086986e-05, "loss": 0.7201, "step": 73095 }, { "epoch": 0.81, "learning_rate": 3.6509729234948474e-05, "loss": 0.7095, "step": 73100 }, { "epoch": 0.81, "learning_rate": 3.650880650780996e-05, "loss": 0.6808, "step": 73105 }, { "epoch": 0.81, "learning_rate": 3.6507883780671456e-05, "loss": 0.7486, "step": 73110 }, { "epoch": 0.81, "learning_rate": 3.650696105353294e-05, "loss": 0.6963, "step": 73115 }, { "epoch": 0.81, "learning_rate": 3.6506038326394425e-05, "loss": 0.7214, "step": 73120 }, { "epoch": 0.81, "learning_rate": 3.650511559925591e-05, "loss": 0.6821, "step": 73125 }, { "epoch": 0.81, "learning_rate": 3.65041928721174e-05, "loss": 0.6906, "step": 73130 }, { "epoch": 0.81, "learning_rate": 3.650327014497889e-05, "loss": 0.7567, "step": 73135 }, { "epoch": 0.81, "learning_rate": 3.650234741784038e-05, "loss": 0.7078, "step": 73140 }, { "epoch": 0.81, "learning_rate": 3.6501424690701865e-05, "loss": 0.7651, "step": 73145 }, { "epoch": 0.81, "learning_rate": 3.650050196356335e-05, "loss": 0.6703, "step": 73150 }, { "epoch": 0.81, "learning_rate": 3.649957923642484e-05, "loss": 0.6881, "step": 73155 }, { "epoch": 0.81, "learning_rate": 3.649865650928633e-05, "loss": 0.7615, "step": 73160 }, { "epoch": 0.81, "learning_rate": 3.6497733782147816e-05, "loss": 0.6523, "step": 73165 }, { "epoch": 0.81, "learning_rate": 3.64968110550093e-05, "loss": 0.6428, "step": 73170 }, { "epoch": 0.81, "learning_rate": 3.649588832787079e-05, "loss": 0.7022, "step": 73175 }, { "epoch": 0.81, "learning_rate": 3.649496560073228e-05, "loss": 0.7263, "step": 73180 }, { "epoch": 0.81, "learning_rate": 3.649404287359377e-05, "loss": 0.6967, "step": 73185 }, { "epoch": 0.81, "learning_rate": 3.649312014645525e-05, "loss": 0.6895, "step": 73190 }, { "epoch": 0.81, "learning_rate": 3.649219741931674e-05, "loss": 0.6969, "step": 73195 }, { "epoch": 0.81, "learning_rate": 3.649127469217823e-05, "loss": 0.6816, "step": 73200 }, { "epoch": 0.81, "learning_rate": 3.649035196503971e-05, "loss": 0.7337, "step": 73205 }, { "epoch": 0.81, "learning_rate": 3.64894292379012e-05, "loss": 0.6735, "step": 73210 }, { "epoch": 0.81, "learning_rate": 3.6488506510762695e-05, "loss": 0.6554, "step": 73215 }, { "epoch": 0.81, "learning_rate": 3.648758378362418e-05, "loss": 0.7301, "step": 73220 }, { "epoch": 0.81, "learning_rate": 3.6486661056485663e-05, "loss": 0.7191, "step": 73225 }, { "epoch": 0.81, "learning_rate": 3.648573832934715e-05, "loss": 0.7036, "step": 73230 }, { "epoch": 0.81, "learning_rate": 3.6484815602208646e-05, "loss": 0.7149, "step": 73235 }, { "epoch": 0.81, "learning_rate": 3.648389287507013e-05, "loss": 0.7453, "step": 73240 }, { "epoch": 0.81, "learning_rate": 3.6482970147931615e-05, "loss": 0.7263, "step": 73245 }, { "epoch": 0.81, "learning_rate": 3.64820474207931e-05, "loss": 0.649, "step": 73250 }, { "epoch": 0.81, "learning_rate": 3.648112469365459e-05, "loss": 0.6997, "step": 73255 }, { "epoch": 0.81, "learning_rate": 3.648020196651608e-05, "loss": 0.6847, "step": 73260 }, { "epoch": 0.81, "learning_rate": 3.6479279239377566e-05, "loss": 0.6915, "step": 73265 }, { "epoch": 0.81, "learning_rate": 3.6478356512239054e-05, "loss": 0.7211, "step": 73270 }, { "epoch": 0.81, "learning_rate": 3.647743378510054e-05, "loss": 0.7176, "step": 73275 }, { "epoch": 0.81, "learning_rate": 3.647651105796203e-05, "loss": 0.7735, "step": 73280 }, { "epoch": 0.81, "learning_rate": 3.647558833082352e-05, "loss": 0.7186, "step": 73285 }, { "epoch": 0.81, "learning_rate": 3.6474665603685005e-05, "loss": 0.7347, "step": 73290 }, { "epoch": 0.81, "learning_rate": 3.647374287654649e-05, "loss": 0.7056, "step": 73295 }, { "epoch": 0.81, "learning_rate": 3.647282014940798e-05, "loss": 0.7524, "step": 73300 }, { "epoch": 0.81, "learning_rate": 3.647189742226947e-05, "loss": 0.6933, "step": 73305 }, { "epoch": 0.81, "learning_rate": 3.647097469513096e-05, "loss": 0.6165, "step": 73310 }, { "epoch": 0.81, "learning_rate": 3.647005196799244e-05, "loss": 0.6973, "step": 73315 }, { "epoch": 0.81, "learning_rate": 3.646912924085393e-05, "loss": 0.704, "step": 73320 }, { "epoch": 0.81, "learning_rate": 3.646820651371542e-05, "loss": 0.7337, "step": 73325 }, { "epoch": 0.81, "learning_rate": 3.646728378657691e-05, "loss": 0.7257, "step": 73330 }, { "epoch": 0.81, "learning_rate": 3.646636105943839e-05, "loss": 0.6994, "step": 73335 }, { "epoch": 0.81, "learning_rate": 3.646543833229988e-05, "loss": 0.6802, "step": 73340 }, { "epoch": 0.81, "learning_rate": 3.646451560516137e-05, "loss": 0.7425, "step": 73345 }, { "epoch": 0.81, "learning_rate": 3.646359287802286e-05, "loss": 0.6697, "step": 73350 }, { "epoch": 0.81, "learning_rate": 3.646267015088434e-05, "loss": 0.7026, "step": 73355 }, { "epoch": 0.81, "learning_rate": 3.646174742374583e-05, "loss": 0.7027, "step": 73360 }, { "epoch": 0.81, "learning_rate": 3.646082469660732e-05, "loss": 0.6781, "step": 73365 }, { "epoch": 0.81, "learning_rate": 3.6459901969468804e-05, "loss": 0.6675, "step": 73370 }, { "epoch": 0.81, "learning_rate": 3.645897924233029e-05, "loss": 0.7334, "step": 73375 }, { "epoch": 0.81, "learning_rate": 3.645805651519178e-05, "loss": 0.6267, "step": 73380 }, { "epoch": 0.81, "learning_rate": 3.6457133788053275e-05, "loss": 0.7183, "step": 73385 }, { "epoch": 0.81, "learning_rate": 3.6456211060914756e-05, "loss": 0.7388, "step": 73390 }, { "epoch": 0.81, "learning_rate": 3.6455288333776244e-05, "loss": 0.7054, "step": 73395 }, { "epoch": 0.81, "learning_rate": 3.645436560663773e-05, "loss": 0.7096, "step": 73400 }, { "epoch": 0.81, "learning_rate": 3.645344287949922e-05, "loss": 0.6766, "step": 73405 }, { "epoch": 0.81, "learning_rate": 3.645252015236071e-05, "loss": 0.7106, "step": 73410 }, { "epoch": 0.81, "learning_rate": 3.6451597425222195e-05, "loss": 0.646, "step": 73415 }, { "epoch": 0.81, "learning_rate": 3.645067469808368e-05, "loss": 0.6872, "step": 73420 }, { "epoch": 0.81, "learning_rate": 3.644975197094517e-05, "loss": 0.7577, "step": 73425 }, { "epoch": 0.81, "learning_rate": 3.644882924380666e-05, "loss": 0.7051, "step": 73430 }, { "epoch": 0.81, "learning_rate": 3.6447906516668146e-05, "loss": 0.7192, "step": 73435 }, { "epoch": 0.81, "learning_rate": 3.6446983789529634e-05, "loss": 0.7197, "step": 73440 }, { "epoch": 0.81, "learning_rate": 3.6446061062391115e-05, "loss": 0.6863, "step": 73445 }, { "epoch": 0.81, "learning_rate": 3.644513833525261e-05, "loss": 0.7188, "step": 73450 }, { "epoch": 0.81, "learning_rate": 3.64442156081141e-05, "loss": 0.6624, "step": 73455 }, { "epoch": 0.81, "learning_rate": 3.6443292880975586e-05, "loss": 0.6845, "step": 73460 }, { "epoch": 0.81, "learning_rate": 3.644237015383707e-05, "loss": 0.7093, "step": 73465 }, { "epoch": 0.81, "learning_rate": 3.644144742669856e-05, "loss": 0.7117, "step": 73470 }, { "epoch": 0.81, "learning_rate": 3.644052469956005e-05, "loss": 0.7093, "step": 73475 }, { "epoch": 0.81, "learning_rate": 3.643960197242153e-05, "loss": 0.7051, "step": 73480 }, { "epoch": 0.81, "learning_rate": 3.643867924528302e-05, "loss": 0.779, "step": 73485 }, { "epoch": 0.81, "learning_rate": 3.6437756518144506e-05, "loss": 0.6309, "step": 73490 }, { "epoch": 0.81, "learning_rate": 3.6436833791006e-05, "loss": 0.7351, "step": 73495 }, { "epoch": 0.81, "learning_rate": 3.643591106386748e-05, "loss": 0.736, "step": 73500 }, { "epoch": 0.81, "learning_rate": 3.643498833672897e-05, "loss": 0.6802, "step": 73505 }, { "epoch": 0.81, "learning_rate": 3.643406560959046e-05, "loss": 0.7163, "step": 73510 }, { "epoch": 0.81, "learning_rate": 3.6433142882451945e-05, "loss": 0.7099, "step": 73515 }, { "epoch": 0.81, "learning_rate": 3.643222015531343e-05, "loss": 0.754, "step": 73520 }, { "epoch": 0.81, "learning_rate": 3.643129742817492e-05, "loss": 0.7115, "step": 73525 }, { "epoch": 0.81, "learning_rate": 3.643037470103641e-05, "loss": 0.745, "step": 73530 }, { "epoch": 0.81, "learning_rate": 3.6429451973897897e-05, "loss": 0.7123, "step": 73535 }, { "epoch": 0.81, "learning_rate": 3.6428529246759384e-05, "loss": 0.7308, "step": 73540 }, { "epoch": 0.81, "learning_rate": 3.642760651962087e-05, "loss": 0.716, "step": 73545 }, { "epoch": 0.81, "learning_rate": 3.642668379248236e-05, "loss": 0.7602, "step": 73550 }, { "epoch": 0.81, "learning_rate": 3.642576106534384e-05, "loss": 0.7423, "step": 73555 }, { "epoch": 0.81, "learning_rate": 3.6424838338205336e-05, "loss": 0.678, "step": 73560 }, { "epoch": 0.81, "learning_rate": 3.6423915611066824e-05, "loss": 0.763, "step": 73565 }, { "epoch": 0.81, "learning_rate": 3.642299288392831e-05, "loss": 0.6839, "step": 73570 }, { "epoch": 0.81, "learning_rate": 3.642207015678979e-05, "loss": 0.7211, "step": 73575 }, { "epoch": 0.81, "learning_rate": 3.642114742965129e-05, "loss": 0.6694, "step": 73580 }, { "epoch": 0.81, "learning_rate": 3.6420224702512775e-05, "loss": 0.7152, "step": 73585 }, { "epoch": 0.81, "learning_rate": 3.6419301975374256e-05, "loss": 0.6913, "step": 73590 }, { "epoch": 0.81, "learning_rate": 3.6418379248235744e-05, "loss": 0.7398, "step": 73595 }, { "epoch": 0.81, "learning_rate": 3.641745652109724e-05, "loss": 0.7018, "step": 73600 }, { "epoch": 0.82, "learning_rate": 3.6416533793958726e-05, "loss": 0.6823, "step": 73605 }, { "epoch": 0.82, "learning_rate": 3.641561106682021e-05, "loss": 0.6692, "step": 73610 }, { "epoch": 0.82, "learning_rate": 3.6414688339681695e-05, "loss": 0.7684, "step": 73615 }, { "epoch": 0.82, "learning_rate": 3.641376561254319e-05, "loss": 0.7052, "step": 73620 }, { "epoch": 0.82, "learning_rate": 3.641284288540467e-05, "loss": 0.672, "step": 73625 }, { "epoch": 0.82, "learning_rate": 3.641192015826616e-05, "loss": 0.7375, "step": 73630 }, { "epoch": 0.82, "learning_rate": 3.641099743112765e-05, "loss": 0.7294, "step": 73635 }, { "epoch": 0.82, "learning_rate": 3.6410074703989135e-05, "loss": 0.7028, "step": 73640 }, { "epoch": 0.82, "learning_rate": 3.640915197685062e-05, "loss": 0.7002, "step": 73645 }, { "epoch": 0.82, "learning_rate": 3.640822924971211e-05, "loss": 0.7471, "step": 73650 }, { "epoch": 0.82, "learning_rate": 3.64073065225736e-05, "loss": 0.7407, "step": 73655 }, { "epoch": 0.82, "learning_rate": 3.6406383795435086e-05, "loss": 0.6789, "step": 73660 }, { "epoch": 0.82, "learning_rate": 3.6405461068296574e-05, "loss": 0.6977, "step": 73665 }, { "epoch": 0.82, "learning_rate": 3.640453834115806e-05, "loss": 0.6686, "step": 73670 }, { "epoch": 0.82, "learning_rate": 3.640361561401955e-05, "loss": 0.6666, "step": 73675 }, { "epoch": 0.82, "learning_rate": 3.640269288688104e-05, "loss": 0.6838, "step": 73680 }, { "epoch": 0.82, "learning_rate": 3.6401770159742525e-05, "loss": 0.702, "step": 73685 }, { "epoch": 0.82, "learning_rate": 3.640084743260401e-05, "loss": 0.6722, "step": 73690 }, { "epoch": 0.82, "learning_rate": 3.63999247054655e-05, "loss": 0.7494, "step": 73695 }, { "epoch": 0.82, "learning_rate": 3.639900197832698e-05, "loss": 0.686, "step": 73700 }, { "epoch": 0.82, "learning_rate": 3.639807925118847e-05, "loss": 0.7296, "step": 73705 }, { "epoch": 0.82, "learning_rate": 3.6397156524049964e-05, "loss": 0.6429, "step": 73710 }, { "epoch": 0.82, "learning_rate": 3.639623379691145e-05, "loss": 0.6948, "step": 73715 }, { "epoch": 0.82, "learning_rate": 3.639531106977293e-05, "loss": 0.6933, "step": 73720 }, { "epoch": 0.82, "learning_rate": 3.639438834263442e-05, "loss": 0.6965, "step": 73725 }, { "epoch": 0.82, "learning_rate": 3.6393465615495916e-05, "loss": 0.7199, "step": 73730 }, { "epoch": 0.82, "learning_rate": 3.6392542888357404e-05, "loss": 0.7085, "step": 73735 }, { "epoch": 0.82, "learning_rate": 3.6391620161218885e-05, "loss": 0.7724, "step": 73740 }, { "epoch": 0.82, "learning_rate": 3.639069743408037e-05, "loss": 0.7091, "step": 73745 }, { "epoch": 0.82, "learning_rate": 3.638977470694187e-05, "loss": 0.6783, "step": 73750 }, { "epoch": 0.82, "learning_rate": 3.638885197980335e-05, "loss": 0.7084, "step": 73755 }, { "epoch": 0.82, "learning_rate": 3.6387929252664836e-05, "loss": 0.775, "step": 73760 }, { "epoch": 0.82, "learning_rate": 3.6387006525526324e-05, "loss": 0.7198, "step": 73765 }, { "epoch": 0.82, "learning_rate": 3.638608379838782e-05, "loss": 0.7243, "step": 73770 }, { "epoch": 0.82, "learning_rate": 3.63851610712493e-05, "loss": 0.7303, "step": 73775 }, { "epoch": 0.82, "learning_rate": 3.638423834411079e-05, "loss": 0.7159, "step": 73780 }, { "epoch": 0.82, "learning_rate": 3.6383315616972275e-05, "loss": 0.6688, "step": 73785 }, { "epoch": 0.82, "learning_rate": 3.638239288983376e-05, "loss": 0.6868, "step": 73790 }, { "epoch": 0.82, "learning_rate": 3.638147016269525e-05, "loss": 0.7025, "step": 73795 }, { "epoch": 0.82, "learning_rate": 3.638054743555674e-05, "loss": 0.7743, "step": 73800 }, { "epoch": 0.82, "learning_rate": 3.637962470841823e-05, "loss": 0.6616, "step": 73805 }, { "epoch": 0.82, "learning_rate": 3.6378701981279715e-05, "loss": 0.7171, "step": 73810 }, { "epoch": 0.82, "learning_rate": 3.63777792541412e-05, "loss": 0.7055, "step": 73815 }, { "epoch": 0.82, "learning_rate": 3.637685652700269e-05, "loss": 0.6437, "step": 73820 }, { "epoch": 0.82, "learning_rate": 3.637593379986418e-05, "loss": 0.7537, "step": 73825 }, { "epoch": 0.82, "learning_rate": 3.637501107272566e-05, "loss": 0.7009, "step": 73830 }, { "epoch": 0.82, "learning_rate": 3.6374088345587154e-05, "loss": 0.6547, "step": 73835 }, { "epoch": 0.82, "learning_rate": 3.637316561844864e-05, "loss": 0.7252, "step": 73840 }, { "epoch": 0.82, "learning_rate": 3.637224289131013e-05, "loss": 0.6624, "step": 73845 }, { "epoch": 0.82, "learning_rate": 3.637132016417161e-05, "loss": 0.6901, "step": 73850 }, { "epoch": 0.82, "learning_rate": 3.63703974370331e-05, "loss": 0.7497, "step": 73855 }, { "epoch": 0.82, "learning_rate": 3.636947470989459e-05, "loss": 0.6533, "step": 73860 }, { "epoch": 0.82, "learning_rate": 3.6368551982756074e-05, "loss": 0.674, "step": 73865 }, { "epoch": 0.82, "learning_rate": 3.636762925561756e-05, "loss": 0.709, "step": 73870 }, { "epoch": 0.82, "learning_rate": 3.636670652847905e-05, "loss": 0.7222, "step": 73875 }, { "epoch": 0.82, "learning_rate": 3.6365783801340545e-05, "loss": 0.7564, "step": 73880 }, { "epoch": 0.82, "learning_rate": 3.6364861074202026e-05, "loss": 0.6969, "step": 73885 }, { "epoch": 0.82, "learning_rate": 3.6363938347063513e-05, "loss": 0.7264, "step": 73890 }, { "epoch": 0.82, "learning_rate": 3.6363015619925e-05, "loss": 0.735, "step": 73895 }, { "epoch": 0.82, "learning_rate": 3.636209289278649e-05, "loss": 0.641, "step": 73900 }, { "epoch": 0.82, "learning_rate": 3.636117016564798e-05, "loss": 0.7397, "step": 73905 }, { "epoch": 0.82, "learning_rate": 3.6360247438509465e-05, "loss": 0.6592, "step": 73910 }, { "epoch": 0.82, "learning_rate": 3.635932471137095e-05, "loss": 0.6977, "step": 73915 }, { "epoch": 0.82, "learning_rate": 3.635840198423244e-05, "loss": 0.7214, "step": 73920 }, { "epoch": 0.82, "learning_rate": 3.635747925709393e-05, "loss": 0.6899, "step": 73925 }, { "epoch": 0.82, "learning_rate": 3.6356556529955416e-05, "loss": 0.7016, "step": 73930 }, { "epoch": 0.82, "learning_rate": 3.6355633802816904e-05, "loss": 0.6884, "step": 73935 }, { "epoch": 0.82, "learning_rate": 3.6354711075678385e-05, "loss": 0.7452, "step": 73940 }, { "epoch": 0.82, "learning_rate": 3.635378834853988e-05, "loss": 0.6916, "step": 73945 }, { "epoch": 0.82, "learning_rate": 3.635286562140137e-05, "loss": 0.7267, "step": 73950 }, { "epoch": 0.82, "learning_rate": 3.6351942894262855e-05, "loss": 0.6804, "step": 73955 }, { "epoch": 0.82, "learning_rate": 3.6351020167124337e-05, "loss": 0.686, "step": 73960 }, { "epoch": 0.82, "learning_rate": 3.635009743998583e-05, "loss": 0.6739, "step": 73965 }, { "epoch": 0.82, "learning_rate": 3.634917471284732e-05, "loss": 0.6948, "step": 73970 }, { "epoch": 0.82, "learning_rate": 3.63482519857088e-05, "loss": 0.694, "step": 73975 }, { "epoch": 0.82, "learning_rate": 3.634732925857029e-05, "loss": 0.7014, "step": 73980 }, { "epoch": 0.82, "learning_rate": 3.634640653143178e-05, "loss": 0.674, "step": 73985 }, { "epoch": 0.82, "learning_rate": 3.634548380429327e-05, "loss": 0.757, "step": 73990 }, { "epoch": 0.82, "learning_rate": 3.634456107715475e-05, "loss": 0.7292, "step": 73995 }, { "epoch": 0.82, "learning_rate": 3.634363835001624e-05, "loss": 0.6646, "step": 74000 }, { "epoch": 0.82, "eval_loss": 0.6633651852607727, "eval_runtime": 69.259, "eval_samples_per_second": 28.877, "eval_steps_per_second": 14.439, "step": 74000 }, { "epoch": 0.82, "learning_rate": 3.634271562287773e-05, "loss": 0.7221, "step": 74005 }, { "epoch": 0.82, "learning_rate": 3.6341792895739215e-05, "loss": 0.7528, "step": 74010 }, { "epoch": 0.82, "learning_rate": 3.63408701686007e-05, "loss": 0.7142, "step": 74015 }, { "epoch": 0.82, "learning_rate": 3.633994744146219e-05, "loss": 0.685, "step": 74020 }, { "epoch": 0.82, "learning_rate": 3.633902471432368e-05, "loss": 0.7356, "step": 74025 }, { "epoch": 0.82, "learning_rate": 3.6338101987185166e-05, "loss": 0.7488, "step": 74030 }, { "epoch": 0.82, "learning_rate": 3.6337179260046654e-05, "loss": 0.6593, "step": 74035 }, { "epoch": 0.82, "learning_rate": 3.633625653290814e-05, "loss": 0.6741, "step": 74040 }, { "epoch": 0.82, "learning_rate": 3.633533380576963e-05, "loss": 0.6975, "step": 74045 }, { "epoch": 0.82, "learning_rate": 3.633441107863112e-05, "loss": 0.7075, "step": 74050 }, { "epoch": 0.82, "learning_rate": 3.6333488351492606e-05, "loss": 0.6744, "step": 74055 }, { "epoch": 0.82, "learning_rate": 3.6332565624354094e-05, "loss": 0.7495, "step": 74060 }, { "epoch": 0.82, "learning_rate": 3.633164289721558e-05, "loss": 0.7625, "step": 74065 }, { "epoch": 0.82, "learning_rate": 3.633072017007707e-05, "loss": 0.6953, "step": 74070 }, { "epoch": 0.82, "learning_rate": 3.632979744293856e-05, "loss": 0.7025, "step": 74075 }, { "epoch": 0.82, "learning_rate": 3.6328874715800045e-05, "loss": 0.6714, "step": 74080 }, { "epoch": 0.82, "learning_rate": 3.6327951988661526e-05, "loss": 0.6894, "step": 74085 }, { "epoch": 0.82, "learning_rate": 3.6327029261523014e-05, "loss": 0.7058, "step": 74090 }, { "epoch": 0.82, "learning_rate": 3.632610653438451e-05, "loss": 0.7326, "step": 74095 }, { "epoch": 0.82, "learning_rate": 3.6325183807245996e-05, "loss": 0.6825, "step": 74100 }, { "epoch": 0.82, "learning_rate": 3.632426108010748e-05, "loss": 0.7438, "step": 74105 }, { "epoch": 0.82, "learning_rate": 3.6323338352968965e-05, "loss": 0.7574, "step": 74110 }, { "epoch": 0.82, "learning_rate": 3.632241562583046e-05, "loss": 0.7158, "step": 74115 }, { "epoch": 0.82, "learning_rate": 3.632149289869195e-05, "loss": 0.6938, "step": 74120 }, { "epoch": 0.82, "learning_rate": 3.632057017155343e-05, "loss": 0.6684, "step": 74125 }, { "epoch": 0.82, "learning_rate": 3.631964744441492e-05, "loss": 0.6326, "step": 74130 }, { "epoch": 0.82, "learning_rate": 3.631872471727641e-05, "loss": 0.7024, "step": 74135 }, { "epoch": 0.82, "learning_rate": 3.631780199013789e-05, "loss": 0.6892, "step": 74140 }, { "epoch": 0.82, "learning_rate": 3.631687926299938e-05, "loss": 0.7273, "step": 74145 }, { "epoch": 0.82, "learning_rate": 3.631595653586087e-05, "loss": 0.7127, "step": 74150 }, { "epoch": 0.82, "learning_rate": 3.631503380872236e-05, "loss": 0.7844, "step": 74155 }, { "epoch": 0.82, "learning_rate": 3.6314111081583844e-05, "loss": 0.7377, "step": 74160 }, { "epoch": 0.82, "learning_rate": 3.631318835444533e-05, "loss": 0.6514, "step": 74165 }, { "epoch": 0.82, "learning_rate": 3.631226562730682e-05, "loss": 0.6908, "step": 74170 }, { "epoch": 0.82, "learning_rate": 3.631134290016831e-05, "loss": 0.6902, "step": 74175 }, { "epoch": 0.82, "learning_rate": 3.6310420173029795e-05, "loss": 0.7109, "step": 74180 }, { "epoch": 0.82, "learning_rate": 3.630949744589128e-05, "loss": 0.7648, "step": 74185 }, { "epoch": 0.82, "learning_rate": 3.630857471875277e-05, "loss": 0.7784, "step": 74190 }, { "epoch": 0.82, "learning_rate": 3.630765199161426e-05, "loss": 0.7756, "step": 74195 }, { "epoch": 0.82, "learning_rate": 3.6306729264475747e-05, "loss": 0.6942, "step": 74200 }, { "epoch": 0.82, "learning_rate": 3.6305806537337234e-05, "loss": 0.7318, "step": 74205 }, { "epoch": 0.82, "learning_rate": 3.630488381019872e-05, "loss": 0.6725, "step": 74210 }, { "epoch": 0.82, "learning_rate": 3.63039610830602e-05, "loss": 0.6856, "step": 74215 }, { "epoch": 0.82, "learning_rate": 3.63030383559217e-05, "loss": 0.7023, "step": 74220 }, { "epoch": 0.82, "learning_rate": 3.6302115628783186e-05, "loss": 0.7631, "step": 74225 }, { "epoch": 0.82, "learning_rate": 3.6301192901644674e-05, "loss": 0.7577, "step": 74230 }, { "epoch": 0.82, "learning_rate": 3.6300270174506155e-05, "loss": 0.7342, "step": 74235 }, { "epoch": 0.82, "learning_rate": 3.629934744736764e-05, "loss": 0.6987, "step": 74240 }, { "epoch": 0.82, "learning_rate": 3.629842472022914e-05, "loss": 0.7425, "step": 74245 }, { "epoch": 0.82, "learning_rate": 3.629750199309062e-05, "loss": 0.7148, "step": 74250 }, { "epoch": 0.82, "learning_rate": 3.6296579265952106e-05, "loss": 0.6551, "step": 74255 }, { "epoch": 0.82, "learning_rate": 3.6295656538813594e-05, "loss": 0.6461, "step": 74260 }, { "epoch": 0.82, "learning_rate": 3.629473381167509e-05, "loss": 0.7122, "step": 74265 }, { "epoch": 0.82, "learning_rate": 3.629381108453657e-05, "loss": 0.7063, "step": 74270 }, { "epoch": 0.82, "learning_rate": 3.629288835739806e-05, "loss": 0.7147, "step": 74275 }, { "epoch": 0.82, "learning_rate": 3.6291965630259545e-05, "loss": 0.6735, "step": 74280 }, { "epoch": 0.82, "learning_rate": 3.629104290312103e-05, "loss": 0.6835, "step": 74285 }, { "epoch": 0.82, "learning_rate": 3.629012017598252e-05, "loss": 0.6559, "step": 74290 }, { "epoch": 0.82, "learning_rate": 3.628919744884401e-05, "loss": 0.7527, "step": 74295 }, { "epoch": 0.82, "learning_rate": 3.62882747217055e-05, "loss": 0.6808, "step": 74300 }, { "epoch": 0.82, "learning_rate": 3.6287351994566985e-05, "loss": 0.6572, "step": 74305 }, { "epoch": 0.82, "learning_rate": 3.628642926742847e-05, "loss": 0.7314, "step": 74310 }, { "epoch": 0.82, "learning_rate": 3.628550654028996e-05, "loss": 0.6958, "step": 74315 }, { "epoch": 0.82, "learning_rate": 3.628458381315145e-05, "loss": 0.6497, "step": 74320 }, { "epoch": 0.82, "learning_rate": 3.628366108601293e-05, "loss": 0.7131, "step": 74325 }, { "epoch": 0.82, "learning_rate": 3.6282738358874424e-05, "loss": 0.7575, "step": 74330 }, { "epoch": 0.82, "learning_rate": 3.628181563173591e-05, "loss": 0.6542, "step": 74335 }, { "epoch": 0.82, "learning_rate": 3.62808929045974e-05, "loss": 0.7189, "step": 74340 }, { "epoch": 0.82, "learning_rate": 3.627997017745888e-05, "loss": 0.7069, "step": 74345 }, { "epoch": 0.82, "learning_rate": 3.6279047450320375e-05, "loss": 0.7393, "step": 74350 }, { "epoch": 0.82, "learning_rate": 3.627812472318186e-05, "loss": 0.6801, "step": 74355 }, { "epoch": 0.82, "learning_rate": 3.6277201996043344e-05, "loss": 0.77, "step": 74360 }, { "epoch": 0.82, "learning_rate": 3.627627926890483e-05, "loss": 0.6662, "step": 74365 }, { "epoch": 0.82, "learning_rate": 3.6275356541766327e-05, "loss": 0.6634, "step": 74370 }, { "epoch": 0.82, "learning_rate": 3.6274433814627814e-05, "loss": 0.7173, "step": 74375 }, { "epoch": 0.82, "learning_rate": 3.6273511087489296e-05, "loss": 0.7337, "step": 74380 }, { "epoch": 0.82, "learning_rate": 3.627258836035078e-05, "loss": 0.6757, "step": 74385 }, { "epoch": 0.82, "learning_rate": 3.627166563321227e-05, "loss": 0.7153, "step": 74390 }, { "epoch": 0.82, "learning_rate": 3.6270742906073766e-05, "loss": 0.7015, "step": 74395 }, { "epoch": 0.82, "learning_rate": 3.626982017893525e-05, "loss": 0.7243, "step": 74400 }, { "epoch": 0.82, "learning_rate": 3.6268897451796735e-05, "loss": 0.7013, "step": 74405 }, { "epoch": 0.82, "learning_rate": 3.626797472465822e-05, "loss": 0.694, "step": 74410 }, { "epoch": 0.82, "learning_rate": 3.626705199751971e-05, "loss": 0.7064, "step": 74415 }, { "epoch": 0.82, "learning_rate": 3.62661292703812e-05, "loss": 0.7133, "step": 74420 }, { "epoch": 0.82, "learning_rate": 3.6265206543242686e-05, "loss": 0.7118, "step": 74425 }, { "epoch": 0.82, "learning_rate": 3.6264283816104174e-05, "loss": 0.6866, "step": 74430 }, { "epoch": 0.82, "learning_rate": 3.626336108896566e-05, "loss": 0.6433, "step": 74435 }, { "epoch": 0.82, "learning_rate": 3.626243836182715e-05, "loss": 0.7416, "step": 74440 }, { "epoch": 0.82, "learning_rate": 3.626151563468864e-05, "loss": 0.6492, "step": 74445 }, { "epoch": 0.82, "learning_rate": 3.6260592907550125e-05, "loss": 0.7492, "step": 74450 }, { "epoch": 0.82, "learning_rate": 3.625967018041161e-05, "loss": 0.7148, "step": 74455 }, { "epoch": 0.82, "learning_rate": 3.62587474532731e-05, "loss": 0.6717, "step": 74460 }, { "epoch": 0.82, "learning_rate": 3.625782472613459e-05, "loss": 0.7087, "step": 74465 }, { "epoch": 0.82, "learning_rate": 3.625690199899608e-05, "loss": 0.7137, "step": 74470 }, { "epoch": 0.82, "learning_rate": 3.625597927185756e-05, "loss": 0.6834, "step": 74475 }, { "epoch": 0.82, "learning_rate": 3.625505654471905e-05, "loss": 0.7262, "step": 74480 }, { "epoch": 0.82, "learning_rate": 3.625413381758054e-05, "loss": 0.6978, "step": 74485 }, { "epoch": 0.82, "learning_rate": 3.625321109044202e-05, "loss": 0.6986, "step": 74490 }, { "epoch": 0.82, "learning_rate": 3.625228836330351e-05, "loss": 0.6933, "step": 74495 }, { "epoch": 0.82, "learning_rate": 3.6251365636165004e-05, "loss": 0.7085, "step": 74500 }, { "epoch": 0.82, "learning_rate": 3.625044290902649e-05, "loss": 0.6843, "step": 74505 }, { "epoch": 0.83, "learning_rate": 3.624952018188797e-05, "loss": 0.749, "step": 74510 }, { "epoch": 0.83, "learning_rate": 3.624859745474946e-05, "loss": 0.7373, "step": 74515 }, { "epoch": 0.83, "learning_rate": 3.6247674727610955e-05, "loss": 0.7284, "step": 74520 }, { "epoch": 0.83, "learning_rate": 3.6246752000472436e-05, "loss": 0.6523, "step": 74525 }, { "epoch": 0.83, "learning_rate": 3.6245829273333924e-05, "loss": 0.7295, "step": 74530 }, { "epoch": 0.83, "learning_rate": 3.624490654619541e-05, "loss": 0.6878, "step": 74535 }, { "epoch": 0.83, "learning_rate": 3.62439838190569e-05, "loss": 0.7015, "step": 74540 }, { "epoch": 0.83, "learning_rate": 3.624306109191839e-05, "loss": 0.7467, "step": 74545 }, { "epoch": 0.83, "learning_rate": 3.6242138364779876e-05, "loss": 0.7248, "step": 74550 }, { "epoch": 0.83, "learning_rate": 3.6241215637641363e-05, "loss": 0.692, "step": 74555 }, { "epoch": 0.83, "learning_rate": 3.624029291050285e-05, "loss": 0.7441, "step": 74560 }, { "epoch": 0.83, "learning_rate": 3.623937018336434e-05, "loss": 0.659, "step": 74565 }, { "epoch": 0.83, "learning_rate": 3.623844745622583e-05, "loss": 0.7991, "step": 74570 }, { "epoch": 0.83, "learning_rate": 3.6237524729087315e-05, "loss": 0.7374, "step": 74575 }, { "epoch": 0.83, "learning_rate": 3.62366020019488e-05, "loss": 0.6851, "step": 74580 }, { "epoch": 0.83, "learning_rate": 3.623567927481029e-05, "loss": 0.695, "step": 74585 }, { "epoch": 0.83, "learning_rate": 3.623475654767178e-05, "loss": 0.7262, "step": 74590 }, { "epoch": 0.83, "learning_rate": 3.6233833820533266e-05, "loss": 0.6964, "step": 74595 }, { "epoch": 0.83, "learning_rate": 3.623291109339475e-05, "loss": 0.6895, "step": 74600 }, { "epoch": 0.83, "learning_rate": 3.623198836625624e-05, "loss": 0.6928, "step": 74605 }, { "epoch": 0.83, "learning_rate": 3.623106563911773e-05, "loss": 0.6329, "step": 74610 }, { "epoch": 0.83, "learning_rate": 3.623014291197922e-05, "loss": 0.7555, "step": 74615 }, { "epoch": 0.83, "learning_rate": 3.62292201848407e-05, "loss": 0.6736, "step": 74620 }, { "epoch": 0.83, "learning_rate": 3.6228297457702187e-05, "loss": 0.6989, "step": 74625 }, { "epoch": 0.83, "learning_rate": 3.622737473056368e-05, "loss": 0.7106, "step": 74630 }, { "epoch": 0.83, "learning_rate": 3.622645200342516e-05, "loss": 0.6726, "step": 74635 }, { "epoch": 0.83, "learning_rate": 3.622552927628665e-05, "loss": 0.6959, "step": 74640 }, { "epoch": 0.83, "learning_rate": 3.622460654914814e-05, "loss": 0.687, "step": 74645 }, { "epoch": 0.83, "learning_rate": 3.622368382200963e-05, "loss": 0.6823, "step": 74650 }, { "epoch": 0.83, "learning_rate": 3.6222761094871114e-05, "loss": 0.6852, "step": 74655 }, { "epoch": 0.83, "learning_rate": 3.62218383677326e-05, "loss": 0.6902, "step": 74660 }, { "epoch": 0.83, "learning_rate": 3.622091564059409e-05, "loss": 0.7012, "step": 74665 }, { "epoch": 0.83, "learning_rate": 3.621999291345558e-05, "loss": 0.779, "step": 74670 }, { "epoch": 0.83, "learning_rate": 3.6219070186317065e-05, "loss": 0.6737, "step": 74675 }, { "epoch": 0.83, "learning_rate": 3.621814745917855e-05, "loss": 0.7467, "step": 74680 }, { "epoch": 0.83, "learning_rate": 3.621722473204004e-05, "loss": 0.722, "step": 74685 }, { "epoch": 0.83, "learning_rate": 3.621630200490153e-05, "loss": 0.698, "step": 74690 }, { "epoch": 0.83, "learning_rate": 3.6215379277763016e-05, "loss": 0.7464, "step": 74695 }, { "epoch": 0.83, "learning_rate": 3.6214456550624504e-05, "loss": 0.6838, "step": 74700 }, { "epoch": 0.83, "learning_rate": 3.621353382348599e-05, "loss": 0.7087, "step": 74705 }, { "epoch": 0.83, "learning_rate": 3.621261109634747e-05, "loss": 0.7467, "step": 74710 }, { "epoch": 0.83, "learning_rate": 3.621168836920897e-05, "loss": 0.7122, "step": 74715 }, { "epoch": 0.83, "learning_rate": 3.6210765642070456e-05, "loss": 0.7009, "step": 74720 }, { "epoch": 0.83, "learning_rate": 3.6209842914931944e-05, "loss": 0.6998, "step": 74725 }, { "epoch": 0.83, "learning_rate": 3.6208920187793425e-05, "loss": 0.7162, "step": 74730 }, { "epoch": 0.83, "learning_rate": 3.620799746065492e-05, "loss": 0.7309, "step": 74735 }, { "epoch": 0.83, "learning_rate": 3.620707473351641e-05, "loss": 0.7393, "step": 74740 }, { "epoch": 0.83, "learning_rate": 3.620615200637789e-05, "loss": 0.7238, "step": 74745 }, { "epoch": 0.83, "learning_rate": 3.6205229279239376e-05, "loss": 0.7313, "step": 74750 }, { "epoch": 0.83, "learning_rate": 3.620430655210087e-05, "loss": 0.6935, "step": 74755 }, { "epoch": 0.83, "learning_rate": 3.620338382496236e-05, "loss": 0.6336, "step": 74760 }, { "epoch": 0.83, "learning_rate": 3.620246109782384e-05, "loss": 0.7194, "step": 74765 }, { "epoch": 0.83, "learning_rate": 3.620153837068533e-05, "loss": 0.6953, "step": 74770 }, { "epoch": 0.83, "learning_rate": 3.6200615643546815e-05, "loss": 0.6962, "step": 74775 }, { "epoch": 0.83, "learning_rate": 3.619969291640831e-05, "loss": 0.7285, "step": 74780 }, { "epoch": 0.83, "learning_rate": 3.619877018926979e-05, "loss": 0.7008, "step": 74785 }, { "epoch": 0.83, "learning_rate": 3.619784746213128e-05, "loss": 0.6755, "step": 74790 }, { "epoch": 0.83, "learning_rate": 3.619692473499277e-05, "loss": 0.6427, "step": 74795 }, { "epoch": 0.83, "learning_rate": 3.6196002007854254e-05, "loss": 0.6986, "step": 74800 }, { "epoch": 0.83, "learning_rate": 3.619507928071574e-05, "loss": 0.6959, "step": 74805 }, { "epoch": 0.83, "learning_rate": 3.619415655357723e-05, "loss": 0.643, "step": 74810 }, { "epoch": 0.83, "learning_rate": 3.619323382643872e-05, "loss": 0.6598, "step": 74815 }, { "epoch": 0.83, "learning_rate": 3.6192311099300206e-05, "loss": 0.7232, "step": 74820 }, { "epoch": 0.83, "learning_rate": 3.6191388372161694e-05, "loss": 0.7462, "step": 74825 }, { "epoch": 0.83, "learning_rate": 3.619046564502318e-05, "loss": 0.6761, "step": 74830 }, { "epoch": 0.83, "learning_rate": 3.618954291788467e-05, "loss": 0.6464, "step": 74835 }, { "epoch": 0.83, "learning_rate": 3.618862019074615e-05, "loss": 0.6256, "step": 74840 }, { "epoch": 0.83, "learning_rate": 3.6187697463607645e-05, "loss": 0.7463, "step": 74845 }, { "epoch": 0.83, "learning_rate": 3.618677473646913e-05, "loss": 0.6927, "step": 74850 }, { "epoch": 0.83, "learning_rate": 3.618585200933062e-05, "loss": 0.6796, "step": 74855 }, { "epoch": 0.83, "learning_rate": 3.61849292821921e-05, "loss": 0.7015, "step": 74860 }, { "epoch": 0.83, "learning_rate": 3.6184006555053597e-05, "loss": 0.7313, "step": 74865 }, { "epoch": 0.83, "learning_rate": 3.6183083827915084e-05, "loss": 0.6705, "step": 74870 }, { "epoch": 0.83, "learning_rate": 3.6182161100776565e-05, "loss": 0.7308, "step": 74875 }, { "epoch": 0.83, "learning_rate": 3.618123837363805e-05, "loss": 0.7159, "step": 74880 }, { "epoch": 0.83, "learning_rate": 3.618031564649955e-05, "loss": 0.6978, "step": 74885 }, { "epoch": 0.83, "learning_rate": 3.6179392919361036e-05, "loss": 0.6732, "step": 74890 }, { "epoch": 0.83, "learning_rate": 3.617847019222252e-05, "loss": 0.703, "step": 74895 }, { "epoch": 0.83, "learning_rate": 3.6177547465084005e-05, "loss": 0.6811, "step": 74900 }, { "epoch": 0.83, "learning_rate": 3.61766247379455e-05, "loss": 0.6745, "step": 74905 }, { "epoch": 0.83, "learning_rate": 3.617570201080698e-05, "loss": 0.6583, "step": 74910 }, { "epoch": 0.83, "learning_rate": 3.617477928366847e-05, "loss": 0.67, "step": 74915 }, { "epoch": 0.83, "learning_rate": 3.6173856556529956e-05, "loss": 0.7214, "step": 74920 }, { "epoch": 0.83, "learning_rate": 3.6172933829391444e-05, "loss": 0.7096, "step": 74925 }, { "epoch": 0.83, "learning_rate": 3.617201110225293e-05, "loss": 0.6882, "step": 74930 }, { "epoch": 0.83, "learning_rate": 3.617108837511442e-05, "loss": 0.714, "step": 74935 }, { "epoch": 0.83, "learning_rate": 3.617016564797591e-05, "loss": 0.6214, "step": 74940 }, { "epoch": 0.83, "learning_rate": 3.6169242920837395e-05, "loss": 0.7281, "step": 74945 }, { "epoch": 0.83, "learning_rate": 3.616832019369888e-05, "loss": 0.6865, "step": 74950 }, { "epoch": 0.83, "learning_rate": 3.616739746656037e-05, "loss": 0.7239, "step": 74955 }, { "epoch": 0.83, "learning_rate": 3.616647473942186e-05, "loss": 0.7034, "step": 74960 }, { "epoch": 0.83, "learning_rate": 3.616555201228335e-05, "loss": 0.7344, "step": 74965 }, { "epoch": 0.83, "learning_rate": 3.6164629285144835e-05, "loss": 0.6845, "step": 74970 }, { "epoch": 0.83, "learning_rate": 3.616370655800632e-05, "loss": 0.7046, "step": 74975 }, { "epoch": 0.83, "learning_rate": 3.616278383086781e-05, "loss": 0.7499, "step": 74980 }, { "epoch": 0.83, "learning_rate": 3.616186110372929e-05, "loss": 0.6584, "step": 74985 }, { "epoch": 0.83, "learning_rate": 3.6160938376590786e-05, "loss": 0.7159, "step": 74990 }, { "epoch": 0.83, "learning_rate": 3.6160015649452274e-05, "loss": 0.7022, "step": 74995 }, { "epoch": 0.83, "learning_rate": 3.615909292231376e-05, "loss": 0.6616, "step": 75000 }, { "epoch": 0.83, "eval_loss": 0.7020450234413147, "eval_runtime": 69.2456, "eval_samples_per_second": 28.883, "eval_steps_per_second": 14.441, "step": 75000 }, { "epoch": 0.83, "learning_rate": 3.615817019517524e-05, "loss": 0.734, "step": 75005 }, { "epoch": 0.83, "learning_rate": 3.615724746803673e-05, "loss": 0.757, "step": 75010 }, { "epoch": 0.83, "learning_rate": 3.6156324740898225e-05, "loss": 0.7412, "step": 75015 }, { "epoch": 0.83, "learning_rate": 3.6155402013759706e-05, "loss": 0.6592, "step": 75020 }, { "epoch": 0.83, "learning_rate": 3.6154479286621194e-05, "loss": 0.6731, "step": 75025 }, { "epoch": 0.83, "learning_rate": 3.615355655948268e-05, "loss": 0.7066, "step": 75030 }, { "epoch": 0.83, "learning_rate": 3.6152633832344177e-05, "loss": 0.6838, "step": 75035 }, { "epoch": 0.83, "learning_rate": 3.615171110520566e-05, "loss": 0.6524, "step": 75040 }, { "epoch": 0.83, "learning_rate": 3.6150788378067146e-05, "loss": 0.7043, "step": 75045 }, { "epoch": 0.83, "learning_rate": 3.614986565092863e-05, "loss": 0.7321, "step": 75050 }, { "epoch": 0.83, "learning_rate": 3.614894292379012e-05, "loss": 0.7029, "step": 75055 }, { "epoch": 0.83, "learning_rate": 3.614802019665161e-05, "loss": 0.7018, "step": 75060 }, { "epoch": 0.83, "learning_rate": 3.61470974695131e-05, "loss": 0.7185, "step": 75065 }, { "epoch": 0.83, "learning_rate": 3.6146174742374585e-05, "loss": 0.6562, "step": 75070 }, { "epoch": 0.83, "learning_rate": 3.614525201523607e-05, "loss": 0.6813, "step": 75075 }, { "epoch": 0.83, "learning_rate": 3.614432928809756e-05, "loss": 0.777, "step": 75080 }, { "epoch": 0.83, "learning_rate": 3.614340656095905e-05, "loss": 0.724, "step": 75085 }, { "epoch": 0.83, "learning_rate": 3.6142483833820536e-05, "loss": 0.7958, "step": 75090 }, { "epoch": 0.83, "learning_rate": 3.614156110668202e-05, "loss": 0.7674, "step": 75095 }, { "epoch": 0.83, "learning_rate": 3.614063837954351e-05, "loss": 0.6365, "step": 75100 }, { "epoch": 0.83, "learning_rate": 3.6139715652405e-05, "loss": 0.7087, "step": 75105 }, { "epoch": 0.83, "learning_rate": 3.613879292526649e-05, "loss": 0.7082, "step": 75110 }, { "epoch": 0.83, "learning_rate": 3.613787019812797e-05, "loss": 0.7046, "step": 75115 }, { "epoch": 0.83, "learning_rate": 3.613694747098946e-05, "loss": 0.6865, "step": 75120 }, { "epoch": 0.83, "learning_rate": 3.613602474385095e-05, "loss": 0.7859, "step": 75125 }, { "epoch": 0.83, "learning_rate": 3.613510201671243e-05, "loss": 0.7028, "step": 75130 }, { "epoch": 0.83, "learning_rate": 3.613417928957392e-05, "loss": 0.6301, "step": 75135 }, { "epoch": 0.83, "learning_rate": 3.6133256562435415e-05, "loss": 0.7064, "step": 75140 }, { "epoch": 0.83, "learning_rate": 3.61323338352969e-05, "loss": 0.6786, "step": 75145 }, { "epoch": 0.83, "learning_rate": 3.6131411108158384e-05, "loss": 0.709, "step": 75150 }, { "epoch": 0.83, "learning_rate": 3.613048838101987e-05, "loss": 0.6711, "step": 75155 }, { "epoch": 0.83, "learning_rate": 3.612956565388136e-05, "loss": 0.6342, "step": 75160 }, { "epoch": 0.83, "learning_rate": 3.6128642926742854e-05, "loss": 0.7204, "step": 75165 }, { "epoch": 0.83, "learning_rate": 3.6127720199604335e-05, "loss": 0.7027, "step": 75170 }, { "epoch": 0.83, "learning_rate": 3.612679747246582e-05, "loss": 0.7049, "step": 75175 }, { "epoch": 0.83, "learning_rate": 3.612587474532731e-05, "loss": 0.6989, "step": 75180 }, { "epoch": 0.83, "learning_rate": 3.61249520181888e-05, "loss": 0.7743, "step": 75185 }, { "epoch": 0.83, "learning_rate": 3.6124029291050286e-05, "loss": 0.7115, "step": 75190 }, { "epoch": 0.83, "learning_rate": 3.6123106563911774e-05, "loss": 0.6911, "step": 75195 }, { "epoch": 0.83, "learning_rate": 3.612218383677326e-05, "loss": 0.663, "step": 75200 }, { "epoch": 0.83, "learning_rate": 3.612126110963475e-05, "loss": 0.7113, "step": 75205 }, { "epoch": 0.83, "learning_rate": 3.612033838249624e-05, "loss": 0.6969, "step": 75210 }, { "epoch": 0.83, "learning_rate": 3.6119415655357726e-05, "loss": 0.6947, "step": 75215 }, { "epoch": 0.83, "learning_rate": 3.6118492928219213e-05, "loss": 0.7185, "step": 75220 }, { "epoch": 0.83, "learning_rate": 3.6117570201080695e-05, "loss": 0.695, "step": 75225 }, { "epoch": 0.83, "learning_rate": 3.611664747394219e-05, "loss": 0.6846, "step": 75230 }, { "epoch": 0.83, "learning_rate": 3.611572474680368e-05, "loss": 0.7246, "step": 75235 }, { "epoch": 0.83, "learning_rate": 3.6114802019665165e-05, "loss": 0.6254, "step": 75240 }, { "epoch": 0.83, "learning_rate": 3.6113879292526646e-05, "loss": 0.7094, "step": 75245 }, { "epoch": 0.83, "learning_rate": 3.611295656538814e-05, "loss": 0.7265, "step": 75250 }, { "epoch": 0.83, "learning_rate": 3.611203383824963e-05, "loss": 0.686, "step": 75255 }, { "epoch": 0.83, "learning_rate": 3.611111111111111e-05, "loss": 0.7169, "step": 75260 }, { "epoch": 0.83, "learning_rate": 3.61101883839726e-05, "loss": 0.6819, "step": 75265 }, { "epoch": 0.83, "learning_rate": 3.610926565683409e-05, "loss": 0.6651, "step": 75270 }, { "epoch": 0.83, "learning_rate": 3.610834292969558e-05, "loss": 0.7266, "step": 75275 }, { "epoch": 0.83, "learning_rate": 3.610742020255706e-05, "loss": 0.7606, "step": 75280 }, { "epoch": 0.83, "learning_rate": 3.610649747541855e-05, "loss": 0.7422, "step": 75285 }, { "epoch": 0.83, "learning_rate": 3.610557474828004e-05, "loss": 0.6507, "step": 75290 }, { "epoch": 0.83, "learning_rate": 3.6104652021141524e-05, "loss": 0.695, "step": 75295 }, { "epoch": 0.83, "learning_rate": 3.610372929400301e-05, "loss": 0.6725, "step": 75300 }, { "epoch": 0.83, "learning_rate": 3.61028065668645e-05, "loss": 0.7079, "step": 75305 }, { "epoch": 0.83, "learning_rate": 3.610188383972599e-05, "loss": 0.6702, "step": 75310 }, { "epoch": 0.83, "learning_rate": 3.6100961112587476e-05, "loss": 0.708, "step": 75315 }, { "epoch": 0.83, "learning_rate": 3.6100038385448964e-05, "loss": 0.7014, "step": 75320 }, { "epoch": 0.83, "learning_rate": 3.609911565831045e-05, "loss": 0.618, "step": 75325 }, { "epoch": 0.83, "learning_rate": 3.609819293117194e-05, "loss": 0.7129, "step": 75330 }, { "epoch": 0.83, "learning_rate": 3.609727020403343e-05, "loss": 0.745, "step": 75335 }, { "epoch": 0.83, "learning_rate": 3.6096347476894915e-05, "loss": 0.6878, "step": 75340 }, { "epoch": 0.83, "learning_rate": 3.60954247497564e-05, "loss": 0.7239, "step": 75345 }, { "epoch": 0.83, "learning_rate": 3.609450202261789e-05, "loss": 0.711, "step": 75350 }, { "epoch": 0.83, "learning_rate": 3.609357929547938e-05, "loss": 0.7396, "step": 75355 }, { "epoch": 0.83, "learning_rate": 3.6092656568340866e-05, "loss": 0.641, "step": 75360 }, { "epoch": 0.83, "learning_rate": 3.6091733841202354e-05, "loss": 0.7359, "step": 75365 }, { "epoch": 0.83, "learning_rate": 3.6090811114063835e-05, "loss": 0.6392, "step": 75370 }, { "epoch": 0.83, "learning_rate": 3.608988838692532e-05, "loss": 0.7576, "step": 75375 }, { "epoch": 0.83, "learning_rate": 3.608896565978682e-05, "loss": 0.6555, "step": 75380 }, { "epoch": 0.83, "learning_rate": 3.6088042932648306e-05, "loss": 0.7167, "step": 75385 }, { "epoch": 0.83, "learning_rate": 3.608712020550979e-05, "loss": 0.6787, "step": 75390 }, { "epoch": 0.83, "learning_rate": 3.6086197478371275e-05, "loss": 0.7265, "step": 75395 }, { "epoch": 0.83, "learning_rate": 3.608527475123277e-05, "loss": 0.7389, "step": 75400 }, { "epoch": 0.83, "learning_rate": 3.608435202409425e-05, "loss": 0.7347, "step": 75405 }, { "epoch": 0.83, "learning_rate": 3.608342929695574e-05, "loss": 0.7145, "step": 75410 }, { "epoch": 0.84, "learning_rate": 3.6082506569817226e-05, "loss": 0.7283, "step": 75415 }, { "epoch": 0.84, "learning_rate": 3.608158384267872e-05, "loss": 0.722, "step": 75420 }, { "epoch": 0.84, "learning_rate": 3.60806611155402e-05, "loss": 0.6694, "step": 75425 }, { "epoch": 0.84, "learning_rate": 3.607973838840169e-05, "loss": 0.7019, "step": 75430 }, { "epoch": 0.84, "learning_rate": 3.607881566126318e-05, "loss": 0.682, "step": 75435 }, { "epoch": 0.84, "learning_rate": 3.6077892934124665e-05, "loss": 0.7336, "step": 75440 }, { "epoch": 0.84, "learning_rate": 3.607697020698615e-05, "loss": 0.7268, "step": 75445 }, { "epoch": 0.84, "learning_rate": 3.607604747984764e-05, "loss": 0.7227, "step": 75450 }, { "epoch": 0.84, "learning_rate": 3.607512475270913e-05, "loss": 0.7192, "step": 75455 }, { "epoch": 0.84, "learning_rate": 3.607420202557062e-05, "loss": 0.7049, "step": 75460 }, { "epoch": 0.84, "learning_rate": 3.6073279298432104e-05, "loss": 0.6506, "step": 75465 }, { "epoch": 0.84, "learning_rate": 3.607235657129359e-05, "loss": 0.6787, "step": 75470 }, { "epoch": 0.84, "learning_rate": 3.607143384415508e-05, "loss": 0.7082, "step": 75475 }, { "epoch": 0.84, "learning_rate": 3.607051111701656e-05, "loss": 0.7045, "step": 75480 }, { "epoch": 0.84, "learning_rate": 3.6069588389878056e-05, "loss": 0.71, "step": 75485 }, { "epoch": 0.84, "learning_rate": 3.6068665662739544e-05, "loss": 0.7286, "step": 75490 }, { "epoch": 0.84, "learning_rate": 3.606774293560103e-05, "loss": 0.7389, "step": 75495 }, { "epoch": 0.84, "learning_rate": 3.606682020846251e-05, "loss": 0.7787, "step": 75500 }, { "epoch": 0.84, "learning_rate": 3.606589748132401e-05, "loss": 0.7189, "step": 75505 }, { "epoch": 0.84, "learning_rate": 3.6064974754185495e-05, "loss": 0.7041, "step": 75510 }, { "epoch": 0.84, "learning_rate": 3.6064052027046976e-05, "loss": 0.7336, "step": 75515 }, { "epoch": 0.84, "learning_rate": 3.6063129299908464e-05, "loss": 0.7005, "step": 75520 }, { "epoch": 0.84, "learning_rate": 3.606220657276995e-05, "loss": 0.6619, "step": 75525 }, { "epoch": 0.84, "learning_rate": 3.6061283845631447e-05, "loss": 0.7293, "step": 75530 }, { "epoch": 0.84, "learning_rate": 3.606036111849293e-05, "loss": 0.6634, "step": 75535 }, { "epoch": 0.84, "learning_rate": 3.6059438391354415e-05, "loss": 0.7117, "step": 75540 }, { "epoch": 0.84, "learning_rate": 3.60585156642159e-05, "loss": 0.6899, "step": 75545 }, { "epoch": 0.84, "learning_rate": 3.60575929370774e-05, "loss": 0.7234, "step": 75550 }, { "epoch": 0.84, "learning_rate": 3.605667020993888e-05, "loss": 0.6793, "step": 75555 }, { "epoch": 0.84, "learning_rate": 3.605574748280037e-05, "loss": 0.6649, "step": 75560 }, { "epoch": 0.84, "learning_rate": 3.6054824755661855e-05, "loss": 0.695, "step": 75565 }, { "epoch": 0.84, "learning_rate": 3.605390202852334e-05, "loss": 0.7319, "step": 75570 }, { "epoch": 0.84, "learning_rate": 3.605297930138483e-05, "loss": 0.7246, "step": 75575 }, { "epoch": 0.84, "learning_rate": 3.605205657424632e-05, "loss": 0.7051, "step": 75580 }, { "epoch": 0.84, "learning_rate": 3.6051133847107806e-05, "loss": 0.6881, "step": 75585 }, { "epoch": 0.84, "learning_rate": 3.6050211119969294e-05, "loss": 0.6988, "step": 75590 }, { "epoch": 0.84, "learning_rate": 3.604928839283078e-05, "loss": 0.7387, "step": 75595 }, { "epoch": 0.84, "learning_rate": 3.604836566569227e-05, "loss": 0.6978, "step": 75600 }, { "epoch": 0.84, "learning_rate": 3.604744293855376e-05, "loss": 0.7261, "step": 75605 }, { "epoch": 0.84, "learning_rate": 3.604652021141524e-05, "loss": 0.7015, "step": 75610 }, { "epoch": 0.84, "learning_rate": 3.604559748427673e-05, "loss": 0.7401, "step": 75615 }, { "epoch": 0.84, "learning_rate": 3.604467475713822e-05, "loss": 0.6735, "step": 75620 }, { "epoch": 0.84, "learning_rate": 3.604375202999971e-05, "loss": 0.6608, "step": 75625 }, { "epoch": 0.84, "learning_rate": 3.604282930286119e-05, "loss": 0.6388, "step": 75630 }, { "epoch": 0.84, "learning_rate": 3.6041906575722685e-05, "loss": 0.7094, "step": 75635 }, { "epoch": 0.84, "learning_rate": 3.604098384858417e-05, "loss": 0.7262, "step": 75640 }, { "epoch": 0.84, "learning_rate": 3.6040061121445653e-05, "loss": 0.7001, "step": 75645 }, { "epoch": 0.84, "learning_rate": 3.603913839430714e-05, "loss": 0.6515, "step": 75650 }, { "epoch": 0.84, "learning_rate": 3.6038215667168636e-05, "loss": 0.755, "step": 75655 }, { "epoch": 0.84, "learning_rate": 3.6037292940030124e-05, "loss": 0.7763, "step": 75660 }, { "epoch": 0.84, "learning_rate": 3.6036370212891605e-05, "loss": 0.6769, "step": 75665 }, { "epoch": 0.84, "learning_rate": 3.603544748575309e-05, "loss": 0.7163, "step": 75670 }, { "epoch": 0.84, "learning_rate": 3.603452475861458e-05, "loss": 0.6187, "step": 75675 }, { "epoch": 0.84, "learning_rate": 3.603360203147607e-05, "loss": 0.6536, "step": 75680 }, { "epoch": 0.84, "learning_rate": 3.6032679304337556e-05, "loss": 0.7251, "step": 75685 }, { "epoch": 0.84, "learning_rate": 3.6031756577199044e-05, "loss": 0.7359, "step": 75690 }, { "epoch": 0.84, "learning_rate": 3.603083385006053e-05, "loss": 0.7215, "step": 75695 }, { "epoch": 0.84, "learning_rate": 3.602991112292202e-05, "loss": 0.7071, "step": 75700 }, { "epoch": 0.84, "learning_rate": 3.602898839578351e-05, "loss": 0.6577, "step": 75705 }, { "epoch": 0.84, "learning_rate": 3.6028065668644996e-05, "loss": 0.6368, "step": 75710 }, { "epoch": 0.84, "learning_rate": 3.602714294150648e-05, "loss": 0.7414, "step": 75715 }, { "epoch": 0.84, "learning_rate": 3.602622021436797e-05, "loss": 0.6827, "step": 75720 }, { "epoch": 0.84, "learning_rate": 3.602529748722946e-05, "loss": 0.7347, "step": 75725 }, { "epoch": 0.84, "learning_rate": 3.602437476009095e-05, "loss": 0.713, "step": 75730 }, { "epoch": 0.84, "learning_rate": 3.6023452032952435e-05, "loss": 0.7091, "step": 75735 }, { "epoch": 0.84, "learning_rate": 3.602252930581392e-05, "loss": 0.7115, "step": 75740 }, { "epoch": 0.84, "learning_rate": 3.602160657867541e-05, "loss": 0.7976, "step": 75745 }, { "epoch": 0.84, "learning_rate": 3.60206838515369e-05, "loss": 0.7132, "step": 75750 }, { "epoch": 0.84, "learning_rate": 3.601976112439838e-05, "loss": 0.7015, "step": 75755 }, { "epoch": 0.84, "learning_rate": 3.601883839725987e-05, "loss": 0.641, "step": 75760 }, { "epoch": 0.84, "learning_rate": 3.601791567012136e-05, "loss": 0.6785, "step": 75765 }, { "epoch": 0.84, "learning_rate": 3.601699294298285e-05, "loss": 0.7877, "step": 75770 }, { "epoch": 0.84, "learning_rate": 3.601607021584433e-05, "loss": 0.7103, "step": 75775 }, { "epoch": 0.84, "learning_rate": 3.601514748870582e-05, "loss": 0.7242, "step": 75780 }, { "epoch": 0.84, "learning_rate": 3.601422476156731e-05, "loss": 0.7743, "step": 75785 }, { "epoch": 0.84, "learning_rate": 3.6013302034428794e-05, "loss": 0.7242, "step": 75790 }, { "epoch": 0.84, "learning_rate": 3.601237930729028e-05, "loss": 0.6895, "step": 75795 }, { "epoch": 0.84, "learning_rate": 3.601145658015177e-05, "loss": 0.7196, "step": 75800 }, { "epoch": 0.84, "learning_rate": 3.6010533853013265e-05, "loss": 0.6931, "step": 75805 }, { "epoch": 0.84, "learning_rate": 3.6009611125874746e-05, "loss": 0.6465, "step": 75810 }, { "epoch": 0.84, "learning_rate": 3.6008688398736234e-05, "loss": 0.6488, "step": 75815 }, { "epoch": 0.84, "learning_rate": 3.600776567159772e-05, "loss": 0.6753, "step": 75820 }, { "epoch": 0.84, "learning_rate": 3.600684294445921e-05, "loss": 0.7379, "step": 75825 }, { "epoch": 0.84, "learning_rate": 3.60059202173207e-05, "loss": 0.7133, "step": 75830 }, { "epoch": 0.84, "learning_rate": 3.6004997490182185e-05, "loss": 0.7588, "step": 75835 }, { "epoch": 0.84, "learning_rate": 3.600407476304367e-05, "loss": 0.7052, "step": 75840 }, { "epoch": 0.84, "learning_rate": 3.600315203590516e-05, "loss": 0.6726, "step": 75845 }, { "epoch": 0.84, "learning_rate": 3.600222930876665e-05, "loss": 0.6832, "step": 75850 }, { "epoch": 0.84, "learning_rate": 3.6001306581628136e-05, "loss": 0.7033, "step": 75855 }, { "epoch": 0.84, "learning_rate": 3.6000383854489624e-05, "loss": 0.7691, "step": 75860 }, { "epoch": 0.84, "learning_rate": 3.5999461127351105e-05, "loss": 0.7247, "step": 75865 }, { "epoch": 0.84, "learning_rate": 3.59985384002126e-05, "loss": 0.7251, "step": 75870 }, { "epoch": 0.84, "learning_rate": 3.599761567307409e-05, "loss": 0.6587, "step": 75875 }, { "epoch": 0.84, "learning_rate": 3.5996692945935576e-05, "loss": 0.716, "step": 75880 }, { "epoch": 0.84, "learning_rate": 3.599577021879706e-05, "loss": 0.7305, "step": 75885 }, { "epoch": 0.84, "learning_rate": 3.599484749165855e-05, "loss": 0.6512, "step": 75890 }, { "epoch": 0.84, "learning_rate": 3.599392476452004e-05, "loss": 0.7187, "step": 75895 }, { "epoch": 0.84, "learning_rate": 3.599300203738152e-05, "loss": 0.6743, "step": 75900 }, { "epoch": 0.84, "learning_rate": 3.599207931024301e-05, "loss": 0.749, "step": 75905 }, { "epoch": 0.84, "learning_rate": 3.5991156583104496e-05, "loss": 0.6994, "step": 75910 }, { "epoch": 0.84, "learning_rate": 3.599023385596599e-05, "loss": 0.6668, "step": 75915 }, { "epoch": 0.84, "learning_rate": 3.598931112882747e-05, "loss": 0.72, "step": 75920 }, { "epoch": 0.84, "learning_rate": 3.598838840168896e-05, "loss": 0.6909, "step": 75925 }, { "epoch": 0.84, "learning_rate": 3.598746567455045e-05, "loss": 0.7186, "step": 75930 }, { "epoch": 0.84, "learning_rate": 3.598654294741194e-05, "loss": 0.6971, "step": 75935 }, { "epoch": 0.84, "learning_rate": 3.598562022027342e-05, "loss": 0.684, "step": 75940 }, { "epoch": 0.84, "learning_rate": 3.598469749313491e-05, "loss": 0.7269, "step": 75945 }, { "epoch": 0.84, "learning_rate": 3.59837747659964e-05, "loss": 0.6818, "step": 75950 }, { "epoch": 0.84, "learning_rate": 3.5982852038857887e-05, "loss": 0.6235, "step": 75955 }, { "epoch": 0.84, "learning_rate": 3.5981929311719374e-05, "loss": 0.6752, "step": 75960 }, { "epoch": 0.84, "learning_rate": 3.598100658458086e-05, "loss": 0.6864, "step": 75965 }, { "epoch": 0.84, "learning_rate": 3.598008385744235e-05, "loss": 0.725, "step": 75970 }, { "epoch": 0.84, "learning_rate": 3.597916113030384e-05, "loss": 0.6944, "step": 75975 }, { "epoch": 0.84, "learning_rate": 3.5978238403165326e-05, "loss": 0.7319, "step": 75980 }, { "epoch": 0.84, "learning_rate": 3.5977315676026814e-05, "loss": 0.694, "step": 75985 }, { "epoch": 0.84, "learning_rate": 3.59763929488883e-05, "loss": 0.7097, "step": 75990 }, { "epoch": 0.84, "learning_rate": 3.597547022174978e-05, "loss": 0.6812, "step": 75995 }, { "epoch": 0.84, "learning_rate": 3.597454749461128e-05, "loss": 0.6762, "step": 76000 }, { "epoch": 0.84, "eval_loss": 0.6638379096984863, "eval_runtime": 69.2691, "eval_samples_per_second": 28.873, "eval_steps_per_second": 14.436, "step": 76000 }, { "epoch": 0.84, "learning_rate": 3.5973624767472765e-05, "loss": 0.6361, "step": 76005 }, { "epoch": 0.84, "learning_rate": 3.597270204033425e-05, "loss": 0.7055, "step": 76010 }, { "epoch": 0.84, "learning_rate": 3.5971779313195734e-05, "loss": 0.719, "step": 76015 }, { "epoch": 0.84, "learning_rate": 3.597085658605723e-05, "loss": 0.737, "step": 76020 }, { "epoch": 0.84, "learning_rate": 3.5969933858918716e-05, "loss": 0.7276, "step": 76025 }, { "epoch": 0.84, "learning_rate": 3.59690111317802e-05, "loss": 0.7153, "step": 76030 }, { "epoch": 0.84, "learning_rate": 3.5968088404641685e-05, "loss": 0.6778, "step": 76035 }, { "epoch": 0.84, "learning_rate": 3.596716567750318e-05, "loss": 0.7265, "step": 76040 }, { "epoch": 0.84, "learning_rate": 3.596624295036467e-05, "loss": 0.7031, "step": 76045 }, { "epoch": 0.84, "learning_rate": 3.596532022322615e-05, "loss": 0.6758, "step": 76050 }, { "epoch": 0.84, "learning_rate": 3.596439749608764e-05, "loss": 0.7846, "step": 76055 }, { "epoch": 0.84, "learning_rate": 3.5963474768949125e-05, "loss": 0.6728, "step": 76060 }, { "epoch": 0.84, "learning_rate": 3.596255204181061e-05, "loss": 0.7064, "step": 76065 }, { "epoch": 0.84, "learning_rate": 3.59616293146721e-05, "loss": 0.6923, "step": 76070 }, { "epoch": 0.84, "learning_rate": 3.596070658753359e-05, "loss": 0.7039, "step": 76075 }, { "epoch": 0.84, "learning_rate": 3.5959783860395076e-05, "loss": 0.715, "step": 76080 }, { "epoch": 0.84, "learning_rate": 3.5958861133256564e-05, "loss": 0.7026, "step": 76085 }, { "epoch": 0.84, "learning_rate": 3.595793840611805e-05, "loss": 0.6551, "step": 76090 }, { "epoch": 0.84, "learning_rate": 3.595701567897954e-05, "loss": 0.7046, "step": 76095 }, { "epoch": 0.84, "learning_rate": 3.595609295184103e-05, "loss": 0.774, "step": 76100 }, { "epoch": 0.84, "learning_rate": 3.5955170224702515e-05, "loss": 0.6501, "step": 76105 }, { "epoch": 0.84, "learning_rate": 3.5954247497564e-05, "loss": 0.7414, "step": 76110 }, { "epoch": 0.84, "learning_rate": 3.595332477042549e-05, "loss": 0.6893, "step": 76115 }, { "epoch": 0.84, "learning_rate": 3.595240204328698e-05, "loss": 0.6725, "step": 76120 }, { "epoch": 0.84, "learning_rate": 3.595147931614847e-05, "loss": 0.7354, "step": 76125 }, { "epoch": 0.84, "learning_rate": 3.5950556589009954e-05, "loss": 0.7276, "step": 76130 }, { "epoch": 0.84, "learning_rate": 3.594963386187144e-05, "loss": 0.7099, "step": 76135 }, { "epoch": 0.84, "learning_rate": 3.594871113473292e-05, "loss": 0.694, "step": 76140 }, { "epoch": 0.84, "learning_rate": 3.594778840759441e-05, "loss": 0.7111, "step": 76145 }, { "epoch": 0.84, "learning_rate": 3.5946865680455906e-05, "loss": 0.7041, "step": 76150 }, { "epoch": 0.84, "learning_rate": 3.5945942953317394e-05, "loss": 0.6651, "step": 76155 }, { "epoch": 0.84, "learning_rate": 3.5945020226178875e-05, "loss": 0.6539, "step": 76160 }, { "epoch": 0.84, "learning_rate": 3.594409749904036e-05, "loss": 0.7168, "step": 76165 }, { "epoch": 0.84, "learning_rate": 3.594317477190186e-05, "loss": 0.6593, "step": 76170 }, { "epoch": 0.84, "learning_rate": 3.594225204476334e-05, "loss": 0.675, "step": 76175 }, { "epoch": 0.84, "learning_rate": 3.5941329317624826e-05, "loss": 0.6954, "step": 76180 }, { "epoch": 0.84, "learning_rate": 3.5940406590486314e-05, "loss": 0.6981, "step": 76185 }, { "epoch": 0.84, "learning_rate": 3.593948386334781e-05, "loss": 0.7477, "step": 76190 }, { "epoch": 0.84, "learning_rate": 3.593856113620929e-05, "loss": 0.6989, "step": 76195 }, { "epoch": 0.84, "learning_rate": 3.593763840907078e-05, "loss": 0.7303, "step": 76200 }, { "epoch": 0.84, "learning_rate": 3.5936715681932265e-05, "loss": 0.6534, "step": 76205 }, { "epoch": 0.84, "learning_rate": 3.593579295479375e-05, "loss": 0.7236, "step": 76210 }, { "epoch": 0.84, "learning_rate": 3.593487022765524e-05, "loss": 0.7425, "step": 76215 }, { "epoch": 0.84, "learning_rate": 3.593394750051673e-05, "loss": 0.7081, "step": 76220 }, { "epoch": 0.84, "learning_rate": 3.593302477337822e-05, "loss": 0.6647, "step": 76225 }, { "epoch": 0.84, "learning_rate": 3.5932102046239705e-05, "loss": 0.633, "step": 76230 }, { "epoch": 0.84, "learning_rate": 3.593117931910119e-05, "loss": 0.7212, "step": 76235 }, { "epoch": 0.84, "learning_rate": 3.593025659196268e-05, "loss": 0.6772, "step": 76240 }, { "epoch": 0.84, "learning_rate": 3.592933386482417e-05, "loss": 0.6941, "step": 76245 }, { "epoch": 0.84, "learning_rate": 3.592841113768565e-05, "loss": 0.7289, "step": 76250 }, { "epoch": 0.84, "learning_rate": 3.5927488410547144e-05, "loss": 0.7532, "step": 76255 }, { "epoch": 0.84, "learning_rate": 3.592656568340863e-05, "loss": 0.6241, "step": 76260 }, { "epoch": 0.84, "learning_rate": 3.592564295627012e-05, "loss": 0.7058, "step": 76265 }, { "epoch": 0.84, "learning_rate": 3.59247202291316e-05, "loss": 0.7638, "step": 76270 }, { "epoch": 0.84, "learning_rate": 3.5923797501993095e-05, "loss": 0.6794, "step": 76275 }, { "epoch": 0.84, "learning_rate": 3.592287477485458e-05, "loss": 0.6964, "step": 76280 }, { "epoch": 0.84, "learning_rate": 3.5921952047716064e-05, "loss": 0.7027, "step": 76285 }, { "epoch": 0.84, "learning_rate": 3.592102932057755e-05, "loss": 0.6748, "step": 76290 }, { "epoch": 0.84, "learning_rate": 3.592010659343904e-05, "loss": 0.7098, "step": 76295 }, { "epoch": 0.84, "learning_rate": 3.5919183866300535e-05, "loss": 0.6555, "step": 76300 }, { "epoch": 0.84, "learning_rate": 3.5918261139162016e-05, "loss": 0.6691, "step": 76305 }, { "epoch": 0.84, "learning_rate": 3.5917338412023503e-05, "loss": 0.6931, "step": 76310 }, { "epoch": 0.85, "learning_rate": 3.591641568488499e-05, "loss": 0.691, "step": 76315 }, { "epoch": 0.85, "learning_rate": 3.5915492957746486e-05, "loss": 0.6835, "step": 76320 }, { "epoch": 0.85, "learning_rate": 3.591457023060797e-05, "loss": 0.6934, "step": 76325 }, { "epoch": 0.85, "learning_rate": 3.5913647503469455e-05, "loss": 0.7516, "step": 76330 }, { "epoch": 0.85, "learning_rate": 3.591272477633094e-05, "loss": 0.7192, "step": 76335 }, { "epoch": 0.85, "learning_rate": 3.591180204919243e-05, "loss": 0.7345, "step": 76340 }, { "epoch": 0.85, "learning_rate": 3.591087932205392e-05, "loss": 0.722, "step": 76345 }, { "epoch": 0.85, "learning_rate": 3.5909956594915406e-05, "loss": 0.6603, "step": 76350 }, { "epoch": 0.85, "learning_rate": 3.5909033867776894e-05, "loss": 0.6502, "step": 76355 }, { "epoch": 0.85, "learning_rate": 3.5908111140638375e-05, "loss": 0.6443, "step": 76360 }, { "epoch": 0.85, "learning_rate": 3.590718841349987e-05, "loss": 0.7226, "step": 76365 }, { "epoch": 0.85, "learning_rate": 3.590626568636136e-05, "loss": 0.6512, "step": 76370 }, { "epoch": 0.85, "learning_rate": 3.5905342959222845e-05, "loss": 0.7464, "step": 76375 }, { "epoch": 0.85, "learning_rate": 3.5904420232084327e-05, "loss": 0.6841, "step": 76380 }, { "epoch": 0.85, "learning_rate": 3.590349750494582e-05, "loss": 0.7286, "step": 76385 }, { "epoch": 0.85, "learning_rate": 3.590257477780731e-05, "loss": 0.6769, "step": 76390 }, { "epoch": 0.85, "learning_rate": 3.59016520506688e-05, "loss": 0.7042, "step": 76395 }, { "epoch": 0.85, "learning_rate": 3.590072932353028e-05, "loss": 0.6814, "step": 76400 }, { "epoch": 0.85, "learning_rate": 3.589980659639177e-05, "loss": 0.6891, "step": 76405 }, { "epoch": 0.85, "learning_rate": 3.589888386925326e-05, "loss": 0.7055, "step": 76410 }, { "epoch": 0.85, "learning_rate": 3.589796114211474e-05, "loss": 0.7019, "step": 76415 }, { "epoch": 0.85, "learning_rate": 3.589703841497623e-05, "loss": 0.7218, "step": 76420 }, { "epoch": 0.85, "learning_rate": 3.5896115687837724e-05, "loss": 0.7338, "step": 76425 }, { "epoch": 0.85, "learning_rate": 3.589519296069921e-05, "loss": 0.7006, "step": 76430 }, { "epoch": 0.85, "learning_rate": 3.589427023356069e-05, "loss": 0.716, "step": 76435 }, { "epoch": 0.85, "learning_rate": 3.589334750642218e-05, "loss": 0.7524, "step": 76440 }, { "epoch": 0.85, "learning_rate": 3.589242477928367e-05, "loss": 0.7261, "step": 76445 }, { "epoch": 0.85, "learning_rate": 3.5891502052145156e-05, "loss": 0.693, "step": 76450 }, { "epoch": 0.85, "learning_rate": 3.5890579325006644e-05, "loss": 0.7427, "step": 76455 }, { "epoch": 0.85, "learning_rate": 3.588965659786813e-05, "loss": 0.7097, "step": 76460 }, { "epoch": 0.85, "learning_rate": 3.588873387072962e-05, "loss": 0.7319, "step": 76465 }, { "epoch": 0.85, "learning_rate": 3.588781114359111e-05, "loss": 0.7055, "step": 76470 }, { "epoch": 0.85, "learning_rate": 3.5886888416452596e-05, "loss": 0.7599, "step": 76475 }, { "epoch": 0.85, "learning_rate": 3.5885965689314084e-05, "loss": 0.6929, "step": 76480 }, { "epoch": 0.85, "learning_rate": 3.588504296217557e-05, "loss": 0.6964, "step": 76485 }, { "epoch": 0.85, "learning_rate": 3.588412023503706e-05, "loss": 0.7462, "step": 76490 }, { "epoch": 0.85, "learning_rate": 3.588319750789855e-05, "loss": 0.6933, "step": 76495 }, { "epoch": 0.85, "learning_rate": 3.5882274780760035e-05, "loss": 0.7106, "step": 76500 }, { "epoch": 0.85, "learning_rate": 3.588135205362152e-05, "loss": 0.7294, "step": 76505 }, { "epoch": 0.85, "learning_rate": 3.5880429326483004e-05, "loss": 0.6217, "step": 76510 }, { "epoch": 0.85, "learning_rate": 3.58795065993445e-05, "loss": 0.6943, "step": 76515 }, { "epoch": 0.85, "learning_rate": 3.5878583872205986e-05, "loss": 0.7545, "step": 76520 }, { "epoch": 0.85, "learning_rate": 3.587766114506747e-05, "loss": 0.6987, "step": 76525 }, { "epoch": 0.85, "learning_rate": 3.5876738417928955e-05, "loss": 0.6841, "step": 76530 }, { "epoch": 0.85, "learning_rate": 3.587581569079045e-05, "loss": 0.7115, "step": 76535 }, { "epoch": 0.85, "learning_rate": 3.587489296365194e-05, "loss": 0.7025, "step": 76540 }, { "epoch": 0.85, "learning_rate": 3.587397023651342e-05, "loss": 0.7095, "step": 76545 }, { "epoch": 0.85, "learning_rate": 3.587304750937491e-05, "loss": 0.6342, "step": 76550 }, { "epoch": 0.85, "learning_rate": 3.58721247822364e-05, "loss": 0.6952, "step": 76555 }, { "epoch": 0.85, "learning_rate": 3.587120205509788e-05, "loss": 0.7412, "step": 76560 }, { "epoch": 0.85, "learning_rate": 3.587027932795937e-05, "loss": 0.6868, "step": 76565 }, { "epoch": 0.85, "learning_rate": 3.586935660082086e-05, "loss": 0.6429, "step": 76570 }, { "epoch": 0.85, "learning_rate": 3.586843387368235e-05, "loss": 0.6642, "step": 76575 }, { "epoch": 0.85, "learning_rate": 3.5867511146543834e-05, "loss": 0.711, "step": 76580 }, { "epoch": 0.85, "learning_rate": 3.586658841940532e-05, "loss": 0.7934, "step": 76585 }, { "epoch": 0.85, "learning_rate": 3.586566569226681e-05, "loss": 0.6835, "step": 76590 }, { "epoch": 0.85, "learning_rate": 3.58647429651283e-05, "loss": 0.6987, "step": 76595 }, { "epoch": 0.85, "learning_rate": 3.5863820237989785e-05, "loss": 0.675, "step": 76600 }, { "epoch": 0.85, "learning_rate": 3.586289751085127e-05, "loss": 0.7303, "step": 76605 }, { "epoch": 0.85, "learning_rate": 3.586197478371276e-05, "loss": 0.6814, "step": 76610 }, { "epoch": 0.85, "learning_rate": 3.586105205657425e-05, "loss": 0.7182, "step": 76615 }, { "epoch": 0.85, "learning_rate": 3.5860129329435737e-05, "loss": 0.6892, "step": 76620 }, { "epoch": 0.85, "learning_rate": 3.5859206602297224e-05, "loss": 0.6944, "step": 76625 }, { "epoch": 0.85, "learning_rate": 3.585828387515871e-05, "loss": 0.6579, "step": 76630 }, { "epoch": 0.85, "learning_rate": 3.585736114802019e-05, "loss": 0.7206, "step": 76635 }, { "epoch": 0.85, "learning_rate": 3.585643842088169e-05, "loss": 0.6796, "step": 76640 }, { "epoch": 0.85, "learning_rate": 3.5855515693743176e-05, "loss": 0.7055, "step": 76645 }, { "epoch": 0.85, "learning_rate": 3.5854592966604664e-05, "loss": 0.7116, "step": 76650 }, { "epoch": 0.85, "learning_rate": 3.5853670239466145e-05, "loss": 0.7017, "step": 76655 }, { "epoch": 0.85, "learning_rate": 3.585274751232763e-05, "loss": 0.6723, "step": 76660 }, { "epoch": 0.85, "learning_rate": 3.585182478518913e-05, "loss": 0.7035, "step": 76665 }, { "epoch": 0.85, "learning_rate": 3.5850902058050615e-05, "loss": 0.6839, "step": 76670 }, { "epoch": 0.85, "learning_rate": 3.5849979330912096e-05, "loss": 0.7186, "step": 76675 }, { "epoch": 0.85, "learning_rate": 3.5849056603773584e-05, "loss": 0.726, "step": 76680 }, { "epoch": 0.85, "learning_rate": 3.584813387663508e-05, "loss": 0.6718, "step": 76685 }, { "epoch": 0.85, "learning_rate": 3.584721114949656e-05, "loss": 0.6718, "step": 76690 }, { "epoch": 0.85, "learning_rate": 3.584628842235805e-05, "loss": 0.6312, "step": 76695 }, { "epoch": 0.85, "learning_rate": 3.5845365695219535e-05, "loss": 0.7067, "step": 76700 }, { "epoch": 0.85, "learning_rate": 3.584444296808103e-05, "loss": 0.7782, "step": 76705 }, { "epoch": 0.85, "learning_rate": 3.584352024094251e-05, "loss": 0.6442, "step": 76710 }, { "epoch": 0.85, "learning_rate": 3.5842597513804e-05, "loss": 0.6696, "step": 76715 }, { "epoch": 0.85, "learning_rate": 3.584167478666549e-05, "loss": 0.7222, "step": 76720 }, { "epoch": 0.85, "learning_rate": 3.5840752059526975e-05, "loss": 0.6785, "step": 76725 }, { "epoch": 0.85, "learning_rate": 3.583982933238846e-05, "loss": 0.6634, "step": 76730 }, { "epoch": 0.85, "learning_rate": 3.583890660524995e-05, "loss": 0.6651, "step": 76735 }, { "epoch": 0.85, "learning_rate": 3.583798387811144e-05, "loss": 0.6526, "step": 76740 }, { "epoch": 0.85, "learning_rate": 3.5837061150972926e-05, "loss": 0.7064, "step": 76745 }, { "epoch": 0.85, "learning_rate": 3.5836138423834414e-05, "loss": 0.7338, "step": 76750 }, { "epoch": 0.85, "learning_rate": 3.58352156966959e-05, "loss": 0.708, "step": 76755 }, { "epoch": 0.85, "learning_rate": 3.583429296955739e-05, "loss": 0.6946, "step": 76760 }, { "epoch": 0.85, "learning_rate": 3.583337024241887e-05, "loss": 0.7128, "step": 76765 }, { "epoch": 0.85, "learning_rate": 3.5832447515280365e-05, "loss": 0.6914, "step": 76770 }, { "epoch": 0.85, "learning_rate": 3.583152478814185e-05, "loss": 0.6923, "step": 76775 }, { "epoch": 0.85, "learning_rate": 3.583060206100334e-05, "loss": 0.6737, "step": 76780 }, { "epoch": 0.85, "learning_rate": 3.582967933386482e-05, "loss": 0.7596, "step": 76785 }, { "epoch": 0.85, "learning_rate": 3.5828756606726317e-05, "loss": 0.7049, "step": 76790 }, { "epoch": 0.85, "learning_rate": 3.5827833879587804e-05, "loss": 0.6833, "step": 76795 }, { "epoch": 0.85, "learning_rate": 3.5826911152449286e-05, "loss": 0.7077, "step": 76800 }, { "epoch": 0.85, "learning_rate": 3.582598842531077e-05, "loss": 0.7004, "step": 76805 }, { "epoch": 0.85, "learning_rate": 3.582506569817227e-05, "loss": 0.6866, "step": 76810 }, { "epoch": 0.85, "learning_rate": 3.5824142971033756e-05, "loss": 0.6765, "step": 76815 }, { "epoch": 0.85, "learning_rate": 3.582322024389524e-05, "loss": 0.663, "step": 76820 }, { "epoch": 0.85, "learning_rate": 3.5822297516756725e-05, "loss": 0.7148, "step": 76825 }, { "epoch": 0.85, "learning_rate": 3.582137478961821e-05, "loss": 0.6885, "step": 76830 }, { "epoch": 0.85, "learning_rate": 3.58204520624797e-05, "loss": 0.7175, "step": 76835 }, { "epoch": 0.85, "learning_rate": 3.581952933534119e-05, "loss": 0.6967, "step": 76840 }, { "epoch": 0.85, "learning_rate": 3.5818606608202676e-05, "loss": 0.6756, "step": 76845 }, { "epoch": 0.85, "learning_rate": 3.5817683881064164e-05, "loss": 0.6921, "step": 76850 }, { "epoch": 0.85, "learning_rate": 3.581676115392565e-05, "loss": 0.6068, "step": 76855 }, { "epoch": 0.85, "learning_rate": 3.581583842678714e-05, "loss": 0.7041, "step": 76860 }, { "epoch": 0.85, "learning_rate": 3.581491569964863e-05, "loss": 0.6967, "step": 76865 }, { "epoch": 0.85, "learning_rate": 3.5813992972510115e-05, "loss": 0.7431, "step": 76870 }, { "epoch": 0.85, "learning_rate": 3.58130702453716e-05, "loss": 0.7272, "step": 76875 }, { "epoch": 0.85, "learning_rate": 3.581214751823309e-05, "loss": 0.6967, "step": 76880 }, { "epoch": 0.85, "learning_rate": 3.581122479109458e-05, "loss": 0.7242, "step": 76885 }, { "epoch": 0.85, "learning_rate": 3.581030206395607e-05, "loss": 0.7376, "step": 76890 }, { "epoch": 0.85, "learning_rate": 3.580937933681755e-05, "loss": 0.6968, "step": 76895 }, { "epoch": 0.85, "learning_rate": 3.580845660967904e-05, "loss": 0.7565, "step": 76900 }, { "epoch": 0.85, "learning_rate": 3.580753388254053e-05, "loss": 0.7023, "step": 76905 }, { "epoch": 0.85, "learning_rate": 3.580661115540201e-05, "loss": 0.7022, "step": 76910 }, { "epoch": 0.85, "learning_rate": 3.58056884282635e-05, "loss": 0.6844, "step": 76915 }, { "epoch": 0.85, "learning_rate": 3.5804765701124994e-05, "loss": 0.6974, "step": 76920 }, { "epoch": 0.85, "learning_rate": 3.580384297398648e-05, "loss": 0.7385, "step": 76925 }, { "epoch": 0.85, "learning_rate": 3.580292024684796e-05, "loss": 0.6935, "step": 76930 }, { "epoch": 0.85, "learning_rate": 3.580199751970945e-05, "loss": 0.6797, "step": 76935 }, { "epoch": 0.85, "learning_rate": 3.5801074792570945e-05, "loss": 0.7259, "step": 76940 }, { "epoch": 0.85, "learning_rate": 3.5800152065432426e-05, "loss": 0.7756, "step": 76945 }, { "epoch": 0.85, "learning_rate": 3.5799229338293914e-05, "loss": 0.6822, "step": 76950 }, { "epoch": 0.85, "learning_rate": 3.57983066111554e-05, "loss": 0.6785, "step": 76955 }, { "epoch": 0.85, "learning_rate": 3.57973838840169e-05, "loss": 0.7452, "step": 76960 }, { "epoch": 0.85, "learning_rate": 3.579646115687838e-05, "loss": 0.7178, "step": 76965 }, { "epoch": 0.85, "learning_rate": 3.5795538429739866e-05, "loss": 0.6946, "step": 76970 }, { "epoch": 0.85, "learning_rate": 3.5794615702601353e-05, "loss": 0.6723, "step": 76975 }, { "epoch": 0.85, "learning_rate": 3.579369297546284e-05, "loss": 0.6906, "step": 76980 }, { "epoch": 0.85, "learning_rate": 3.579277024832433e-05, "loss": 0.7439, "step": 76985 }, { "epoch": 0.85, "learning_rate": 3.579184752118582e-05, "loss": 0.6994, "step": 76990 }, { "epoch": 0.85, "learning_rate": 3.5790924794047305e-05, "loss": 0.7121, "step": 76995 }, { "epoch": 0.85, "learning_rate": 3.579000206690879e-05, "loss": 0.7509, "step": 77000 }, { "epoch": 0.85, "eval_loss": 0.6541317105293274, "eval_runtime": 69.2737, "eval_samples_per_second": 28.871, "eval_steps_per_second": 14.435, "step": 77000 }, { "epoch": 0.85, "learning_rate": 3.578907933977028e-05, "loss": 0.7083, "step": 77005 }, { "epoch": 0.85, "learning_rate": 3.578815661263177e-05, "loss": 0.6989, "step": 77010 }, { "epoch": 0.85, "learning_rate": 3.5787233885493256e-05, "loss": 0.6654, "step": 77015 }, { "epoch": 0.85, "learning_rate": 3.578631115835474e-05, "loss": 0.6898, "step": 77020 }, { "epoch": 0.85, "learning_rate": 3.578538843121623e-05, "loss": 0.726, "step": 77025 }, { "epoch": 0.85, "learning_rate": 3.578446570407772e-05, "loss": 0.6909, "step": 77030 }, { "epoch": 0.85, "learning_rate": 3.578354297693921e-05, "loss": 0.6893, "step": 77035 }, { "epoch": 0.85, "learning_rate": 3.578262024980069e-05, "loss": 0.7204, "step": 77040 }, { "epoch": 0.85, "learning_rate": 3.5781697522662177e-05, "loss": 0.7715, "step": 77045 }, { "epoch": 0.85, "learning_rate": 3.578077479552367e-05, "loss": 0.6994, "step": 77050 }, { "epoch": 0.85, "learning_rate": 3.577985206838516e-05, "loss": 0.7118, "step": 77055 }, { "epoch": 0.85, "learning_rate": 3.577892934124664e-05, "loss": 0.7097, "step": 77060 }, { "epoch": 0.85, "learning_rate": 3.577800661410813e-05, "loss": 0.741, "step": 77065 }, { "epoch": 0.85, "learning_rate": 3.577708388696962e-05, "loss": 0.6874, "step": 77070 }, { "epoch": 0.85, "learning_rate": 3.5776161159831104e-05, "loss": 0.7613, "step": 77075 }, { "epoch": 0.85, "learning_rate": 3.577523843269259e-05, "loss": 0.7119, "step": 77080 }, { "epoch": 0.85, "learning_rate": 3.577431570555408e-05, "loss": 0.7223, "step": 77085 }, { "epoch": 0.85, "learning_rate": 3.5773392978415574e-05, "loss": 0.6802, "step": 77090 }, { "epoch": 0.85, "learning_rate": 3.5772470251277055e-05, "loss": 0.7663, "step": 77095 }, { "epoch": 0.85, "learning_rate": 3.577154752413854e-05, "loss": 0.6799, "step": 77100 }, { "epoch": 0.85, "learning_rate": 3.577062479700003e-05, "loss": 0.6919, "step": 77105 }, { "epoch": 0.85, "learning_rate": 3.576970206986152e-05, "loss": 0.7316, "step": 77110 }, { "epoch": 0.85, "learning_rate": 3.5768779342723006e-05, "loss": 0.7096, "step": 77115 }, { "epoch": 0.85, "learning_rate": 3.5767856615584494e-05, "loss": 0.6876, "step": 77120 }, { "epoch": 0.85, "learning_rate": 3.576693388844598e-05, "loss": 0.7042, "step": 77125 }, { "epoch": 0.85, "learning_rate": 3.576601116130747e-05, "loss": 0.7081, "step": 77130 }, { "epoch": 0.85, "learning_rate": 3.576508843416896e-05, "loss": 0.6963, "step": 77135 }, { "epoch": 0.85, "learning_rate": 3.5764165707030446e-05, "loss": 0.6865, "step": 77140 }, { "epoch": 0.85, "learning_rate": 3.5763242979891934e-05, "loss": 0.6925, "step": 77145 }, { "epoch": 0.85, "learning_rate": 3.5762320252753415e-05, "loss": 0.7215, "step": 77150 }, { "epoch": 0.85, "learning_rate": 3.576139752561491e-05, "loss": 0.7419, "step": 77155 }, { "epoch": 0.85, "learning_rate": 3.57604747984764e-05, "loss": 0.7069, "step": 77160 }, { "epoch": 0.85, "learning_rate": 3.5759552071337885e-05, "loss": 0.7175, "step": 77165 }, { "epoch": 0.85, "learning_rate": 3.5758629344199366e-05, "loss": 0.6677, "step": 77170 }, { "epoch": 0.85, "learning_rate": 3.575770661706086e-05, "loss": 0.7182, "step": 77175 }, { "epoch": 0.85, "learning_rate": 3.575678388992235e-05, "loss": 0.7011, "step": 77180 }, { "epoch": 0.85, "learning_rate": 3.575586116278383e-05, "loss": 0.7331, "step": 77185 }, { "epoch": 0.85, "learning_rate": 3.575493843564532e-05, "loss": 0.6989, "step": 77190 }, { "epoch": 0.85, "learning_rate": 3.5754015708506805e-05, "loss": 0.6517, "step": 77195 }, { "epoch": 0.85, "learning_rate": 3.57530929813683e-05, "loss": 0.6799, "step": 77200 }, { "epoch": 0.85, "learning_rate": 3.575217025422978e-05, "loss": 0.7439, "step": 77205 }, { "epoch": 0.85, "learning_rate": 3.575124752709127e-05, "loss": 0.6416, "step": 77210 }, { "epoch": 0.85, "learning_rate": 3.575032479995276e-05, "loss": 0.7097, "step": 77215 }, { "epoch": 0.86, "learning_rate": 3.5749402072814244e-05, "loss": 0.7098, "step": 77220 }, { "epoch": 0.86, "learning_rate": 3.574847934567573e-05, "loss": 0.6999, "step": 77225 }, { "epoch": 0.86, "learning_rate": 3.574755661853722e-05, "loss": 0.6673, "step": 77230 }, { "epoch": 0.86, "learning_rate": 3.574663389139871e-05, "loss": 0.7049, "step": 77235 }, { "epoch": 0.86, "learning_rate": 3.5745711164260196e-05, "loss": 0.7399, "step": 77240 }, { "epoch": 0.86, "learning_rate": 3.5744788437121684e-05, "loss": 0.6786, "step": 77245 }, { "epoch": 0.86, "learning_rate": 3.574386570998317e-05, "loss": 0.6948, "step": 77250 }, { "epoch": 0.86, "learning_rate": 3.574294298284466e-05, "loss": 0.7264, "step": 77255 }, { "epoch": 0.86, "learning_rate": 3.574202025570615e-05, "loss": 0.7065, "step": 77260 }, { "epoch": 0.86, "learning_rate": 3.5741097528567635e-05, "loss": 0.724, "step": 77265 }, { "epoch": 0.86, "learning_rate": 3.574017480142912e-05, "loss": 0.722, "step": 77270 }, { "epoch": 0.86, "learning_rate": 3.573925207429061e-05, "loss": 0.6634, "step": 77275 }, { "epoch": 0.86, "learning_rate": 3.573832934715209e-05, "loss": 0.7256, "step": 77280 }, { "epoch": 0.86, "learning_rate": 3.5737406620013587e-05, "loss": 0.6341, "step": 77285 }, { "epoch": 0.86, "learning_rate": 3.5736483892875074e-05, "loss": 0.73, "step": 77290 }, { "epoch": 0.86, "learning_rate": 3.5735561165736555e-05, "loss": 0.759, "step": 77295 }, { "epoch": 0.86, "learning_rate": 3.573463843859804e-05, "loss": 0.7185, "step": 77300 }, { "epoch": 0.86, "learning_rate": 3.573371571145954e-05, "loss": 0.7131, "step": 77305 }, { "epoch": 0.86, "learning_rate": 3.5732792984321026e-05, "loss": 0.719, "step": 77310 }, { "epoch": 0.86, "learning_rate": 3.573187025718251e-05, "loss": 0.6842, "step": 77315 }, { "epoch": 0.86, "learning_rate": 3.5730947530043995e-05, "loss": 0.7627, "step": 77320 }, { "epoch": 0.86, "learning_rate": 3.573002480290549e-05, "loss": 0.728, "step": 77325 }, { "epoch": 0.86, "learning_rate": 3.572910207576697e-05, "loss": 0.7411, "step": 77330 }, { "epoch": 0.86, "learning_rate": 3.572817934862846e-05, "loss": 0.6829, "step": 77335 }, { "epoch": 0.86, "learning_rate": 3.5727256621489946e-05, "loss": 0.6674, "step": 77340 }, { "epoch": 0.86, "learning_rate": 3.5726333894351434e-05, "loss": 0.724, "step": 77345 }, { "epoch": 0.86, "learning_rate": 3.572541116721292e-05, "loss": 0.6769, "step": 77350 }, { "epoch": 0.86, "learning_rate": 3.572448844007441e-05, "loss": 0.7676, "step": 77355 }, { "epoch": 0.86, "learning_rate": 3.57235657129359e-05, "loss": 0.7234, "step": 77360 }, { "epoch": 0.86, "learning_rate": 3.5722642985797385e-05, "loss": 0.7204, "step": 77365 }, { "epoch": 0.86, "learning_rate": 3.572172025865887e-05, "loss": 0.6876, "step": 77370 }, { "epoch": 0.86, "learning_rate": 3.572079753152036e-05, "loss": 0.6556, "step": 77375 }, { "epoch": 0.86, "learning_rate": 3.571987480438185e-05, "loss": 0.6544, "step": 77380 }, { "epoch": 0.86, "learning_rate": 3.571895207724334e-05, "loss": 0.7662, "step": 77385 }, { "epoch": 0.86, "learning_rate": 3.5718029350104825e-05, "loss": 0.6415, "step": 77390 }, { "epoch": 0.86, "learning_rate": 3.571710662296631e-05, "loss": 0.664, "step": 77395 }, { "epoch": 0.86, "learning_rate": 3.57161838958278e-05, "loss": 0.7314, "step": 77400 }, { "epoch": 0.86, "learning_rate": 3.571526116868928e-05, "loss": 0.6911, "step": 77405 }, { "epoch": 0.86, "learning_rate": 3.5714338441550776e-05, "loss": 0.6604, "step": 77410 }, { "epoch": 0.86, "learning_rate": 3.5713415714412264e-05, "loss": 0.7715, "step": 77415 }, { "epoch": 0.86, "learning_rate": 3.571249298727375e-05, "loss": 0.7446, "step": 77420 }, { "epoch": 0.86, "learning_rate": 3.571157026013523e-05, "loss": 0.6754, "step": 77425 }, { "epoch": 0.86, "learning_rate": 3.571064753299672e-05, "loss": 0.6526, "step": 77430 }, { "epoch": 0.86, "learning_rate": 3.5709724805858215e-05, "loss": 0.6757, "step": 77435 }, { "epoch": 0.86, "learning_rate": 3.57088020787197e-05, "loss": 0.7051, "step": 77440 }, { "epoch": 0.86, "learning_rate": 3.5707879351581184e-05, "loss": 0.6867, "step": 77445 }, { "epoch": 0.86, "learning_rate": 3.570695662444267e-05, "loss": 0.6706, "step": 77450 }, { "epoch": 0.86, "learning_rate": 3.5706033897304167e-05, "loss": 0.7, "step": 77455 }, { "epoch": 0.86, "learning_rate": 3.570511117016565e-05, "loss": 0.7351, "step": 77460 }, { "epoch": 0.86, "learning_rate": 3.5704188443027136e-05, "loss": 0.6864, "step": 77465 }, { "epoch": 0.86, "learning_rate": 3.570326571588862e-05, "loss": 0.6771, "step": 77470 }, { "epoch": 0.86, "learning_rate": 3.570234298875012e-05, "loss": 0.6572, "step": 77475 }, { "epoch": 0.86, "learning_rate": 3.57014202616116e-05, "loss": 0.7022, "step": 77480 }, { "epoch": 0.86, "learning_rate": 3.570049753447309e-05, "loss": 0.6921, "step": 77485 }, { "epoch": 0.86, "learning_rate": 3.5699574807334575e-05, "loss": 0.684, "step": 77490 }, { "epoch": 0.86, "learning_rate": 3.569865208019606e-05, "loss": 0.768, "step": 77495 }, { "epoch": 0.86, "learning_rate": 3.569772935305755e-05, "loss": 0.6798, "step": 77500 }, { "epoch": 0.86, "learning_rate": 3.569680662591904e-05, "loss": 0.6644, "step": 77505 }, { "epoch": 0.86, "learning_rate": 3.5695883898780526e-05, "loss": 0.6896, "step": 77510 }, { "epoch": 0.86, "learning_rate": 3.5694961171642014e-05, "loss": 0.6415, "step": 77515 }, { "epoch": 0.86, "learning_rate": 3.56940384445035e-05, "loss": 0.6799, "step": 77520 }, { "epoch": 0.86, "learning_rate": 3.569311571736499e-05, "loss": 0.6602, "step": 77525 }, { "epoch": 0.86, "learning_rate": 3.569219299022648e-05, "loss": 0.6648, "step": 77530 }, { "epoch": 0.86, "learning_rate": 3.569127026308796e-05, "loss": 0.6503, "step": 77535 }, { "epoch": 0.86, "learning_rate": 3.569034753594945e-05, "loss": 0.6995, "step": 77540 }, { "epoch": 0.86, "learning_rate": 3.568942480881094e-05, "loss": 0.6652, "step": 77545 }, { "epoch": 0.86, "learning_rate": 3.568850208167243e-05, "loss": 0.6723, "step": 77550 }, { "epoch": 0.86, "learning_rate": 3.568757935453391e-05, "loss": 0.6612, "step": 77555 }, { "epoch": 0.86, "learning_rate": 3.5686656627395405e-05, "loss": 0.666, "step": 77560 }, { "epoch": 0.86, "learning_rate": 3.568573390025689e-05, "loss": 0.6761, "step": 77565 }, { "epoch": 0.86, "learning_rate": 3.5684811173118374e-05, "loss": 0.724, "step": 77570 }, { "epoch": 0.86, "learning_rate": 3.568388844597986e-05, "loss": 0.6957, "step": 77575 }, { "epoch": 0.86, "learning_rate": 3.568296571884135e-05, "loss": 0.6756, "step": 77580 }, { "epoch": 0.86, "learning_rate": 3.5682042991702844e-05, "loss": 0.7512, "step": 77585 }, { "epoch": 0.86, "learning_rate": 3.5681120264564325e-05, "loss": 0.6534, "step": 77590 }, { "epoch": 0.86, "learning_rate": 3.568019753742581e-05, "loss": 0.7293, "step": 77595 }, { "epoch": 0.86, "learning_rate": 3.56792748102873e-05, "loss": 0.7285, "step": 77600 }, { "epoch": 0.86, "learning_rate": 3.567835208314879e-05, "loss": 0.7138, "step": 77605 }, { "epoch": 0.86, "learning_rate": 3.5677429356010276e-05, "loss": 0.6583, "step": 77610 }, { "epoch": 0.86, "learning_rate": 3.5676506628871764e-05, "loss": 0.6973, "step": 77615 }, { "epoch": 0.86, "learning_rate": 3.567558390173325e-05, "loss": 0.7392, "step": 77620 }, { "epoch": 0.86, "learning_rate": 3.567466117459474e-05, "loss": 0.7245, "step": 77625 }, { "epoch": 0.86, "learning_rate": 3.567373844745623e-05, "loss": 0.6413, "step": 77630 }, { "epoch": 0.86, "learning_rate": 3.5672815720317716e-05, "loss": 0.6941, "step": 77635 }, { "epoch": 0.86, "learning_rate": 3.5671892993179203e-05, "loss": 0.7557, "step": 77640 }, { "epoch": 0.86, "learning_rate": 3.567097026604069e-05, "loss": 0.6987, "step": 77645 }, { "epoch": 0.86, "learning_rate": 3.567004753890218e-05, "loss": 0.6883, "step": 77650 }, { "epoch": 0.86, "learning_rate": 3.566912481176367e-05, "loss": 0.6718, "step": 77655 }, { "epoch": 0.86, "learning_rate": 3.5668202084625155e-05, "loss": 0.6791, "step": 77660 }, { "epoch": 0.86, "learning_rate": 3.5667279357486636e-05, "loss": 0.682, "step": 77665 }, { "epoch": 0.86, "learning_rate": 3.566635663034813e-05, "loss": 0.666, "step": 77670 }, { "epoch": 0.86, "learning_rate": 3.566543390320962e-05, "loss": 0.6991, "step": 77675 }, { "epoch": 0.86, "learning_rate": 3.56645111760711e-05, "loss": 0.6993, "step": 77680 }, { "epoch": 0.86, "learning_rate": 3.566358844893259e-05, "loss": 0.6802, "step": 77685 }, { "epoch": 0.86, "learning_rate": 3.566266572179408e-05, "loss": 0.7382, "step": 77690 }, { "epoch": 0.86, "learning_rate": 3.566174299465557e-05, "loss": 0.7045, "step": 77695 }, { "epoch": 0.86, "learning_rate": 3.566082026751705e-05, "loss": 0.6881, "step": 77700 }, { "epoch": 0.86, "learning_rate": 3.565989754037854e-05, "loss": 0.6958, "step": 77705 }, { "epoch": 0.86, "learning_rate": 3.565897481324003e-05, "loss": 0.6906, "step": 77710 }, { "epoch": 0.86, "learning_rate": 3.5658052086101514e-05, "loss": 0.6813, "step": 77715 }, { "epoch": 0.86, "learning_rate": 3.5657129358963e-05, "loss": 0.6964, "step": 77720 }, { "epoch": 0.86, "learning_rate": 3.565620663182449e-05, "loss": 0.6745, "step": 77725 }, { "epoch": 0.86, "learning_rate": 3.565528390468598e-05, "loss": 0.7156, "step": 77730 }, { "epoch": 0.86, "learning_rate": 3.5654361177547466e-05, "loss": 0.6648, "step": 77735 }, { "epoch": 0.86, "learning_rate": 3.5653438450408954e-05, "loss": 0.6183, "step": 77740 }, { "epoch": 0.86, "learning_rate": 3.565251572327044e-05, "loss": 0.7366, "step": 77745 }, { "epoch": 0.86, "learning_rate": 3.565159299613193e-05, "loss": 0.6812, "step": 77750 }, { "epoch": 0.86, "learning_rate": 3.565067026899342e-05, "loss": 0.6803, "step": 77755 }, { "epoch": 0.86, "learning_rate": 3.5649747541854905e-05, "loss": 0.645, "step": 77760 }, { "epoch": 0.86, "learning_rate": 3.564882481471639e-05, "loss": 0.7139, "step": 77765 }, { "epoch": 0.86, "learning_rate": 3.564790208757788e-05, "loss": 0.7906, "step": 77770 }, { "epoch": 0.86, "learning_rate": 3.564697936043937e-05, "loss": 0.6972, "step": 77775 }, { "epoch": 0.86, "learning_rate": 3.5646056633300856e-05, "loss": 0.7251, "step": 77780 }, { "epoch": 0.86, "learning_rate": 3.5645133906162344e-05, "loss": 0.7015, "step": 77785 }, { "epoch": 0.86, "learning_rate": 3.5644211179023825e-05, "loss": 0.7182, "step": 77790 }, { "epoch": 0.86, "learning_rate": 3.564328845188532e-05, "loss": 0.7142, "step": 77795 }, { "epoch": 0.86, "learning_rate": 3.564236572474681e-05, "loss": 0.6566, "step": 77800 }, { "epoch": 0.86, "learning_rate": 3.5641442997608296e-05, "loss": 0.645, "step": 77805 }, { "epoch": 0.86, "learning_rate": 3.564052027046978e-05, "loss": 0.7448, "step": 77810 }, { "epoch": 0.86, "learning_rate": 3.5639597543331265e-05, "loss": 0.7811, "step": 77815 }, { "epoch": 0.86, "learning_rate": 3.563867481619276e-05, "loss": 0.6853, "step": 77820 }, { "epoch": 0.86, "learning_rate": 3.563775208905425e-05, "loss": 0.7311, "step": 77825 }, { "epoch": 0.86, "learning_rate": 3.563682936191573e-05, "loss": 0.7021, "step": 77830 }, { "epoch": 0.86, "learning_rate": 3.5635906634777216e-05, "loss": 0.6242, "step": 77835 }, { "epoch": 0.86, "learning_rate": 3.563498390763871e-05, "loss": 0.7368, "step": 77840 }, { "epoch": 0.86, "learning_rate": 3.563406118050019e-05, "loss": 0.6765, "step": 77845 }, { "epoch": 0.86, "learning_rate": 3.563313845336168e-05, "loss": 0.6736, "step": 77850 }, { "epoch": 0.86, "learning_rate": 3.563221572622317e-05, "loss": 0.7038, "step": 77855 }, { "epoch": 0.86, "learning_rate": 3.563129299908466e-05, "loss": 0.7475, "step": 77860 }, { "epoch": 0.86, "learning_rate": 3.563037027194614e-05, "loss": 0.6745, "step": 77865 }, { "epoch": 0.86, "learning_rate": 3.562944754480763e-05, "loss": 0.7141, "step": 77870 }, { "epoch": 0.86, "learning_rate": 3.562852481766912e-05, "loss": 0.6623, "step": 77875 }, { "epoch": 0.86, "learning_rate": 3.562760209053061e-05, "loss": 0.7011, "step": 77880 }, { "epoch": 0.86, "learning_rate": 3.5626679363392094e-05, "loss": 0.77, "step": 77885 }, { "epoch": 0.86, "learning_rate": 3.562575663625358e-05, "loss": 0.6941, "step": 77890 }, { "epoch": 0.86, "learning_rate": 3.562483390911507e-05, "loss": 0.7122, "step": 77895 }, { "epoch": 0.86, "learning_rate": 3.562391118197656e-05, "loss": 0.6381, "step": 77900 }, { "epoch": 0.86, "learning_rate": 3.5622988454838046e-05, "loss": 0.6647, "step": 77905 }, { "epoch": 0.86, "learning_rate": 3.5622065727699534e-05, "loss": 0.6579, "step": 77910 }, { "epoch": 0.86, "learning_rate": 3.562114300056102e-05, "loss": 0.6938, "step": 77915 }, { "epoch": 0.86, "learning_rate": 3.56202202734225e-05, "loss": 0.6812, "step": 77920 }, { "epoch": 0.86, "learning_rate": 3.5619297546284e-05, "loss": 0.6666, "step": 77925 }, { "epoch": 0.86, "learning_rate": 3.5618374819145485e-05, "loss": 0.7314, "step": 77930 }, { "epoch": 0.86, "learning_rate": 3.561745209200697e-05, "loss": 0.6914, "step": 77935 }, { "epoch": 0.86, "learning_rate": 3.5616529364868454e-05, "loss": 0.694, "step": 77940 }, { "epoch": 0.86, "learning_rate": 3.561560663772995e-05, "loss": 0.7594, "step": 77945 }, { "epoch": 0.86, "learning_rate": 3.5614683910591437e-05, "loss": 0.7902, "step": 77950 }, { "epoch": 0.86, "learning_rate": 3.561376118345292e-05, "loss": 0.6251, "step": 77955 }, { "epoch": 0.86, "learning_rate": 3.5612838456314405e-05, "loss": 0.7461, "step": 77960 }, { "epoch": 0.86, "learning_rate": 3.561191572917589e-05, "loss": 0.7907, "step": 77965 }, { "epoch": 0.86, "learning_rate": 3.561099300203739e-05, "loss": 0.6945, "step": 77970 }, { "epoch": 0.86, "learning_rate": 3.561007027489887e-05, "loss": 0.7071, "step": 77975 }, { "epoch": 0.86, "learning_rate": 3.560914754776036e-05, "loss": 0.7054, "step": 77980 }, { "epoch": 0.86, "learning_rate": 3.5608224820621845e-05, "loss": 0.6424, "step": 77985 }, { "epoch": 0.86, "learning_rate": 3.560730209348333e-05, "loss": 0.7576, "step": 77990 }, { "epoch": 0.86, "learning_rate": 3.560637936634482e-05, "loss": 0.7096, "step": 77995 }, { "epoch": 0.86, "learning_rate": 3.560545663920631e-05, "loss": 0.6963, "step": 78000 }, { "epoch": 0.86, "eval_loss": 0.6781031489372253, "eval_runtime": 69.3889, "eval_samples_per_second": 28.823, "eval_steps_per_second": 14.412, "step": 78000 }, { "epoch": 0.86, "learning_rate": 3.5604533912067796e-05, "loss": 0.7231, "step": 78005 }, { "epoch": 0.86, "learning_rate": 3.5603611184929284e-05, "loss": 0.6987, "step": 78010 }, { "epoch": 0.86, "learning_rate": 3.560268845779077e-05, "loss": 0.7427, "step": 78015 }, { "epoch": 0.86, "learning_rate": 3.560176573065226e-05, "loss": 0.7462, "step": 78020 }, { "epoch": 0.86, "learning_rate": 3.560084300351375e-05, "loss": 0.6882, "step": 78025 }, { "epoch": 0.86, "learning_rate": 3.559992027637523e-05, "loss": 0.7095, "step": 78030 }, { "epoch": 0.86, "learning_rate": 3.559899754923672e-05, "loss": 0.7453, "step": 78035 }, { "epoch": 0.86, "learning_rate": 3.559807482209821e-05, "loss": 0.6981, "step": 78040 }, { "epoch": 0.86, "learning_rate": 3.55971520949597e-05, "loss": 0.6874, "step": 78045 }, { "epoch": 0.86, "learning_rate": 3.559622936782118e-05, "loss": 0.681, "step": 78050 }, { "epoch": 0.86, "learning_rate": 3.5595306640682675e-05, "loss": 0.7868, "step": 78055 }, { "epoch": 0.86, "learning_rate": 3.559438391354416e-05, "loss": 0.6886, "step": 78060 }, { "epoch": 0.86, "learning_rate": 3.5593461186405643e-05, "loss": 0.6755, "step": 78065 }, { "epoch": 0.86, "learning_rate": 3.559253845926713e-05, "loss": 0.7046, "step": 78070 }, { "epoch": 0.86, "learning_rate": 3.5591615732128626e-05, "loss": 0.7503, "step": 78075 }, { "epoch": 0.86, "learning_rate": 3.5590693004990114e-05, "loss": 0.6669, "step": 78080 }, { "epoch": 0.86, "learning_rate": 3.5589770277851595e-05, "loss": 0.6952, "step": 78085 }, { "epoch": 0.86, "learning_rate": 3.558884755071308e-05, "loss": 0.7465, "step": 78090 }, { "epoch": 0.86, "learning_rate": 3.558792482357458e-05, "loss": 0.7117, "step": 78095 }, { "epoch": 0.86, "learning_rate": 3.558700209643606e-05, "loss": 0.796, "step": 78100 }, { "epoch": 0.86, "learning_rate": 3.5586079369297546e-05, "loss": 0.7179, "step": 78105 }, { "epoch": 0.86, "learning_rate": 3.5585156642159034e-05, "loss": 0.6775, "step": 78110 }, { "epoch": 0.86, "learning_rate": 3.558423391502052e-05, "loss": 0.7053, "step": 78115 }, { "epoch": 0.87, "learning_rate": 3.558331118788201e-05, "loss": 0.709, "step": 78120 }, { "epoch": 0.87, "learning_rate": 3.55823884607435e-05, "loss": 0.7033, "step": 78125 }, { "epoch": 0.87, "learning_rate": 3.5581465733604986e-05, "loss": 0.7416, "step": 78130 }, { "epoch": 0.87, "learning_rate": 3.558054300646647e-05, "loss": 0.7016, "step": 78135 }, { "epoch": 0.87, "learning_rate": 3.557962027932796e-05, "loss": 0.7006, "step": 78140 }, { "epoch": 0.87, "learning_rate": 3.557869755218945e-05, "loss": 0.7243, "step": 78145 }, { "epoch": 0.87, "learning_rate": 3.557777482505094e-05, "loss": 0.6729, "step": 78150 }, { "epoch": 0.87, "learning_rate": 3.5576852097912425e-05, "loss": 0.7349, "step": 78155 }, { "epoch": 0.87, "learning_rate": 3.557592937077391e-05, "loss": 0.7525, "step": 78160 }, { "epoch": 0.87, "learning_rate": 3.55750066436354e-05, "loss": 0.7841, "step": 78165 }, { "epoch": 0.87, "learning_rate": 3.557408391649689e-05, "loss": 0.7007, "step": 78170 }, { "epoch": 0.87, "learning_rate": 3.557316118935837e-05, "loss": 0.6853, "step": 78175 }, { "epoch": 0.87, "learning_rate": 3.557223846221986e-05, "loss": 0.7368, "step": 78180 }, { "epoch": 0.87, "learning_rate": 3.557131573508135e-05, "loss": 0.6858, "step": 78185 }, { "epoch": 0.87, "learning_rate": 3.557039300794284e-05, "loss": 0.7777, "step": 78190 }, { "epoch": 0.87, "learning_rate": 3.556947028080432e-05, "loss": 0.6666, "step": 78195 }, { "epoch": 0.87, "learning_rate": 3.556854755366581e-05, "loss": 0.6923, "step": 78200 }, { "epoch": 0.87, "learning_rate": 3.55676248265273e-05, "loss": 0.7105, "step": 78205 }, { "epoch": 0.87, "learning_rate": 3.556670209938879e-05, "loss": 0.6721, "step": 78210 }, { "epoch": 0.87, "learning_rate": 3.556577937225027e-05, "loss": 0.6486, "step": 78215 }, { "epoch": 0.87, "learning_rate": 3.556485664511176e-05, "loss": 0.724, "step": 78220 }, { "epoch": 0.87, "learning_rate": 3.5563933917973255e-05, "loss": 0.7261, "step": 78225 }, { "epoch": 0.87, "learning_rate": 3.5563011190834736e-05, "loss": 0.7536, "step": 78230 }, { "epoch": 0.87, "learning_rate": 3.5562088463696224e-05, "loss": 0.6566, "step": 78235 }, { "epoch": 0.87, "learning_rate": 3.556116573655771e-05, "loss": 0.7109, "step": 78240 }, { "epoch": 0.87, "learning_rate": 3.5560243009419206e-05, "loss": 0.7117, "step": 78245 }, { "epoch": 0.87, "learning_rate": 3.555932028228069e-05, "loss": 0.7185, "step": 78250 }, { "epoch": 0.87, "learning_rate": 3.5558397555142175e-05, "loss": 0.7431, "step": 78255 }, { "epoch": 0.87, "learning_rate": 3.555747482800366e-05, "loss": 0.6879, "step": 78260 }, { "epoch": 0.87, "learning_rate": 3.555655210086515e-05, "loss": 0.6934, "step": 78265 }, { "epoch": 0.87, "learning_rate": 3.555562937372664e-05, "loss": 0.7353, "step": 78270 }, { "epoch": 0.87, "learning_rate": 3.5554706646588126e-05, "loss": 0.6676, "step": 78275 }, { "epoch": 0.87, "learning_rate": 3.5553783919449614e-05, "loss": 0.6995, "step": 78280 }, { "epoch": 0.87, "learning_rate": 3.55528611923111e-05, "loss": 0.7508, "step": 78285 }, { "epoch": 0.87, "learning_rate": 3.555193846517259e-05, "loss": 0.7138, "step": 78290 }, { "epoch": 0.87, "learning_rate": 3.555101573803408e-05, "loss": 0.7009, "step": 78295 }, { "epoch": 0.87, "learning_rate": 3.5550093010895566e-05, "loss": 0.723, "step": 78300 }, { "epoch": 0.87, "learning_rate": 3.554917028375705e-05, "loss": 0.7199, "step": 78305 }, { "epoch": 0.87, "learning_rate": 3.554824755661854e-05, "loss": 0.6689, "step": 78310 }, { "epoch": 0.87, "learning_rate": 3.554732482948003e-05, "loss": 0.6935, "step": 78315 }, { "epoch": 0.87, "learning_rate": 3.554640210234152e-05, "loss": 0.6957, "step": 78320 }, { "epoch": 0.87, "learning_rate": 3.5545479375203e-05, "loss": 0.6804, "step": 78325 }, { "epoch": 0.87, "learning_rate": 3.5544556648064486e-05, "loss": 0.7173, "step": 78330 }, { "epoch": 0.87, "learning_rate": 3.554363392092598e-05, "loss": 0.6726, "step": 78335 }, { "epoch": 0.87, "learning_rate": 3.554271119378746e-05, "loss": 0.6316, "step": 78340 }, { "epoch": 0.87, "learning_rate": 3.554178846664895e-05, "loss": 0.7015, "step": 78345 }, { "epoch": 0.87, "learning_rate": 3.554086573951044e-05, "loss": 0.7909, "step": 78350 }, { "epoch": 0.87, "learning_rate": 3.553994301237193e-05, "loss": 0.6911, "step": 78355 }, { "epoch": 0.87, "learning_rate": 3.553902028523341e-05, "loss": 0.7494, "step": 78360 }, { "epoch": 0.87, "learning_rate": 3.55380975580949e-05, "loss": 0.6695, "step": 78365 }, { "epoch": 0.87, "learning_rate": 3.553717483095639e-05, "loss": 0.6917, "step": 78370 }, { "epoch": 0.87, "learning_rate": 3.5536252103817877e-05, "loss": 0.6938, "step": 78375 }, { "epoch": 0.87, "learning_rate": 3.5535329376679364e-05, "loss": 0.6997, "step": 78380 }, { "epoch": 0.87, "learning_rate": 3.553440664954085e-05, "loss": 0.7543, "step": 78385 }, { "epoch": 0.87, "learning_rate": 3.553348392240234e-05, "loss": 0.708, "step": 78390 }, { "epoch": 0.87, "learning_rate": 3.553256119526383e-05, "loss": 0.6666, "step": 78395 }, { "epoch": 0.87, "learning_rate": 3.5531638468125316e-05, "loss": 0.6584, "step": 78400 }, { "epoch": 0.87, "learning_rate": 3.5530715740986804e-05, "loss": 0.6947, "step": 78405 }, { "epoch": 0.87, "learning_rate": 3.552979301384829e-05, "loss": 0.7642, "step": 78410 }, { "epoch": 0.87, "learning_rate": 3.552887028670977e-05, "loss": 0.6469, "step": 78415 }, { "epoch": 0.87, "learning_rate": 3.552794755957127e-05, "loss": 0.6961, "step": 78420 }, { "epoch": 0.87, "learning_rate": 3.5527024832432755e-05, "loss": 0.6772, "step": 78425 }, { "epoch": 0.87, "learning_rate": 3.552610210529424e-05, "loss": 0.6849, "step": 78430 }, { "epoch": 0.87, "learning_rate": 3.5525179378155724e-05, "loss": 0.7108, "step": 78435 }, { "epoch": 0.87, "learning_rate": 3.552425665101722e-05, "loss": 0.7512, "step": 78440 }, { "epoch": 0.87, "learning_rate": 3.5523333923878706e-05, "loss": 0.7068, "step": 78445 }, { "epoch": 0.87, "learning_rate": 3.552241119674019e-05, "loss": 0.6717, "step": 78450 }, { "epoch": 0.87, "learning_rate": 3.5521488469601675e-05, "loss": 0.6672, "step": 78455 }, { "epoch": 0.87, "learning_rate": 3.552056574246317e-05, "loss": 0.7631, "step": 78460 }, { "epoch": 0.87, "learning_rate": 3.551964301532466e-05, "loss": 0.6625, "step": 78465 }, { "epoch": 0.87, "learning_rate": 3.551872028818614e-05, "loss": 0.7248, "step": 78470 }, { "epoch": 0.87, "learning_rate": 3.551779756104763e-05, "loss": 0.6437, "step": 78475 }, { "epoch": 0.87, "learning_rate": 3.551687483390912e-05, "loss": 0.6889, "step": 78480 }, { "epoch": 0.87, "learning_rate": 3.55159521067706e-05, "loss": 0.7165, "step": 78485 }, { "epoch": 0.87, "learning_rate": 3.551502937963209e-05, "loss": 0.7445, "step": 78490 }, { "epoch": 0.87, "learning_rate": 3.551410665249358e-05, "loss": 0.6787, "step": 78495 }, { "epoch": 0.87, "learning_rate": 3.5513183925355066e-05, "loss": 0.7211, "step": 78500 }, { "epoch": 0.87, "learning_rate": 3.5512261198216554e-05, "loss": 0.7452, "step": 78505 }, { "epoch": 0.87, "learning_rate": 3.551133847107804e-05, "loss": 0.6924, "step": 78510 }, { "epoch": 0.87, "learning_rate": 3.551041574393953e-05, "loss": 0.6526, "step": 78515 }, { "epoch": 0.87, "learning_rate": 3.550949301680102e-05, "loss": 0.7863, "step": 78520 }, { "epoch": 0.87, "learning_rate": 3.5508570289662505e-05, "loss": 0.7575, "step": 78525 }, { "epoch": 0.87, "learning_rate": 3.550764756252399e-05, "loss": 0.8042, "step": 78530 }, { "epoch": 0.87, "learning_rate": 3.550672483538548e-05, "loss": 0.6916, "step": 78535 }, { "epoch": 0.87, "learning_rate": 3.550580210824697e-05, "loss": 0.6808, "step": 78540 }, { "epoch": 0.87, "learning_rate": 3.550487938110846e-05, "loss": 0.7287, "step": 78545 }, { "epoch": 0.87, "learning_rate": 3.5503956653969944e-05, "loss": 0.7614, "step": 78550 }, { "epoch": 0.87, "learning_rate": 3.550303392683143e-05, "loss": 0.7676, "step": 78555 }, { "epoch": 0.87, "learning_rate": 3.5502111199692913e-05, "loss": 0.7203, "step": 78560 }, { "epoch": 0.87, "learning_rate": 3.55011884725544e-05, "loss": 0.7136, "step": 78565 }, { "epoch": 0.87, "learning_rate": 3.5500265745415896e-05, "loss": 0.7136, "step": 78570 }, { "epoch": 0.87, "learning_rate": 3.5499343018277384e-05, "loss": 0.6532, "step": 78575 }, { "epoch": 0.87, "learning_rate": 3.5498420291138865e-05, "loss": 0.7484, "step": 78580 }, { "epoch": 0.87, "learning_rate": 3.549749756400035e-05, "loss": 0.7083, "step": 78585 }, { "epoch": 0.87, "learning_rate": 3.549657483686185e-05, "loss": 0.6515, "step": 78590 }, { "epoch": 0.87, "learning_rate": 3.5495652109723335e-05, "loss": 0.7022, "step": 78595 }, { "epoch": 0.87, "learning_rate": 3.5494729382584816e-05, "loss": 0.7481, "step": 78600 }, { "epoch": 0.87, "learning_rate": 3.5493806655446304e-05, "loss": 0.7307, "step": 78605 }, { "epoch": 0.87, "learning_rate": 3.54928839283078e-05, "loss": 0.69, "step": 78610 }, { "epoch": 0.87, "learning_rate": 3.549196120116928e-05, "loss": 0.664, "step": 78615 }, { "epoch": 0.87, "learning_rate": 3.549103847403077e-05, "loss": 0.7175, "step": 78620 }, { "epoch": 0.87, "learning_rate": 3.5490115746892255e-05, "loss": 0.7348, "step": 78625 }, { "epoch": 0.87, "learning_rate": 3.548919301975375e-05, "loss": 0.6706, "step": 78630 }, { "epoch": 0.87, "learning_rate": 3.548827029261523e-05, "loss": 0.7521, "step": 78635 }, { "epoch": 0.87, "learning_rate": 3.548734756547672e-05, "loss": 0.7666, "step": 78640 }, { "epoch": 0.87, "learning_rate": 3.548642483833821e-05, "loss": 0.6818, "step": 78645 }, { "epoch": 0.87, "learning_rate": 3.5485502111199695e-05, "loss": 0.7278, "step": 78650 }, { "epoch": 0.87, "learning_rate": 3.548457938406118e-05, "loss": 0.7172, "step": 78655 }, { "epoch": 0.87, "learning_rate": 3.548365665692267e-05, "loss": 0.6869, "step": 78660 }, { "epoch": 0.87, "learning_rate": 3.548273392978416e-05, "loss": 0.7771, "step": 78665 }, { "epoch": 0.87, "learning_rate": 3.5481811202645646e-05, "loss": 0.7899, "step": 78670 }, { "epoch": 0.87, "learning_rate": 3.5480888475507134e-05, "loss": 0.7281, "step": 78675 }, { "epoch": 0.87, "learning_rate": 3.547996574836862e-05, "loss": 0.6495, "step": 78680 }, { "epoch": 0.87, "learning_rate": 3.547904302123011e-05, "loss": 0.7069, "step": 78685 }, { "epoch": 0.87, "learning_rate": 3.547812029409159e-05, "loss": 0.6441, "step": 78690 }, { "epoch": 0.87, "learning_rate": 3.5477197566953085e-05, "loss": 0.6878, "step": 78695 }, { "epoch": 0.87, "learning_rate": 3.547627483981457e-05, "loss": 0.71, "step": 78700 }, { "epoch": 0.87, "learning_rate": 3.547535211267606e-05, "loss": 0.6897, "step": 78705 }, { "epoch": 0.87, "learning_rate": 3.547442938553754e-05, "loss": 0.7102, "step": 78710 }, { "epoch": 0.87, "learning_rate": 3.547350665839903e-05, "loss": 0.6868, "step": 78715 }, { "epoch": 0.87, "learning_rate": 3.5472583931260525e-05, "loss": 0.6592, "step": 78720 }, { "epoch": 0.87, "learning_rate": 3.5471661204122006e-05, "loss": 0.7267, "step": 78725 }, { "epoch": 0.87, "learning_rate": 3.5470738476983493e-05, "loss": 0.67, "step": 78730 }, { "epoch": 0.87, "learning_rate": 3.546981574984498e-05, "loss": 0.7293, "step": 78735 }, { "epoch": 0.87, "learning_rate": 3.5468893022706476e-05, "loss": 0.7728, "step": 78740 }, { "epoch": 0.87, "learning_rate": 3.546797029556796e-05, "loss": 0.7408, "step": 78745 }, { "epoch": 0.87, "learning_rate": 3.5467047568429445e-05, "loss": 0.7101, "step": 78750 }, { "epoch": 0.87, "learning_rate": 3.546612484129093e-05, "loss": 0.7183, "step": 78755 }, { "epoch": 0.87, "learning_rate": 3.546520211415242e-05, "loss": 0.7503, "step": 78760 }, { "epoch": 0.87, "learning_rate": 3.546427938701391e-05, "loss": 0.6842, "step": 78765 }, { "epoch": 0.87, "learning_rate": 3.5463356659875396e-05, "loss": 0.6862, "step": 78770 }, { "epoch": 0.87, "learning_rate": 3.5462433932736884e-05, "loss": 0.7277, "step": 78775 }, { "epoch": 0.87, "learning_rate": 3.546151120559837e-05, "loss": 0.6914, "step": 78780 }, { "epoch": 0.87, "learning_rate": 3.546058847845986e-05, "loss": 0.6847, "step": 78785 }, { "epoch": 0.87, "learning_rate": 3.545966575132135e-05, "loss": 0.6982, "step": 78790 }, { "epoch": 0.87, "learning_rate": 3.5458743024182836e-05, "loss": 0.6899, "step": 78795 }, { "epoch": 0.87, "learning_rate": 3.5457820297044317e-05, "loss": 0.711, "step": 78800 }, { "epoch": 0.87, "learning_rate": 3.545689756990581e-05, "loss": 0.6454, "step": 78805 }, { "epoch": 0.87, "learning_rate": 3.54559748427673e-05, "loss": 0.7483, "step": 78810 }, { "epoch": 0.87, "learning_rate": 3.545505211562879e-05, "loss": 0.7091, "step": 78815 }, { "epoch": 0.87, "learning_rate": 3.545412938849027e-05, "loss": 0.6483, "step": 78820 }, { "epoch": 0.87, "learning_rate": 3.545320666135176e-05, "loss": 0.7126, "step": 78825 }, { "epoch": 0.87, "learning_rate": 3.545228393421325e-05, "loss": 0.7244, "step": 78830 }, { "epoch": 0.87, "learning_rate": 3.545136120707473e-05, "loss": 0.7081, "step": 78835 }, { "epoch": 0.87, "learning_rate": 3.545043847993622e-05, "loss": 0.7031, "step": 78840 }, { "epoch": 0.87, "learning_rate": 3.5449515752797714e-05, "loss": 0.7227, "step": 78845 }, { "epoch": 0.87, "learning_rate": 3.54485930256592e-05, "loss": 0.6898, "step": 78850 }, { "epoch": 0.87, "learning_rate": 3.544767029852068e-05, "loss": 0.7175, "step": 78855 }, { "epoch": 0.87, "learning_rate": 3.544674757138217e-05, "loss": 0.7047, "step": 78860 }, { "epoch": 0.87, "learning_rate": 3.544582484424366e-05, "loss": 0.7271, "step": 78865 }, { "epoch": 0.87, "learning_rate": 3.544490211710515e-05, "loss": 0.727, "step": 78870 }, { "epoch": 0.87, "learning_rate": 3.5443979389966634e-05, "loss": 0.6936, "step": 78875 }, { "epoch": 0.87, "learning_rate": 3.544305666282812e-05, "loss": 0.6675, "step": 78880 }, { "epoch": 0.87, "learning_rate": 3.544213393568961e-05, "loss": 0.6694, "step": 78885 }, { "epoch": 0.87, "learning_rate": 3.54412112085511e-05, "loss": 0.6905, "step": 78890 }, { "epoch": 0.87, "learning_rate": 3.5440288481412586e-05, "loss": 0.7238, "step": 78895 }, { "epoch": 0.87, "learning_rate": 3.5439365754274074e-05, "loss": 0.6846, "step": 78900 }, { "epoch": 0.87, "learning_rate": 3.543844302713556e-05, "loss": 0.7571, "step": 78905 }, { "epoch": 0.87, "learning_rate": 3.543752029999705e-05, "loss": 0.6841, "step": 78910 }, { "epoch": 0.87, "learning_rate": 3.543659757285854e-05, "loss": 0.7295, "step": 78915 }, { "epoch": 0.87, "learning_rate": 3.5435674845720025e-05, "loss": 0.6795, "step": 78920 }, { "epoch": 0.87, "learning_rate": 3.543475211858151e-05, "loss": 0.6859, "step": 78925 }, { "epoch": 0.87, "learning_rate": 3.5433829391443e-05, "loss": 0.6972, "step": 78930 }, { "epoch": 0.87, "learning_rate": 3.543290666430449e-05, "loss": 0.7717, "step": 78935 }, { "epoch": 0.87, "learning_rate": 3.5431983937165976e-05, "loss": 0.731, "step": 78940 }, { "epoch": 0.87, "learning_rate": 3.5431061210027464e-05, "loss": 0.7198, "step": 78945 }, { "epoch": 0.87, "learning_rate": 3.5430138482888945e-05, "loss": 0.6417, "step": 78950 }, { "epoch": 0.87, "learning_rate": 3.542921575575044e-05, "loss": 0.7347, "step": 78955 }, { "epoch": 0.87, "learning_rate": 3.542829302861193e-05, "loss": 0.7405, "step": 78960 }, { "epoch": 0.87, "learning_rate": 3.542737030147341e-05, "loss": 0.6578, "step": 78965 }, { "epoch": 0.87, "learning_rate": 3.54264475743349e-05, "loss": 0.7042, "step": 78970 }, { "epoch": 0.87, "learning_rate": 3.542552484719639e-05, "loss": 0.6727, "step": 78975 }, { "epoch": 0.87, "learning_rate": 3.542460212005788e-05, "loss": 0.6959, "step": 78980 }, { "epoch": 0.87, "learning_rate": 3.542367939291936e-05, "loss": 0.7155, "step": 78985 }, { "epoch": 0.87, "learning_rate": 3.542275666578085e-05, "loss": 0.7157, "step": 78990 }, { "epoch": 0.87, "learning_rate": 3.542183393864234e-05, "loss": 0.6823, "step": 78995 }, { "epoch": 0.87, "learning_rate": 3.5420911211503824e-05, "loss": 0.6949, "step": 79000 }, { "epoch": 0.87, "eval_loss": 0.6575542688369751, "eval_runtime": 69.176, "eval_samples_per_second": 28.912, "eval_steps_per_second": 14.456, "step": 79000 }, { "epoch": 0.87, "learning_rate": 3.541998848436531e-05, "loss": 0.7511, "step": 79005 }, { "epoch": 0.87, "learning_rate": 3.54190657572268e-05, "loss": 0.6459, "step": 79010 }, { "epoch": 0.87, "learning_rate": 3.541814303008829e-05, "loss": 0.7188, "step": 79015 }, { "epoch": 0.87, "learning_rate": 3.5417220302949775e-05, "loss": 0.6966, "step": 79020 }, { "epoch": 0.88, "learning_rate": 3.541629757581126e-05, "loss": 0.7232, "step": 79025 }, { "epoch": 0.88, "learning_rate": 3.541537484867275e-05, "loss": 0.6578, "step": 79030 }, { "epoch": 0.88, "learning_rate": 3.541445212153424e-05, "loss": 0.7017, "step": 79035 }, { "epoch": 0.88, "learning_rate": 3.5413529394395727e-05, "loss": 0.6834, "step": 79040 }, { "epoch": 0.88, "learning_rate": 3.5412606667257214e-05, "loss": 0.6863, "step": 79045 }, { "epoch": 0.88, "learning_rate": 3.54116839401187e-05, "loss": 0.6852, "step": 79050 }, { "epoch": 0.88, "learning_rate": 3.541076121298019e-05, "loss": 0.6583, "step": 79055 }, { "epoch": 0.88, "learning_rate": 3.540983848584168e-05, "loss": 0.7357, "step": 79060 }, { "epoch": 0.88, "learning_rate": 3.5408915758703166e-05, "loss": 0.782, "step": 79065 }, { "epoch": 0.88, "learning_rate": 3.5407993031564654e-05, "loss": 0.6769, "step": 79070 }, { "epoch": 0.88, "learning_rate": 3.5407070304426135e-05, "loss": 0.7066, "step": 79075 }, { "epoch": 0.88, "learning_rate": 3.540614757728763e-05, "loss": 0.6548, "step": 79080 }, { "epoch": 0.88, "learning_rate": 3.540522485014912e-05, "loss": 0.7413, "step": 79085 }, { "epoch": 0.88, "learning_rate": 3.5404302123010605e-05, "loss": 0.6683, "step": 79090 }, { "epoch": 0.88, "learning_rate": 3.5403379395872086e-05, "loss": 0.7352, "step": 79095 }, { "epoch": 0.88, "learning_rate": 3.5402456668733574e-05, "loss": 0.7444, "step": 79100 }, { "epoch": 0.88, "learning_rate": 3.540153394159507e-05, "loss": 0.6867, "step": 79105 }, { "epoch": 0.88, "learning_rate": 3.540061121445655e-05, "loss": 0.7533, "step": 79110 }, { "epoch": 0.88, "learning_rate": 3.539968848731804e-05, "loss": 0.6898, "step": 79115 }, { "epoch": 0.88, "learning_rate": 3.5398765760179525e-05, "loss": 0.6953, "step": 79120 }, { "epoch": 0.88, "learning_rate": 3.539784303304102e-05, "loss": 0.7187, "step": 79125 }, { "epoch": 0.88, "learning_rate": 3.53969203059025e-05, "loss": 0.8048, "step": 79130 }, { "epoch": 0.88, "learning_rate": 3.539599757876399e-05, "loss": 0.7084, "step": 79135 }, { "epoch": 0.88, "learning_rate": 3.539507485162548e-05, "loss": 0.7008, "step": 79140 }, { "epoch": 0.88, "learning_rate": 3.5394152124486965e-05, "loss": 0.7163, "step": 79145 }, { "epoch": 0.88, "learning_rate": 3.539322939734845e-05, "loss": 0.742, "step": 79150 }, { "epoch": 0.88, "learning_rate": 3.539230667020994e-05, "loss": 0.6914, "step": 79155 }, { "epoch": 0.88, "learning_rate": 3.539138394307143e-05, "loss": 0.6978, "step": 79160 }, { "epoch": 0.88, "learning_rate": 3.5390461215932916e-05, "loss": 0.7151, "step": 79165 }, { "epoch": 0.88, "learning_rate": 3.5389538488794404e-05, "loss": 0.6689, "step": 79170 }, { "epoch": 0.88, "learning_rate": 3.538861576165589e-05, "loss": 0.6704, "step": 79175 }, { "epoch": 0.88, "learning_rate": 3.538769303451738e-05, "loss": 0.7364, "step": 79180 }, { "epoch": 0.88, "learning_rate": 3.538677030737886e-05, "loss": 0.7245, "step": 79185 }, { "epoch": 0.88, "learning_rate": 3.5385847580240355e-05, "loss": 0.7228, "step": 79190 }, { "epoch": 0.88, "learning_rate": 3.538492485310184e-05, "loss": 0.7332, "step": 79195 }, { "epoch": 0.88, "learning_rate": 3.538400212596333e-05, "loss": 0.6974, "step": 79200 }, { "epoch": 0.88, "learning_rate": 3.538307939882481e-05, "loss": 0.697, "step": 79205 }, { "epoch": 0.88, "learning_rate": 3.538215667168631e-05, "loss": 0.7473, "step": 79210 }, { "epoch": 0.88, "learning_rate": 3.5381233944547794e-05, "loss": 0.6448, "step": 79215 }, { "epoch": 0.88, "learning_rate": 3.5380311217409276e-05, "loss": 0.6686, "step": 79220 }, { "epoch": 0.88, "learning_rate": 3.5379388490270763e-05, "loss": 0.7182, "step": 79225 }, { "epoch": 0.88, "learning_rate": 3.537846576313226e-05, "loss": 0.7427, "step": 79230 }, { "epoch": 0.88, "learning_rate": 3.5377543035993746e-05, "loss": 0.7199, "step": 79235 }, { "epoch": 0.88, "learning_rate": 3.537662030885523e-05, "loss": 0.7303, "step": 79240 }, { "epoch": 0.88, "learning_rate": 3.5375697581716715e-05, "loss": 0.671, "step": 79245 }, { "epoch": 0.88, "learning_rate": 3.53747748545782e-05, "loss": 0.7288, "step": 79250 }, { "epoch": 0.88, "learning_rate": 3.53738521274397e-05, "loss": 0.7235, "step": 79255 }, { "epoch": 0.88, "learning_rate": 3.537292940030118e-05, "loss": 0.6938, "step": 79260 }, { "epoch": 0.88, "learning_rate": 3.5372006673162666e-05, "loss": 0.6722, "step": 79265 }, { "epoch": 0.88, "learning_rate": 3.5371083946024154e-05, "loss": 0.7062, "step": 79270 }, { "epoch": 0.88, "learning_rate": 3.537016121888564e-05, "loss": 0.669, "step": 79275 }, { "epoch": 0.88, "learning_rate": 3.536923849174713e-05, "loss": 0.722, "step": 79280 }, { "epoch": 0.88, "learning_rate": 3.536831576460862e-05, "loss": 0.6931, "step": 79285 }, { "epoch": 0.88, "learning_rate": 3.5367393037470105e-05, "loss": 0.7406, "step": 79290 }, { "epoch": 0.88, "learning_rate": 3.536647031033159e-05, "loss": 0.7223, "step": 79295 }, { "epoch": 0.88, "learning_rate": 3.536554758319308e-05, "loss": 0.7212, "step": 79300 }, { "epoch": 0.88, "learning_rate": 3.536462485605457e-05, "loss": 0.6923, "step": 79305 }, { "epoch": 0.88, "learning_rate": 3.536370212891606e-05, "loss": 0.7667, "step": 79310 }, { "epoch": 0.88, "learning_rate": 3.5362779401777545e-05, "loss": 0.6849, "step": 79315 }, { "epoch": 0.88, "learning_rate": 3.536185667463903e-05, "loss": 0.6896, "step": 79320 }, { "epoch": 0.88, "learning_rate": 3.536093394750052e-05, "loss": 0.6747, "step": 79325 }, { "epoch": 0.88, "learning_rate": 3.536001122036201e-05, "loss": 0.6932, "step": 79330 }, { "epoch": 0.88, "learning_rate": 3.535908849322349e-05, "loss": 0.7493, "step": 79335 }, { "epoch": 0.88, "learning_rate": 3.5358165766084984e-05, "loss": 0.7183, "step": 79340 }, { "epoch": 0.88, "learning_rate": 3.535724303894647e-05, "loss": 0.6871, "step": 79345 }, { "epoch": 0.88, "learning_rate": 3.535632031180795e-05, "loss": 0.6837, "step": 79350 }, { "epoch": 0.88, "learning_rate": 3.535539758466944e-05, "loss": 0.648, "step": 79355 }, { "epoch": 0.88, "learning_rate": 3.5354474857530935e-05, "loss": 0.6618, "step": 79360 }, { "epoch": 0.88, "learning_rate": 3.535355213039242e-05, "loss": 0.6671, "step": 79365 }, { "epoch": 0.88, "learning_rate": 3.5352629403253904e-05, "loss": 0.7131, "step": 79370 }, { "epoch": 0.88, "learning_rate": 3.535170667611539e-05, "loss": 0.6791, "step": 79375 }, { "epoch": 0.88, "learning_rate": 3.535078394897689e-05, "loss": 0.7634, "step": 79380 }, { "epoch": 0.88, "learning_rate": 3.534986122183837e-05, "loss": 0.6691, "step": 79385 }, { "epoch": 0.88, "learning_rate": 3.5348938494699856e-05, "loss": 0.7184, "step": 79390 }, { "epoch": 0.88, "learning_rate": 3.5348015767561343e-05, "loss": 0.7033, "step": 79395 }, { "epoch": 0.88, "learning_rate": 3.534709304042283e-05, "loss": 0.7394, "step": 79400 }, { "epoch": 0.88, "learning_rate": 3.534617031328432e-05, "loss": 0.6587, "step": 79405 }, { "epoch": 0.88, "learning_rate": 3.534524758614581e-05, "loss": 0.6674, "step": 79410 }, { "epoch": 0.88, "learning_rate": 3.5344324859007295e-05, "loss": 0.7016, "step": 79415 }, { "epoch": 0.88, "learning_rate": 3.534340213186878e-05, "loss": 0.6347, "step": 79420 }, { "epoch": 0.88, "learning_rate": 3.534247940473027e-05, "loss": 0.7295, "step": 79425 }, { "epoch": 0.88, "learning_rate": 3.534155667759176e-05, "loss": 0.6772, "step": 79430 }, { "epoch": 0.88, "learning_rate": 3.5340633950453246e-05, "loss": 0.7106, "step": 79435 }, { "epoch": 0.88, "learning_rate": 3.5339711223314734e-05, "loss": 0.7188, "step": 79440 }, { "epoch": 0.88, "learning_rate": 3.533878849617622e-05, "loss": 0.6928, "step": 79445 }, { "epoch": 0.88, "learning_rate": 3.533786576903771e-05, "loss": 0.687, "step": 79450 }, { "epoch": 0.88, "learning_rate": 3.53369430418992e-05, "loss": 0.7353, "step": 79455 }, { "epoch": 0.88, "learning_rate": 3.533602031476068e-05, "loss": 0.7068, "step": 79460 }, { "epoch": 0.88, "learning_rate": 3.533509758762217e-05, "loss": 0.6631, "step": 79465 }, { "epoch": 0.88, "learning_rate": 3.533417486048366e-05, "loss": 0.6428, "step": 79470 }, { "epoch": 0.88, "learning_rate": 3.533325213334515e-05, "loss": 0.7044, "step": 79475 }, { "epoch": 0.88, "learning_rate": 3.533232940620663e-05, "loss": 0.6705, "step": 79480 }, { "epoch": 0.88, "learning_rate": 3.533140667906812e-05, "loss": 0.7006, "step": 79485 }, { "epoch": 0.88, "learning_rate": 3.533048395192961e-05, "loss": 0.7395, "step": 79490 }, { "epoch": 0.88, "learning_rate": 3.5329561224791094e-05, "loss": 0.7173, "step": 79495 }, { "epoch": 0.88, "learning_rate": 3.532863849765258e-05, "loss": 0.7594, "step": 79500 }, { "epoch": 0.88, "learning_rate": 3.532771577051407e-05, "loss": 0.7053, "step": 79505 }, { "epoch": 0.88, "learning_rate": 3.5326793043375564e-05, "loss": 0.738, "step": 79510 }, { "epoch": 0.88, "learning_rate": 3.5325870316237045e-05, "loss": 0.7091, "step": 79515 }, { "epoch": 0.88, "learning_rate": 3.532494758909853e-05, "loss": 0.6912, "step": 79520 }, { "epoch": 0.88, "learning_rate": 3.532402486196002e-05, "loss": 0.6744, "step": 79525 }, { "epoch": 0.88, "learning_rate": 3.532310213482151e-05, "loss": 0.6996, "step": 79530 }, { "epoch": 0.88, "learning_rate": 3.5322179407682996e-05, "loss": 0.6799, "step": 79535 }, { "epoch": 0.88, "learning_rate": 3.5321256680544484e-05, "loss": 0.7214, "step": 79540 }, { "epoch": 0.88, "learning_rate": 3.532033395340597e-05, "loss": 0.7518, "step": 79545 }, { "epoch": 0.88, "learning_rate": 3.531941122626746e-05, "loss": 0.7017, "step": 79550 }, { "epoch": 0.88, "learning_rate": 3.531848849912895e-05, "loss": 0.6317, "step": 79555 }, { "epoch": 0.88, "learning_rate": 3.5317565771990436e-05, "loss": 0.713, "step": 79560 }, { "epoch": 0.88, "learning_rate": 3.5316643044851924e-05, "loss": 0.7007, "step": 79565 }, { "epoch": 0.88, "learning_rate": 3.5315720317713405e-05, "loss": 0.6983, "step": 79570 }, { "epoch": 0.88, "learning_rate": 3.53147975905749e-05, "loss": 0.6298, "step": 79575 }, { "epoch": 0.88, "learning_rate": 3.531387486343639e-05, "loss": 0.7656, "step": 79580 }, { "epoch": 0.88, "learning_rate": 3.5312952136297875e-05, "loss": 0.744, "step": 79585 }, { "epoch": 0.88, "learning_rate": 3.5312029409159356e-05, "loss": 0.7139, "step": 79590 }, { "epoch": 0.88, "learning_rate": 3.531110668202085e-05, "loss": 0.666, "step": 79595 }, { "epoch": 0.88, "learning_rate": 3.531018395488234e-05, "loss": 0.6861, "step": 79600 }, { "epoch": 0.88, "learning_rate": 3.530926122774382e-05, "loss": 0.7283, "step": 79605 }, { "epoch": 0.88, "learning_rate": 3.530833850060531e-05, "loss": 0.6851, "step": 79610 }, { "epoch": 0.88, "learning_rate": 3.53074157734668e-05, "loss": 0.6782, "step": 79615 }, { "epoch": 0.88, "learning_rate": 3.530649304632829e-05, "loss": 0.7285, "step": 79620 }, { "epoch": 0.88, "learning_rate": 3.530557031918977e-05, "loss": 0.7026, "step": 79625 }, { "epoch": 0.88, "learning_rate": 3.530464759205126e-05, "loss": 0.6739, "step": 79630 }, { "epoch": 0.88, "learning_rate": 3.530372486491275e-05, "loss": 0.7182, "step": 79635 }, { "epoch": 0.88, "learning_rate": 3.530280213777424e-05, "loss": 0.7013, "step": 79640 }, { "epoch": 0.88, "learning_rate": 3.530187941063572e-05, "loss": 0.7084, "step": 79645 }, { "epoch": 0.88, "learning_rate": 3.530095668349721e-05, "loss": 0.6983, "step": 79650 }, { "epoch": 0.88, "learning_rate": 3.53000339563587e-05, "loss": 0.6997, "step": 79655 }, { "epoch": 0.88, "learning_rate": 3.5299111229220186e-05, "loss": 0.67, "step": 79660 }, { "epoch": 0.88, "learning_rate": 3.5298188502081674e-05, "loss": 0.7307, "step": 79665 }, { "epoch": 0.88, "learning_rate": 3.529726577494316e-05, "loss": 0.6759, "step": 79670 }, { "epoch": 0.88, "learning_rate": 3.529634304780465e-05, "loss": 0.6811, "step": 79675 }, { "epoch": 0.88, "learning_rate": 3.529542032066614e-05, "loss": 0.7191, "step": 79680 }, { "epoch": 0.88, "learning_rate": 3.5294497593527625e-05, "loss": 0.7129, "step": 79685 }, { "epoch": 0.88, "learning_rate": 3.529357486638911e-05, "loss": 0.7125, "step": 79690 }, { "epoch": 0.88, "learning_rate": 3.52926521392506e-05, "loss": 0.6632, "step": 79695 }, { "epoch": 0.88, "learning_rate": 3.529172941211208e-05, "loss": 0.6752, "step": 79700 }, { "epoch": 0.88, "learning_rate": 3.5290806684973577e-05, "loss": 0.7119, "step": 79705 }, { "epoch": 0.88, "learning_rate": 3.5289883957835064e-05, "loss": 0.7099, "step": 79710 }, { "epoch": 0.88, "learning_rate": 3.528896123069655e-05, "loss": 0.7426, "step": 79715 }, { "epoch": 0.88, "learning_rate": 3.528803850355803e-05, "loss": 0.671, "step": 79720 }, { "epoch": 0.88, "learning_rate": 3.528711577641953e-05, "loss": 0.7029, "step": 79725 }, { "epoch": 0.88, "learning_rate": 3.5286193049281016e-05, "loss": 0.67, "step": 79730 }, { "epoch": 0.88, "learning_rate": 3.52852703221425e-05, "loss": 0.6853, "step": 79735 }, { "epoch": 0.88, "learning_rate": 3.5284347595003985e-05, "loss": 0.7151, "step": 79740 }, { "epoch": 0.88, "learning_rate": 3.528342486786548e-05, "loss": 0.7341, "step": 79745 }, { "epoch": 0.88, "learning_rate": 3.528250214072697e-05, "loss": 0.679, "step": 79750 }, { "epoch": 0.88, "learning_rate": 3.528157941358845e-05, "loss": 0.7345, "step": 79755 }, { "epoch": 0.88, "learning_rate": 3.5280656686449936e-05, "loss": 0.685, "step": 79760 }, { "epoch": 0.88, "learning_rate": 3.527973395931143e-05, "loss": 0.6947, "step": 79765 }, { "epoch": 0.88, "learning_rate": 3.527881123217291e-05, "loss": 0.7201, "step": 79770 }, { "epoch": 0.88, "learning_rate": 3.52778885050344e-05, "loss": 0.7425, "step": 79775 }, { "epoch": 0.88, "learning_rate": 3.527696577789589e-05, "loss": 0.6714, "step": 79780 }, { "epoch": 0.88, "learning_rate": 3.5276043050757375e-05, "loss": 0.7107, "step": 79785 }, { "epoch": 0.88, "learning_rate": 3.527512032361886e-05, "loss": 0.7347, "step": 79790 }, { "epoch": 0.88, "learning_rate": 3.527419759648035e-05, "loss": 0.6938, "step": 79795 }, { "epoch": 0.88, "learning_rate": 3.527327486934184e-05, "loss": 0.6955, "step": 79800 }, { "epoch": 0.88, "learning_rate": 3.527235214220333e-05, "loss": 0.7395, "step": 79805 }, { "epoch": 0.88, "learning_rate": 3.5271429415064815e-05, "loss": 0.7314, "step": 79810 }, { "epoch": 0.88, "learning_rate": 3.52705066879263e-05, "loss": 0.7015, "step": 79815 }, { "epoch": 0.88, "learning_rate": 3.526958396078779e-05, "loss": 0.7718, "step": 79820 }, { "epoch": 0.88, "learning_rate": 3.526866123364928e-05, "loss": 0.6924, "step": 79825 }, { "epoch": 0.88, "learning_rate": 3.5267738506510766e-05, "loss": 0.6259, "step": 79830 }, { "epoch": 0.88, "learning_rate": 3.5266815779372254e-05, "loss": 0.6547, "step": 79835 }, { "epoch": 0.88, "learning_rate": 3.526589305223374e-05, "loss": 0.7142, "step": 79840 }, { "epoch": 0.88, "learning_rate": 3.526497032509522e-05, "loss": 0.7437, "step": 79845 }, { "epoch": 0.88, "learning_rate": 3.526404759795671e-05, "loss": 0.7025, "step": 79850 }, { "epoch": 0.88, "learning_rate": 3.5263124870818205e-05, "loss": 0.7124, "step": 79855 }, { "epoch": 0.88, "learning_rate": 3.526220214367969e-05, "loss": 0.7708, "step": 79860 }, { "epoch": 0.88, "learning_rate": 3.5261279416541174e-05, "loss": 0.7129, "step": 79865 }, { "epoch": 0.88, "learning_rate": 3.526035668940266e-05, "loss": 0.6385, "step": 79870 }, { "epoch": 0.88, "learning_rate": 3.525943396226416e-05, "loss": 0.6997, "step": 79875 }, { "epoch": 0.88, "learning_rate": 3.525851123512564e-05, "loss": 0.6911, "step": 79880 }, { "epoch": 0.88, "learning_rate": 3.5257588507987126e-05, "loss": 0.7728, "step": 79885 }, { "epoch": 0.88, "learning_rate": 3.525666578084861e-05, "loss": 0.6603, "step": 79890 }, { "epoch": 0.88, "learning_rate": 3.525574305371011e-05, "loss": 0.7272, "step": 79895 }, { "epoch": 0.88, "learning_rate": 3.525482032657159e-05, "loss": 0.6839, "step": 79900 }, { "epoch": 0.88, "learning_rate": 3.525389759943308e-05, "loss": 0.6442, "step": 79905 }, { "epoch": 0.88, "learning_rate": 3.5252974872294565e-05, "loss": 0.7524, "step": 79910 }, { "epoch": 0.88, "learning_rate": 3.525205214515605e-05, "loss": 0.7497, "step": 79915 }, { "epoch": 0.88, "learning_rate": 3.525112941801754e-05, "loss": 0.7233, "step": 79920 }, { "epoch": 0.88, "learning_rate": 3.525020669087903e-05, "loss": 0.7772, "step": 79925 }, { "epoch": 0.89, "learning_rate": 3.5249283963740516e-05, "loss": 0.6659, "step": 79930 }, { "epoch": 0.89, "learning_rate": 3.5248361236602004e-05, "loss": 0.6468, "step": 79935 }, { "epoch": 0.89, "learning_rate": 3.524743850946349e-05, "loss": 0.6768, "step": 79940 }, { "epoch": 0.89, "learning_rate": 3.524651578232498e-05, "loss": 0.7216, "step": 79945 }, { "epoch": 0.89, "learning_rate": 3.524559305518647e-05, "loss": 0.6744, "step": 79950 }, { "epoch": 0.89, "learning_rate": 3.524467032804795e-05, "loss": 0.6669, "step": 79955 }, { "epoch": 0.89, "learning_rate": 3.524374760090944e-05, "loss": 0.6936, "step": 79960 }, { "epoch": 0.89, "learning_rate": 3.524282487377093e-05, "loss": 0.6769, "step": 79965 }, { "epoch": 0.89, "learning_rate": 3.524190214663242e-05, "loss": 0.7225, "step": 79970 }, { "epoch": 0.89, "learning_rate": 3.52409794194939e-05, "loss": 0.6364, "step": 79975 }, { "epoch": 0.89, "learning_rate": 3.5240056692355395e-05, "loss": 0.6723, "step": 79980 }, { "epoch": 0.89, "learning_rate": 3.523913396521688e-05, "loss": 0.7276, "step": 79985 }, { "epoch": 0.89, "learning_rate": 3.5238211238078364e-05, "loss": 0.6921, "step": 79990 }, { "epoch": 0.89, "learning_rate": 3.523728851093985e-05, "loss": 0.6698, "step": 79995 }, { "epoch": 0.89, "learning_rate": 3.523636578380134e-05, "loss": 0.6781, "step": 80000 }, { "epoch": 0.89, "eval_loss": 0.6900344491004944, "eval_runtime": 69.4295, "eval_samples_per_second": 28.806, "eval_steps_per_second": 14.403, "step": 80000 }, { "epoch": 0.89, "learning_rate": 3.5235443056662834e-05, "loss": 0.6876, "step": 80005 }, { "epoch": 0.89, "learning_rate": 3.5234520329524315e-05, "loss": 0.705, "step": 80010 }, { "epoch": 0.89, "learning_rate": 3.52335976023858e-05, "loss": 0.6892, "step": 80015 }, { "epoch": 0.89, "learning_rate": 3.523267487524729e-05, "loss": 0.7321, "step": 80020 }, { "epoch": 0.89, "learning_rate": 3.5231752148108785e-05, "loss": 0.699, "step": 80025 }, { "epoch": 0.89, "learning_rate": 3.5230829420970266e-05, "loss": 0.685, "step": 80030 }, { "epoch": 0.89, "learning_rate": 3.5229906693831754e-05, "loss": 0.6829, "step": 80035 }, { "epoch": 0.89, "learning_rate": 3.522898396669324e-05, "loss": 0.704, "step": 80040 }, { "epoch": 0.89, "learning_rate": 3.522806123955473e-05, "loss": 0.766, "step": 80045 }, { "epoch": 0.89, "learning_rate": 3.522713851241622e-05, "loss": 0.7126, "step": 80050 }, { "epoch": 0.89, "learning_rate": 3.5226215785277706e-05, "loss": 0.6917, "step": 80055 }, { "epoch": 0.89, "learning_rate": 3.5225293058139193e-05, "loss": 0.7027, "step": 80060 }, { "epoch": 0.89, "learning_rate": 3.522437033100068e-05, "loss": 0.6462, "step": 80065 }, { "epoch": 0.89, "learning_rate": 3.522344760386217e-05, "loss": 0.7266, "step": 80070 }, { "epoch": 0.89, "learning_rate": 3.522252487672366e-05, "loss": 0.7206, "step": 80075 }, { "epoch": 0.89, "learning_rate": 3.5221602149585145e-05, "loss": 0.6673, "step": 80080 }, { "epoch": 0.89, "learning_rate": 3.5220679422446626e-05, "loss": 0.737, "step": 80085 }, { "epoch": 0.89, "learning_rate": 3.521975669530812e-05, "loss": 0.6443, "step": 80090 }, { "epoch": 0.89, "learning_rate": 3.521883396816961e-05, "loss": 0.6766, "step": 80095 }, { "epoch": 0.89, "learning_rate": 3.5217911241031096e-05, "loss": 0.7346, "step": 80100 }, { "epoch": 0.89, "learning_rate": 3.521698851389258e-05, "loss": 0.6613, "step": 80105 }, { "epoch": 0.89, "learning_rate": 3.521606578675407e-05, "loss": 0.6823, "step": 80110 }, { "epoch": 0.89, "learning_rate": 3.521514305961556e-05, "loss": 0.6841, "step": 80115 }, { "epoch": 0.89, "learning_rate": 3.521422033247704e-05, "loss": 0.673, "step": 80120 }, { "epoch": 0.89, "learning_rate": 3.521329760533853e-05, "loss": 0.7345, "step": 80125 }, { "epoch": 0.89, "learning_rate": 3.521237487820002e-05, "loss": 0.6978, "step": 80130 }, { "epoch": 0.89, "learning_rate": 3.521145215106151e-05, "loss": 0.6853, "step": 80135 }, { "epoch": 0.89, "learning_rate": 3.521052942392299e-05, "loss": 0.7174, "step": 80140 }, { "epoch": 0.89, "learning_rate": 3.520960669678448e-05, "loss": 0.6784, "step": 80145 }, { "epoch": 0.89, "learning_rate": 3.5208683969645975e-05, "loss": 0.7259, "step": 80150 }, { "epoch": 0.89, "learning_rate": 3.5207761242507456e-05, "loss": 0.6853, "step": 80155 }, { "epoch": 0.89, "learning_rate": 3.5206838515368944e-05, "loss": 0.6803, "step": 80160 }, { "epoch": 0.89, "learning_rate": 3.520591578823043e-05, "loss": 0.7242, "step": 80165 }, { "epoch": 0.89, "learning_rate": 3.520499306109192e-05, "loss": 0.7045, "step": 80170 }, { "epoch": 0.89, "learning_rate": 3.520407033395341e-05, "loss": 0.7018, "step": 80175 }, { "epoch": 0.89, "learning_rate": 3.5203147606814895e-05, "loss": 0.7668, "step": 80180 }, { "epoch": 0.89, "learning_rate": 3.520222487967638e-05, "loss": 0.7262, "step": 80185 }, { "epoch": 0.89, "learning_rate": 3.520130215253787e-05, "loss": 0.6253, "step": 80190 }, { "epoch": 0.89, "learning_rate": 3.520037942539936e-05, "loss": 0.6693, "step": 80195 }, { "epoch": 0.89, "learning_rate": 3.5199456698260846e-05, "loss": 0.7244, "step": 80200 }, { "epoch": 0.89, "learning_rate": 3.5198533971122334e-05, "loss": 0.7104, "step": 80205 }, { "epoch": 0.89, "learning_rate": 3.519761124398382e-05, "loss": 0.6411, "step": 80210 }, { "epoch": 0.89, "learning_rate": 3.519668851684531e-05, "loss": 0.6807, "step": 80215 }, { "epoch": 0.89, "learning_rate": 3.51957657897068e-05, "loss": 0.7168, "step": 80220 }, { "epoch": 0.89, "learning_rate": 3.5194843062568286e-05, "loss": 0.7307, "step": 80225 }, { "epoch": 0.89, "learning_rate": 3.519392033542977e-05, "loss": 0.6757, "step": 80230 }, { "epoch": 0.89, "learning_rate": 3.5192997608291255e-05, "loss": 0.6659, "step": 80235 }, { "epoch": 0.89, "learning_rate": 3.519207488115275e-05, "loss": 0.7054, "step": 80240 }, { "epoch": 0.89, "learning_rate": 3.519115215401424e-05, "loss": 0.6465, "step": 80245 }, { "epoch": 0.89, "learning_rate": 3.519022942687572e-05, "loss": 0.7187, "step": 80250 }, { "epoch": 0.89, "learning_rate": 3.5189306699737206e-05, "loss": 0.7022, "step": 80255 }, { "epoch": 0.89, "learning_rate": 3.51883839725987e-05, "loss": 0.7153, "step": 80260 }, { "epoch": 0.89, "learning_rate": 3.518746124546018e-05, "loss": 0.6351, "step": 80265 }, { "epoch": 0.89, "learning_rate": 3.518653851832167e-05, "loss": 0.6662, "step": 80270 }, { "epoch": 0.89, "learning_rate": 3.518561579118316e-05, "loss": 0.7096, "step": 80275 }, { "epoch": 0.89, "learning_rate": 3.518469306404465e-05, "loss": 0.7323, "step": 80280 }, { "epoch": 0.89, "learning_rate": 3.518377033690613e-05, "loss": 0.7031, "step": 80285 }, { "epoch": 0.89, "learning_rate": 3.518284760976762e-05, "loss": 0.6698, "step": 80290 }, { "epoch": 0.89, "learning_rate": 3.518192488262911e-05, "loss": 0.6496, "step": 80295 }, { "epoch": 0.89, "learning_rate": 3.51810021554906e-05, "loss": 0.7111, "step": 80300 }, { "epoch": 0.89, "learning_rate": 3.5180079428352084e-05, "loss": 0.669, "step": 80305 }, { "epoch": 0.89, "learning_rate": 3.517915670121357e-05, "loss": 0.6779, "step": 80310 }, { "epoch": 0.89, "learning_rate": 3.517823397407506e-05, "loss": 0.6797, "step": 80315 }, { "epoch": 0.89, "learning_rate": 3.517731124693655e-05, "loss": 0.6921, "step": 80320 }, { "epoch": 0.89, "learning_rate": 3.5176388519798036e-05, "loss": 0.6779, "step": 80325 }, { "epoch": 0.89, "learning_rate": 3.5175465792659524e-05, "loss": 0.699, "step": 80330 }, { "epoch": 0.89, "learning_rate": 3.517454306552101e-05, "loss": 0.7258, "step": 80335 }, { "epoch": 0.89, "learning_rate": 3.517362033838249e-05, "loss": 0.7035, "step": 80340 }, { "epoch": 0.89, "learning_rate": 3.517269761124399e-05, "loss": 0.685, "step": 80345 }, { "epoch": 0.89, "learning_rate": 3.5171774884105475e-05, "loss": 0.7573, "step": 80350 }, { "epoch": 0.89, "learning_rate": 3.517085215696696e-05, "loss": 0.7356, "step": 80355 }, { "epoch": 0.89, "learning_rate": 3.5169929429828444e-05, "loss": 0.694, "step": 80360 }, { "epoch": 0.89, "learning_rate": 3.516900670268994e-05, "loss": 0.8, "step": 80365 }, { "epoch": 0.89, "learning_rate": 3.5168083975551427e-05, "loss": 0.6731, "step": 80370 }, { "epoch": 0.89, "learning_rate": 3.516716124841291e-05, "loss": 0.7273, "step": 80375 }, { "epoch": 0.89, "learning_rate": 3.5166238521274395e-05, "loss": 0.659, "step": 80380 }, { "epoch": 0.89, "learning_rate": 3.516531579413588e-05, "loss": 0.6698, "step": 80385 }, { "epoch": 0.89, "learning_rate": 3.516439306699738e-05, "loss": 0.7036, "step": 80390 }, { "epoch": 0.89, "learning_rate": 3.516347033985886e-05, "loss": 0.6769, "step": 80395 }, { "epoch": 0.89, "learning_rate": 3.516254761272035e-05, "loss": 0.7322, "step": 80400 }, { "epoch": 0.89, "learning_rate": 3.5161624885581835e-05, "loss": 0.7056, "step": 80405 }, { "epoch": 0.89, "learning_rate": 3.516070215844333e-05, "loss": 0.7349, "step": 80410 }, { "epoch": 0.89, "learning_rate": 3.515977943130481e-05, "loss": 0.6844, "step": 80415 }, { "epoch": 0.89, "learning_rate": 3.51588567041663e-05, "loss": 0.6805, "step": 80420 }, { "epoch": 0.89, "learning_rate": 3.5157933977027786e-05, "loss": 0.6587, "step": 80425 }, { "epoch": 0.89, "learning_rate": 3.5157011249889274e-05, "loss": 0.7416, "step": 80430 }, { "epoch": 0.89, "learning_rate": 3.515608852275076e-05, "loss": 0.659, "step": 80435 }, { "epoch": 0.89, "learning_rate": 3.515516579561225e-05, "loss": 0.6984, "step": 80440 }, { "epoch": 0.89, "learning_rate": 3.515424306847374e-05, "loss": 0.6946, "step": 80445 }, { "epoch": 0.89, "learning_rate": 3.5153320341335225e-05, "loss": 0.6894, "step": 80450 }, { "epoch": 0.89, "learning_rate": 3.515239761419671e-05, "loss": 0.7186, "step": 80455 }, { "epoch": 0.89, "learning_rate": 3.51514748870582e-05, "loss": 0.6765, "step": 80460 }, { "epoch": 0.89, "learning_rate": 3.515055215991969e-05, "loss": 0.607, "step": 80465 }, { "epoch": 0.89, "learning_rate": 3.514962943278117e-05, "loss": 0.6816, "step": 80470 }, { "epoch": 0.89, "learning_rate": 3.5148706705642665e-05, "loss": 0.63, "step": 80475 }, { "epoch": 0.89, "learning_rate": 3.514778397850415e-05, "loss": 0.7143, "step": 80480 }, { "epoch": 0.89, "learning_rate": 3.514686125136564e-05, "loss": 0.6487, "step": 80485 }, { "epoch": 0.89, "learning_rate": 3.514593852422712e-05, "loss": 0.7292, "step": 80490 }, { "epoch": 0.89, "learning_rate": 3.5145015797088616e-05, "loss": 0.673, "step": 80495 }, { "epoch": 0.89, "learning_rate": 3.5144093069950104e-05, "loss": 0.6994, "step": 80500 }, { "epoch": 0.89, "learning_rate": 3.5143170342811585e-05, "loss": 0.654, "step": 80505 }, { "epoch": 0.89, "learning_rate": 3.514224761567307e-05, "loss": 0.6794, "step": 80510 }, { "epoch": 0.89, "learning_rate": 3.514132488853457e-05, "loss": 0.7196, "step": 80515 }, { "epoch": 0.89, "learning_rate": 3.5140402161396055e-05, "loss": 0.6867, "step": 80520 }, { "epoch": 0.89, "learning_rate": 3.5139479434257536e-05, "loss": 0.6674, "step": 80525 }, { "epoch": 0.89, "learning_rate": 3.5138556707119024e-05, "loss": 0.695, "step": 80530 }, { "epoch": 0.89, "learning_rate": 3.513763397998051e-05, "loss": 0.7005, "step": 80535 }, { "epoch": 0.89, "learning_rate": 3.5136711252842e-05, "loss": 0.7133, "step": 80540 }, { "epoch": 0.89, "learning_rate": 3.513578852570349e-05, "loss": 0.6923, "step": 80545 }, { "epoch": 0.89, "learning_rate": 3.5134865798564976e-05, "loss": 0.6938, "step": 80550 }, { "epoch": 0.89, "learning_rate": 3.513394307142646e-05, "loss": 0.6637, "step": 80555 }, { "epoch": 0.89, "learning_rate": 3.513302034428795e-05, "loss": 0.7259, "step": 80560 }, { "epoch": 0.89, "learning_rate": 3.513209761714944e-05, "loss": 0.6678, "step": 80565 }, { "epoch": 0.89, "learning_rate": 3.513117489001093e-05, "loss": 0.771, "step": 80570 }, { "epoch": 0.89, "learning_rate": 3.5130252162872415e-05, "loss": 0.6598, "step": 80575 }, { "epoch": 0.89, "learning_rate": 3.51293294357339e-05, "loss": 0.7445, "step": 80580 }, { "epoch": 0.89, "learning_rate": 3.512840670859539e-05, "loss": 0.6967, "step": 80585 }, { "epoch": 0.89, "learning_rate": 3.512748398145688e-05, "loss": 0.6481, "step": 80590 }, { "epoch": 0.89, "learning_rate": 3.5126561254318366e-05, "loss": 0.7249, "step": 80595 }, { "epoch": 0.89, "learning_rate": 3.5125638527179854e-05, "loss": 0.681, "step": 80600 }, { "epoch": 0.89, "learning_rate": 3.512471580004134e-05, "loss": 0.6771, "step": 80605 }, { "epoch": 0.89, "learning_rate": 3.512379307290283e-05, "loss": 0.7462, "step": 80610 }, { "epoch": 0.89, "learning_rate": 3.512287034576431e-05, "loss": 0.7081, "step": 80615 }, { "epoch": 0.89, "learning_rate": 3.51219476186258e-05, "loss": 0.7426, "step": 80620 }, { "epoch": 0.89, "learning_rate": 3.512102489148729e-05, "loss": 0.7344, "step": 80625 }, { "epoch": 0.89, "learning_rate": 3.512010216434878e-05, "loss": 0.6856, "step": 80630 }, { "epoch": 0.89, "learning_rate": 3.511917943721026e-05, "loss": 0.6783, "step": 80635 }, { "epoch": 0.89, "learning_rate": 3.511825671007175e-05, "loss": 0.6977, "step": 80640 }, { "epoch": 0.89, "learning_rate": 3.5117333982933245e-05, "loss": 0.6979, "step": 80645 }, { "epoch": 0.89, "learning_rate": 3.5116411255794726e-05, "loss": 0.6684, "step": 80650 }, { "epoch": 0.89, "learning_rate": 3.5115488528656214e-05, "loss": 0.6863, "step": 80655 }, { "epoch": 0.89, "learning_rate": 3.51145658015177e-05, "loss": 0.7058, "step": 80660 }, { "epoch": 0.89, "learning_rate": 3.5113643074379196e-05, "loss": 0.7565, "step": 80665 }, { "epoch": 0.89, "learning_rate": 3.511272034724068e-05, "loss": 0.688, "step": 80670 }, { "epoch": 0.89, "learning_rate": 3.5111797620102165e-05, "loss": 0.6773, "step": 80675 }, { "epoch": 0.89, "learning_rate": 3.511087489296365e-05, "loss": 0.6699, "step": 80680 }, { "epoch": 0.89, "learning_rate": 3.510995216582514e-05, "loss": 0.7712, "step": 80685 }, { "epoch": 0.89, "learning_rate": 3.510902943868663e-05, "loss": 0.7732, "step": 80690 }, { "epoch": 0.89, "learning_rate": 3.5108106711548116e-05, "loss": 0.6992, "step": 80695 }, { "epoch": 0.89, "learning_rate": 3.5107183984409604e-05, "loss": 0.6772, "step": 80700 }, { "epoch": 0.89, "learning_rate": 3.510626125727109e-05, "loss": 0.6464, "step": 80705 }, { "epoch": 0.89, "learning_rate": 3.510533853013258e-05, "loss": 0.6581, "step": 80710 }, { "epoch": 0.89, "learning_rate": 3.510441580299407e-05, "loss": 0.6683, "step": 80715 }, { "epoch": 0.89, "learning_rate": 3.5103493075855556e-05, "loss": 0.7274, "step": 80720 }, { "epoch": 0.89, "learning_rate": 3.510257034871704e-05, "loss": 0.6495, "step": 80725 }, { "epoch": 0.89, "learning_rate": 3.510164762157853e-05, "loss": 0.6713, "step": 80730 }, { "epoch": 0.89, "learning_rate": 3.510072489444002e-05, "loss": 0.677, "step": 80735 }, { "epoch": 0.89, "learning_rate": 3.509980216730151e-05, "loss": 0.7188, "step": 80740 }, { "epoch": 0.89, "learning_rate": 3.509887944016299e-05, "loss": 0.6745, "step": 80745 }, { "epoch": 0.89, "learning_rate": 3.509795671302448e-05, "loss": 0.7682, "step": 80750 }, { "epoch": 0.89, "learning_rate": 3.509703398588597e-05, "loss": 0.6384, "step": 80755 }, { "epoch": 0.89, "learning_rate": 3.509611125874745e-05, "loss": 0.7038, "step": 80760 }, { "epoch": 0.89, "learning_rate": 3.509518853160894e-05, "loss": 0.6724, "step": 80765 }, { "epoch": 0.89, "learning_rate": 3.509426580447043e-05, "loss": 0.7632, "step": 80770 }, { "epoch": 0.89, "learning_rate": 3.509334307733192e-05, "loss": 0.7248, "step": 80775 }, { "epoch": 0.89, "learning_rate": 3.50924203501934e-05, "loss": 0.667, "step": 80780 }, { "epoch": 0.89, "learning_rate": 3.509149762305489e-05, "loss": 0.7163, "step": 80785 }, { "epoch": 0.89, "learning_rate": 3.509057489591638e-05, "loss": 0.7359, "step": 80790 }, { "epoch": 0.89, "learning_rate": 3.508965216877787e-05, "loss": 0.6802, "step": 80795 }, { "epoch": 0.89, "learning_rate": 3.5088729441639354e-05, "loss": 0.7916, "step": 80800 }, { "epoch": 0.89, "learning_rate": 3.508780671450084e-05, "loss": 0.6909, "step": 80805 }, { "epoch": 0.89, "learning_rate": 3.508688398736233e-05, "loss": 0.6879, "step": 80810 }, { "epoch": 0.89, "learning_rate": 3.508596126022382e-05, "loss": 0.641, "step": 80815 }, { "epoch": 0.89, "learning_rate": 3.5085038533085306e-05, "loss": 0.7979, "step": 80820 }, { "epoch": 0.89, "learning_rate": 3.5084115805946794e-05, "loss": 0.705, "step": 80825 }, { "epoch": 0.9, "learning_rate": 3.508319307880828e-05, "loss": 0.68, "step": 80830 }, { "epoch": 0.9, "learning_rate": 3.508227035166976e-05, "loss": 0.7213, "step": 80835 }, { "epoch": 0.9, "learning_rate": 3.508134762453126e-05, "loss": 0.6772, "step": 80840 }, { "epoch": 0.9, "learning_rate": 3.5080424897392745e-05, "loss": 0.671, "step": 80845 }, { "epoch": 0.9, "learning_rate": 3.507950217025423e-05, "loss": 0.7242, "step": 80850 }, { "epoch": 0.9, "learning_rate": 3.5078579443115714e-05, "loss": 0.7152, "step": 80855 }, { "epoch": 0.9, "learning_rate": 3.507765671597721e-05, "loss": 0.6717, "step": 80860 }, { "epoch": 0.9, "learning_rate": 3.5076733988838696e-05, "loss": 0.6737, "step": 80865 }, { "epoch": 0.9, "learning_rate": 3.5075811261700184e-05, "loss": 0.6531, "step": 80870 }, { "epoch": 0.9, "learning_rate": 3.5074888534561665e-05, "loss": 0.7682, "step": 80875 }, { "epoch": 0.9, "learning_rate": 3.507396580742316e-05, "loss": 0.6522, "step": 80880 }, { "epoch": 0.9, "learning_rate": 3.507304308028465e-05, "loss": 0.7562, "step": 80885 }, { "epoch": 0.9, "learning_rate": 3.507212035314613e-05, "loss": 0.6946, "step": 80890 }, { "epoch": 0.9, "learning_rate": 3.507119762600762e-05, "loss": 0.6622, "step": 80895 }, { "epoch": 0.9, "learning_rate": 3.507027489886911e-05, "loss": 0.7366, "step": 80900 }, { "epoch": 0.9, "learning_rate": 3.50693521717306e-05, "loss": 0.6881, "step": 80905 }, { "epoch": 0.9, "learning_rate": 3.506842944459208e-05, "loss": 0.7319, "step": 80910 }, { "epoch": 0.9, "learning_rate": 3.506750671745357e-05, "loss": 0.7089, "step": 80915 }, { "epoch": 0.9, "learning_rate": 3.5066583990315056e-05, "loss": 0.703, "step": 80920 }, { "epoch": 0.9, "learning_rate": 3.5065661263176544e-05, "loss": 0.7154, "step": 80925 }, { "epoch": 0.9, "learning_rate": 3.506473853603803e-05, "loss": 0.6741, "step": 80930 }, { "epoch": 0.9, "learning_rate": 3.506381580889952e-05, "loss": 0.6542, "step": 80935 }, { "epoch": 0.9, "learning_rate": 3.506289308176101e-05, "loss": 0.7511, "step": 80940 }, { "epoch": 0.9, "learning_rate": 3.5061970354622495e-05, "loss": 0.686, "step": 80945 }, { "epoch": 0.9, "learning_rate": 3.506104762748398e-05, "loss": 0.6744, "step": 80950 }, { "epoch": 0.9, "learning_rate": 3.506012490034547e-05, "loss": 0.6893, "step": 80955 }, { "epoch": 0.9, "learning_rate": 3.505920217320696e-05, "loss": 0.6324, "step": 80960 }, { "epoch": 0.9, "learning_rate": 3.505827944606845e-05, "loss": 0.683, "step": 80965 }, { "epoch": 0.9, "learning_rate": 3.5057356718929934e-05, "loss": 0.6997, "step": 80970 }, { "epoch": 0.9, "learning_rate": 3.505643399179142e-05, "loss": 0.7213, "step": 80975 }, { "epoch": 0.9, "learning_rate": 3.505551126465291e-05, "loss": 0.6968, "step": 80980 }, { "epoch": 0.9, "learning_rate": 3.50545885375144e-05, "loss": 0.7088, "step": 80985 }, { "epoch": 0.9, "learning_rate": 3.5053665810375886e-05, "loss": 0.7164, "step": 80990 }, { "epoch": 0.9, "learning_rate": 3.5052743083237374e-05, "loss": 0.7123, "step": 80995 }, { "epoch": 0.9, "learning_rate": 3.5051820356098855e-05, "loss": 0.65, "step": 81000 }, { "epoch": 0.9, "eval_loss": 0.6834558844566345, "eval_runtime": 69.2384, "eval_samples_per_second": 28.886, "eval_steps_per_second": 14.443, "step": 81000 }, { "epoch": 0.9, "learning_rate": 3.505089762896034e-05, "loss": 0.6454, "step": 81005 }, { "epoch": 0.9, "learning_rate": 3.504997490182184e-05, "loss": 0.6995, "step": 81010 }, { "epoch": 0.9, "learning_rate": 3.5049052174683325e-05, "loss": 0.7405, "step": 81015 }, { "epoch": 0.9, "learning_rate": 3.5048129447544806e-05, "loss": 0.6716, "step": 81020 }, { "epoch": 0.9, "learning_rate": 3.5047206720406294e-05, "loss": 0.6945, "step": 81025 }, { "epoch": 0.9, "learning_rate": 3.504628399326779e-05, "loss": 0.7378, "step": 81030 }, { "epoch": 0.9, "learning_rate": 3.504536126612927e-05, "loss": 0.6914, "step": 81035 }, { "epoch": 0.9, "learning_rate": 3.504443853899076e-05, "loss": 0.6845, "step": 81040 }, { "epoch": 0.9, "learning_rate": 3.5043515811852245e-05, "loss": 0.7574, "step": 81045 }, { "epoch": 0.9, "learning_rate": 3.504259308471374e-05, "loss": 0.6438, "step": 81050 }, { "epoch": 0.9, "learning_rate": 3.504167035757522e-05, "loss": 0.7136, "step": 81055 }, { "epoch": 0.9, "learning_rate": 3.504074763043671e-05, "loss": 0.7103, "step": 81060 }, { "epoch": 0.9, "learning_rate": 3.50398249032982e-05, "loss": 0.722, "step": 81065 }, { "epoch": 0.9, "learning_rate": 3.5038902176159685e-05, "loss": 0.6432, "step": 81070 }, { "epoch": 0.9, "learning_rate": 3.503797944902117e-05, "loss": 0.7152, "step": 81075 }, { "epoch": 0.9, "learning_rate": 3.503705672188266e-05, "loss": 0.7176, "step": 81080 }, { "epoch": 0.9, "learning_rate": 3.503613399474415e-05, "loss": 0.6728, "step": 81085 }, { "epoch": 0.9, "learning_rate": 3.5035211267605636e-05, "loss": 0.7005, "step": 81090 }, { "epoch": 0.9, "learning_rate": 3.5034288540467124e-05, "loss": 0.7336, "step": 81095 }, { "epoch": 0.9, "learning_rate": 3.503336581332861e-05, "loss": 0.6849, "step": 81100 }, { "epoch": 0.9, "learning_rate": 3.50324430861901e-05, "loss": 0.714, "step": 81105 }, { "epoch": 0.9, "learning_rate": 3.503152035905158e-05, "loss": 0.6855, "step": 81110 }, { "epoch": 0.9, "learning_rate": 3.5030597631913075e-05, "loss": 0.7124, "step": 81115 }, { "epoch": 0.9, "learning_rate": 3.502967490477456e-05, "loss": 0.723, "step": 81120 }, { "epoch": 0.9, "learning_rate": 3.502875217763605e-05, "loss": 0.6967, "step": 81125 }, { "epoch": 0.9, "learning_rate": 3.502782945049753e-05, "loss": 0.6878, "step": 81130 }, { "epoch": 0.9, "learning_rate": 3.502690672335903e-05, "loss": 0.7353, "step": 81135 }, { "epoch": 0.9, "learning_rate": 3.5025983996220515e-05, "loss": 0.7067, "step": 81140 }, { "epoch": 0.9, "learning_rate": 3.5025061269082e-05, "loss": 0.7192, "step": 81145 }, { "epoch": 0.9, "learning_rate": 3.5024138541943483e-05, "loss": 0.661, "step": 81150 }, { "epoch": 0.9, "learning_rate": 3.502321581480497e-05, "loss": 0.6898, "step": 81155 }, { "epoch": 0.9, "learning_rate": 3.5022293087666466e-05, "loss": 0.6988, "step": 81160 }, { "epoch": 0.9, "learning_rate": 3.502137036052795e-05, "loss": 0.6727, "step": 81165 }, { "epoch": 0.9, "learning_rate": 3.5020447633389435e-05, "loss": 0.6695, "step": 81170 }, { "epoch": 0.9, "learning_rate": 3.501952490625092e-05, "loss": 0.6982, "step": 81175 }, { "epoch": 0.9, "learning_rate": 3.501860217911242e-05, "loss": 0.691, "step": 81180 }, { "epoch": 0.9, "learning_rate": 3.50176794519739e-05, "loss": 0.698, "step": 81185 }, { "epoch": 0.9, "learning_rate": 3.5016756724835386e-05, "loss": 0.7167, "step": 81190 }, { "epoch": 0.9, "learning_rate": 3.5015833997696874e-05, "loss": 0.6748, "step": 81195 }, { "epoch": 0.9, "learning_rate": 3.501491127055836e-05, "loss": 0.6459, "step": 81200 }, { "epoch": 0.9, "learning_rate": 3.501398854341985e-05, "loss": 0.6947, "step": 81205 }, { "epoch": 0.9, "learning_rate": 3.501306581628134e-05, "loss": 0.6393, "step": 81210 }, { "epoch": 0.9, "learning_rate": 3.5012143089142826e-05, "loss": 0.6786, "step": 81215 }, { "epoch": 0.9, "learning_rate": 3.501122036200431e-05, "loss": 0.6993, "step": 81220 }, { "epoch": 0.9, "learning_rate": 3.50102976348658e-05, "loss": 0.7155, "step": 81225 }, { "epoch": 0.9, "learning_rate": 3.500937490772729e-05, "loss": 0.6621, "step": 81230 }, { "epoch": 0.9, "learning_rate": 3.500845218058878e-05, "loss": 0.6928, "step": 81235 }, { "epoch": 0.9, "learning_rate": 3.500752945345026e-05, "loss": 0.6384, "step": 81240 }, { "epoch": 0.9, "learning_rate": 3.500660672631175e-05, "loss": 0.7102, "step": 81245 }, { "epoch": 0.9, "learning_rate": 3.500568399917324e-05, "loss": 0.7018, "step": 81250 }, { "epoch": 0.9, "learning_rate": 3.500476127203473e-05, "loss": 0.6427, "step": 81255 }, { "epoch": 0.9, "learning_rate": 3.500383854489621e-05, "loss": 0.7047, "step": 81260 }, { "epoch": 0.9, "learning_rate": 3.5002915817757704e-05, "loss": 0.6907, "step": 81265 }, { "epoch": 0.9, "learning_rate": 3.500199309061919e-05, "loss": 0.7235, "step": 81270 }, { "epoch": 0.9, "learning_rate": 3.500107036348067e-05, "loss": 0.6956, "step": 81275 }, { "epoch": 0.9, "learning_rate": 3.500014763634216e-05, "loss": 0.6408, "step": 81280 }, { "epoch": 0.9, "learning_rate": 3.4999224909203655e-05, "loss": 0.6447, "step": 81285 }, { "epoch": 0.9, "learning_rate": 3.499830218206514e-05, "loss": 0.7409, "step": 81290 }, { "epoch": 0.9, "learning_rate": 3.4997379454926624e-05, "loss": 0.661, "step": 81295 }, { "epoch": 0.9, "learning_rate": 3.499645672778811e-05, "loss": 0.6928, "step": 81300 }, { "epoch": 0.9, "learning_rate": 3.49955340006496e-05, "loss": 0.6766, "step": 81305 }, { "epoch": 0.9, "learning_rate": 3.499461127351109e-05, "loss": 0.6519, "step": 81310 }, { "epoch": 0.9, "learning_rate": 3.4993688546372576e-05, "loss": 0.6867, "step": 81315 }, { "epoch": 0.9, "learning_rate": 3.4992765819234064e-05, "loss": 0.6769, "step": 81320 }, { "epoch": 0.9, "learning_rate": 3.499184309209555e-05, "loss": 0.6971, "step": 81325 }, { "epoch": 0.9, "learning_rate": 3.499092036495704e-05, "loss": 0.6765, "step": 81330 }, { "epoch": 0.9, "learning_rate": 3.498999763781853e-05, "loss": 0.7179, "step": 81335 }, { "epoch": 0.9, "learning_rate": 3.4989074910680015e-05, "loss": 0.6223, "step": 81340 }, { "epoch": 0.9, "learning_rate": 3.49881521835415e-05, "loss": 0.7538, "step": 81345 }, { "epoch": 0.9, "learning_rate": 3.498722945640299e-05, "loss": 0.6857, "step": 81350 }, { "epoch": 0.9, "learning_rate": 3.498630672926448e-05, "loss": 0.6779, "step": 81355 }, { "epoch": 0.9, "learning_rate": 3.4985384002125966e-05, "loss": 0.7456, "step": 81360 }, { "epoch": 0.9, "learning_rate": 3.4984461274987454e-05, "loss": 0.7703, "step": 81365 }, { "epoch": 0.9, "learning_rate": 3.4983538547848935e-05, "loss": 0.7464, "step": 81370 }, { "epoch": 0.9, "learning_rate": 3.498261582071043e-05, "loss": 0.7019, "step": 81375 }, { "epoch": 0.9, "learning_rate": 3.498169309357192e-05, "loss": 0.6742, "step": 81380 }, { "epoch": 0.9, "learning_rate": 3.49807703664334e-05, "loss": 0.7096, "step": 81385 }, { "epoch": 0.9, "learning_rate": 3.497984763929489e-05, "loss": 0.7748, "step": 81390 }, { "epoch": 0.9, "learning_rate": 3.497892491215638e-05, "loss": 0.6357, "step": 81395 }, { "epoch": 0.9, "learning_rate": 3.497800218501787e-05, "loss": 0.6771, "step": 81400 }, { "epoch": 0.9, "learning_rate": 3.497707945787935e-05, "loss": 0.6838, "step": 81405 }, { "epoch": 0.9, "learning_rate": 3.497615673074084e-05, "loss": 0.7969, "step": 81410 }, { "epoch": 0.9, "learning_rate": 3.497523400360233e-05, "loss": 0.6436, "step": 81415 }, { "epoch": 0.9, "learning_rate": 3.4974311276463814e-05, "loss": 0.715, "step": 81420 }, { "epoch": 0.9, "learning_rate": 3.49733885493253e-05, "loss": 0.7207, "step": 81425 }, { "epoch": 0.9, "learning_rate": 3.497246582218679e-05, "loss": 0.6934, "step": 81430 }, { "epoch": 0.9, "learning_rate": 3.4971543095048284e-05, "loss": 0.6996, "step": 81435 }, { "epoch": 0.9, "learning_rate": 3.4970620367909765e-05, "loss": 0.6937, "step": 81440 }, { "epoch": 0.9, "learning_rate": 3.496969764077125e-05, "loss": 0.7004, "step": 81445 }, { "epoch": 0.9, "learning_rate": 3.496877491363274e-05, "loss": 0.6694, "step": 81450 }, { "epoch": 0.9, "learning_rate": 3.496785218649423e-05, "loss": 0.6996, "step": 81455 }, { "epoch": 0.9, "learning_rate": 3.4966929459355717e-05, "loss": 0.6605, "step": 81460 }, { "epoch": 0.9, "learning_rate": 3.4966006732217204e-05, "loss": 0.7155, "step": 81465 }, { "epoch": 0.9, "learning_rate": 3.496508400507869e-05, "loss": 0.7645, "step": 81470 }, { "epoch": 0.9, "learning_rate": 3.496416127794018e-05, "loss": 0.716, "step": 81475 }, { "epoch": 0.9, "learning_rate": 3.496323855080167e-05, "loss": 0.6855, "step": 81480 }, { "epoch": 0.9, "learning_rate": 3.4962315823663156e-05, "loss": 0.7236, "step": 81485 }, { "epoch": 0.9, "learning_rate": 3.4961393096524644e-05, "loss": 0.7077, "step": 81490 }, { "epoch": 0.9, "learning_rate": 3.4960470369386125e-05, "loss": 0.6916, "step": 81495 }, { "epoch": 0.9, "learning_rate": 3.495954764224762e-05, "loss": 0.6778, "step": 81500 }, { "epoch": 0.9, "learning_rate": 3.495862491510911e-05, "loss": 0.6965, "step": 81505 }, { "epoch": 0.9, "learning_rate": 3.4957702187970595e-05, "loss": 0.6916, "step": 81510 }, { "epoch": 0.9, "learning_rate": 3.4956779460832076e-05, "loss": 0.729, "step": 81515 }, { "epoch": 0.9, "learning_rate": 3.4955856733693564e-05, "loss": 0.7185, "step": 81520 }, { "epoch": 0.9, "learning_rate": 3.495493400655506e-05, "loss": 0.6472, "step": 81525 }, { "epoch": 0.9, "learning_rate": 3.4954011279416546e-05, "loss": 0.6923, "step": 81530 }, { "epoch": 0.9, "learning_rate": 3.495308855227803e-05, "loss": 0.7113, "step": 81535 }, { "epoch": 0.9, "learning_rate": 3.4952165825139515e-05, "loss": 0.7617, "step": 81540 }, { "epoch": 0.9, "learning_rate": 3.495124309800101e-05, "loss": 0.7938, "step": 81545 }, { "epoch": 0.9, "learning_rate": 3.495032037086249e-05, "loss": 0.6998, "step": 81550 }, { "epoch": 0.9, "learning_rate": 3.494939764372398e-05, "loss": 0.682, "step": 81555 }, { "epoch": 0.9, "learning_rate": 3.494847491658547e-05, "loss": 0.6784, "step": 81560 }, { "epoch": 0.9, "learning_rate": 3.494755218944696e-05, "loss": 0.675, "step": 81565 }, { "epoch": 0.9, "learning_rate": 3.494662946230844e-05, "loss": 0.7346, "step": 81570 }, { "epoch": 0.9, "learning_rate": 3.494570673516993e-05, "loss": 0.6715, "step": 81575 }, { "epoch": 0.9, "learning_rate": 3.494478400803142e-05, "loss": 0.6967, "step": 81580 }, { "epoch": 0.9, "learning_rate": 3.4943861280892906e-05, "loss": 0.6907, "step": 81585 }, { "epoch": 0.9, "learning_rate": 3.4942938553754394e-05, "loss": 0.6991, "step": 81590 }, { "epoch": 0.9, "learning_rate": 3.494201582661588e-05, "loss": 0.7135, "step": 81595 }, { "epoch": 0.9, "learning_rate": 3.494109309947737e-05, "loss": 0.7119, "step": 81600 }, { "epoch": 0.9, "learning_rate": 3.494017037233886e-05, "loss": 0.6639, "step": 81605 }, { "epoch": 0.9, "learning_rate": 3.4939247645200345e-05, "loss": 0.6551, "step": 81610 }, { "epoch": 0.9, "learning_rate": 3.493832491806183e-05, "loss": 0.6659, "step": 81615 }, { "epoch": 0.9, "learning_rate": 3.493740219092332e-05, "loss": 0.6492, "step": 81620 }, { "epoch": 0.9, "learning_rate": 3.49364794637848e-05, "loss": 0.7234, "step": 81625 }, { "epoch": 0.9, "learning_rate": 3.49355567366463e-05, "loss": 0.675, "step": 81630 }, { "epoch": 0.9, "learning_rate": 3.4934634009507784e-05, "loss": 0.6369, "step": 81635 }, { "epoch": 0.9, "learning_rate": 3.493371128236927e-05, "loss": 0.6417, "step": 81640 }, { "epoch": 0.9, "learning_rate": 3.4932788555230753e-05, "loss": 0.6048, "step": 81645 }, { "epoch": 0.9, "learning_rate": 3.493186582809225e-05, "loss": 0.6641, "step": 81650 }, { "epoch": 0.9, "learning_rate": 3.4930943100953736e-05, "loss": 0.7161, "step": 81655 }, { "epoch": 0.9, "learning_rate": 3.493002037381522e-05, "loss": 0.6602, "step": 81660 }, { "epoch": 0.9, "learning_rate": 3.4929097646676705e-05, "loss": 0.7601, "step": 81665 }, { "epoch": 0.9, "learning_rate": 3.492817491953819e-05, "loss": 0.7053, "step": 81670 }, { "epoch": 0.9, "learning_rate": 3.492725219239969e-05, "loss": 0.73, "step": 81675 }, { "epoch": 0.9, "learning_rate": 3.492632946526117e-05, "loss": 0.6802, "step": 81680 }, { "epoch": 0.9, "learning_rate": 3.4925406738122656e-05, "loss": 0.7673, "step": 81685 }, { "epoch": 0.9, "learning_rate": 3.4924484010984144e-05, "loss": 0.6782, "step": 81690 }, { "epoch": 0.9, "learning_rate": 3.492356128384563e-05, "loss": 0.6488, "step": 81695 }, { "epoch": 0.9, "learning_rate": 3.492263855670712e-05, "loss": 0.7154, "step": 81700 }, { "epoch": 0.9, "learning_rate": 3.492171582956861e-05, "loss": 0.7705, "step": 81705 }, { "epoch": 0.9, "learning_rate": 3.4920793102430095e-05, "loss": 0.7129, "step": 81710 }, { "epoch": 0.9, "learning_rate": 3.491987037529158e-05, "loss": 0.6998, "step": 81715 }, { "epoch": 0.9, "learning_rate": 3.491894764815307e-05, "loss": 0.6582, "step": 81720 }, { "epoch": 0.9, "learning_rate": 3.491802492101456e-05, "loss": 0.7218, "step": 81725 }, { "epoch": 0.9, "learning_rate": 3.491710219387605e-05, "loss": 0.6635, "step": 81730 }, { "epoch": 0.91, "learning_rate": 3.4916179466737535e-05, "loss": 0.6629, "step": 81735 }, { "epoch": 0.91, "learning_rate": 3.491525673959902e-05, "loss": 0.7012, "step": 81740 }, { "epoch": 0.91, "learning_rate": 3.491433401246051e-05, "loss": 0.7065, "step": 81745 }, { "epoch": 0.91, "learning_rate": 3.4913411285322e-05, "loss": 0.7398, "step": 81750 }, { "epoch": 0.91, "learning_rate": 3.491248855818348e-05, "loss": 0.7385, "step": 81755 }, { "epoch": 0.91, "learning_rate": 3.4911565831044974e-05, "loss": 0.6628, "step": 81760 }, { "epoch": 0.91, "learning_rate": 3.491064310390646e-05, "loss": 0.6983, "step": 81765 }, { "epoch": 0.91, "learning_rate": 3.490972037676794e-05, "loss": 0.6754, "step": 81770 }, { "epoch": 0.91, "learning_rate": 3.490879764962943e-05, "loss": 0.7108, "step": 81775 }, { "epoch": 0.91, "learning_rate": 3.4907874922490925e-05, "loss": 0.6319, "step": 81780 }, { "epoch": 0.91, "learning_rate": 3.490695219535241e-05, "loss": 0.7668, "step": 81785 }, { "epoch": 0.91, "learning_rate": 3.4906029468213894e-05, "loss": 0.6825, "step": 81790 }, { "epoch": 0.91, "learning_rate": 3.490510674107538e-05, "loss": 0.7872, "step": 81795 }, { "epoch": 0.91, "learning_rate": 3.490418401393688e-05, "loss": 0.6592, "step": 81800 }, { "epoch": 0.91, "learning_rate": 3.490326128679836e-05, "loss": 0.6735, "step": 81805 }, { "epoch": 0.91, "learning_rate": 3.4902338559659846e-05, "loss": 0.6996, "step": 81810 }, { "epoch": 0.91, "learning_rate": 3.4901415832521333e-05, "loss": 0.7052, "step": 81815 }, { "epoch": 0.91, "learning_rate": 3.490049310538283e-05, "loss": 0.6836, "step": 81820 }, { "epoch": 0.91, "learning_rate": 3.489957037824431e-05, "loss": 0.6905, "step": 81825 }, { "epoch": 0.91, "learning_rate": 3.48986476511058e-05, "loss": 0.6589, "step": 81830 }, { "epoch": 0.91, "learning_rate": 3.4897724923967285e-05, "loss": 0.7256, "step": 81835 }, { "epoch": 0.91, "learning_rate": 3.489680219682877e-05, "loss": 0.728, "step": 81840 }, { "epoch": 0.91, "learning_rate": 3.489587946969026e-05, "loss": 0.7017, "step": 81845 }, { "epoch": 0.91, "learning_rate": 3.489495674255175e-05, "loss": 0.6502, "step": 81850 }, { "epoch": 0.91, "learning_rate": 3.4894034015413236e-05, "loss": 0.712, "step": 81855 }, { "epoch": 0.91, "learning_rate": 3.4893111288274724e-05, "loss": 0.6627, "step": 81860 }, { "epoch": 0.91, "learning_rate": 3.489218856113621e-05, "loss": 0.7172, "step": 81865 }, { "epoch": 0.91, "learning_rate": 3.48912658339977e-05, "loss": 0.6746, "step": 81870 }, { "epoch": 0.91, "learning_rate": 3.489034310685919e-05, "loss": 0.6611, "step": 81875 }, { "epoch": 0.91, "learning_rate": 3.488942037972067e-05, "loss": 0.6976, "step": 81880 }, { "epoch": 0.91, "learning_rate": 3.488849765258216e-05, "loss": 0.735, "step": 81885 }, { "epoch": 0.91, "learning_rate": 3.488757492544365e-05, "loss": 0.7249, "step": 81890 }, { "epoch": 0.91, "learning_rate": 3.488665219830514e-05, "loss": 0.7137, "step": 81895 }, { "epoch": 0.91, "learning_rate": 3.488572947116662e-05, "loss": 0.7151, "step": 81900 }, { "epoch": 0.91, "learning_rate": 3.488480674402811e-05, "loss": 0.6352, "step": 81905 }, { "epoch": 0.91, "learning_rate": 3.48838840168896e-05, "loss": 0.68, "step": 81910 }, { "epoch": 0.91, "learning_rate": 3.488296128975109e-05, "loss": 0.645, "step": 81915 }, { "epoch": 0.91, "learning_rate": 3.488203856261257e-05, "loss": 0.7271, "step": 81920 }, { "epoch": 0.91, "learning_rate": 3.488111583547406e-05, "loss": 0.7021, "step": 81925 }, { "epoch": 0.91, "learning_rate": 3.4880193108335554e-05, "loss": 0.6402, "step": 81930 }, { "epoch": 0.91, "learning_rate": 3.4879270381197035e-05, "loss": 0.7141, "step": 81935 }, { "epoch": 0.91, "learning_rate": 3.487834765405852e-05, "loss": 0.6743, "step": 81940 }, { "epoch": 0.91, "learning_rate": 3.487742492692001e-05, "loss": 0.6985, "step": 81945 }, { "epoch": 0.91, "learning_rate": 3.4876502199781505e-05, "loss": 0.688, "step": 81950 }, { "epoch": 0.91, "learning_rate": 3.4875579472642986e-05, "loss": 0.6811, "step": 81955 }, { "epoch": 0.91, "learning_rate": 3.4874656745504474e-05, "loss": 0.6836, "step": 81960 }, { "epoch": 0.91, "learning_rate": 3.487373401836596e-05, "loss": 0.6714, "step": 81965 }, { "epoch": 0.91, "learning_rate": 3.487281129122745e-05, "loss": 0.6592, "step": 81970 }, { "epoch": 0.91, "learning_rate": 3.487188856408894e-05, "loss": 0.6835, "step": 81975 }, { "epoch": 0.91, "learning_rate": 3.4870965836950426e-05, "loss": 0.7067, "step": 81980 }, { "epoch": 0.91, "learning_rate": 3.4870043109811914e-05, "loss": 0.6826, "step": 81985 }, { "epoch": 0.91, "learning_rate": 3.48691203826734e-05, "loss": 0.6697, "step": 81990 }, { "epoch": 0.91, "learning_rate": 3.486819765553489e-05, "loss": 0.6486, "step": 81995 }, { "epoch": 0.91, "learning_rate": 3.486727492839638e-05, "loss": 0.7205, "step": 82000 }, { "epoch": 0.91, "eval_loss": 0.6712340116500854, "eval_runtime": 115.1725, "eval_samples_per_second": 17.365, "eval_steps_per_second": 8.683, "step": 82000 }, { "epoch": 0.91, "learning_rate": 3.4866352201257865e-05, "loss": 0.6906, "step": 82005 }, { "epoch": 0.91, "learning_rate": 3.4865429474119346e-05, "loss": 0.6493, "step": 82010 }, { "epoch": 0.91, "learning_rate": 3.486450674698084e-05, "loss": 0.665, "step": 82015 }, { "epoch": 0.91, "learning_rate": 3.486358401984233e-05, "loss": 0.6992, "step": 82020 }, { "epoch": 0.91, "learning_rate": 3.4862661292703816e-05, "loss": 0.6664, "step": 82025 }, { "epoch": 0.91, "learning_rate": 3.48617385655653e-05, "loss": 0.6754, "step": 82030 }, { "epoch": 0.91, "learning_rate": 3.486081583842679e-05, "loss": 0.7328, "step": 82035 }, { "epoch": 0.91, "learning_rate": 3.485989311128828e-05, "loss": 0.6597, "step": 82040 }, { "epoch": 0.91, "learning_rate": 3.485897038414976e-05, "loss": 0.715, "step": 82045 }, { "epoch": 0.91, "learning_rate": 3.485804765701125e-05, "loss": 0.7257, "step": 82050 }, { "epoch": 0.91, "learning_rate": 3.485712492987274e-05, "loss": 0.6936, "step": 82055 }, { "epoch": 0.91, "learning_rate": 3.485620220273423e-05, "loss": 0.7112, "step": 82060 }, { "epoch": 0.91, "learning_rate": 3.485527947559571e-05, "loss": 0.7321, "step": 82065 }, { "epoch": 0.91, "learning_rate": 3.48543567484572e-05, "loss": 0.6516, "step": 82070 }, { "epoch": 0.91, "learning_rate": 3.485343402131869e-05, "loss": 0.6442, "step": 82075 }, { "epoch": 0.91, "learning_rate": 3.4852511294180176e-05, "loss": 0.6806, "step": 82080 }, { "epoch": 0.91, "learning_rate": 3.4851588567041664e-05, "loss": 0.7278, "step": 82085 }, { "epoch": 0.91, "learning_rate": 3.485066583990315e-05, "loss": 0.6494, "step": 82090 }, { "epoch": 0.91, "learning_rate": 3.484974311276464e-05, "loss": 0.7255, "step": 82095 }, { "epoch": 0.91, "learning_rate": 3.484882038562613e-05, "loss": 0.7122, "step": 82100 }, { "epoch": 0.91, "learning_rate": 3.4847897658487615e-05, "loss": 0.6395, "step": 82105 }, { "epoch": 0.91, "learning_rate": 3.48469749313491e-05, "loss": 0.6707, "step": 82110 }, { "epoch": 0.91, "learning_rate": 3.484605220421059e-05, "loss": 0.7248, "step": 82115 }, { "epoch": 0.91, "learning_rate": 3.484512947707208e-05, "loss": 0.694, "step": 82120 }, { "epoch": 0.91, "learning_rate": 3.4844206749933567e-05, "loss": 0.6679, "step": 82125 }, { "epoch": 0.91, "learning_rate": 3.4843284022795054e-05, "loss": 0.6576, "step": 82130 }, { "epoch": 0.91, "learning_rate": 3.484236129565654e-05, "loss": 0.6842, "step": 82135 }, { "epoch": 0.91, "learning_rate": 3.484143856851802e-05, "loss": 0.6638, "step": 82140 }, { "epoch": 0.91, "learning_rate": 3.484051584137952e-05, "loss": 0.748, "step": 82145 }, { "epoch": 0.91, "learning_rate": 3.4839593114241006e-05, "loss": 0.7549, "step": 82150 }, { "epoch": 0.91, "learning_rate": 3.483867038710249e-05, "loss": 0.6879, "step": 82155 }, { "epoch": 0.91, "learning_rate": 3.4837747659963975e-05, "loss": 0.7136, "step": 82160 }, { "epoch": 0.91, "learning_rate": 3.483682493282547e-05, "loss": 0.6952, "step": 82165 }, { "epoch": 0.91, "learning_rate": 3.483590220568696e-05, "loss": 0.6877, "step": 82170 }, { "epoch": 0.91, "learning_rate": 3.483497947854844e-05, "loss": 0.7063, "step": 82175 }, { "epoch": 0.91, "learning_rate": 3.4834056751409926e-05, "loss": 0.694, "step": 82180 }, { "epoch": 0.91, "learning_rate": 3.483313402427142e-05, "loss": 0.7449, "step": 82185 }, { "epoch": 0.91, "learning_rate": 3.48322112971329e-05, "loss": 0.6662, "step": 82190 }, { "epoch": 0.91, "learning_rate": 3.483128856999439e-05, "loss": 0.6542, "step": 82195 }, { "epoch": 0.91, "learning_rate": 3.483036584285588e-05, "loss": 0.6767, "step": 82200 }, { "epoch": 0.91, "learning_rate": 3.4829443115717365e-05, "loss": 0.6663, "step": 82205 }, { "epoch": 0.91, "learning_rate": 3.482852038857885e-05, "loss": 0.667, "step": 82210 }, { "epoch": 0.91, "learning_rate": 3.482759766144034e-05, "loss": 0.7385, "step": 82215 }, { "epoch": 0.91, "learning_rate": 3.482667493430183e-05, "loss": 0.6936, "step": 82220 }, { "epoch": 0.91, "learning_rate": 3.482575220716332e-05, "loss": 0.7098, "step": 82225 }, { "epoch": 0.91, "learning_rate": 3.4824829480024805e-05, "loss": 0.7178, "step": 82230 }, { "epoch": 0.91, "learning_rate": 3.482390675288629e-05, "loss": 0.6483, "step": 82235 }, { "epoch": 0.91, "learning_rate": 3.482298402574778e-05, "loss": 0.7141, "step": 82240 }, { "epoch": 0.91, "learning_rate": 3.482206129860927e-05, "loss": 0.6642, "step": 82245 }, { "epoch": 0.91, "learning_rate": 3.4821138571470756e-05, "loss": 0.7119, "step": 82250 }, { "epoch": 0.91, "learning_rate": 3.4820215844332244e-05, "loss": 0.7404, "step": 82255 }, { "epoch": 0.91, "learning_rate": 3.481929311719373e-05, "loss": 0.6628, "step": 82260 }, { "epoch": 0.91, "learning_rate": 3.481837039005521e-05, "loss": 0.6942, "step": 82265 }, { "epoch": 0.91, "learning_rate": 3.481744766291671e-05, "loss": 0.6134, "step": 82270 }, { "epoch": 0.91, "learning_rate": 3.4816524935778195e-05, "loss": 0.6714, "step": 82275 }, { "epoch": 0.91, "learning_rate": 3.481560220863968e-05, "loss": 0.6314, "step": 82280 }, { "epoch": 0.91, "learning_rate": 3.4814679481501164e-05, "loss": 0.6925, "step": 82285 }, { "epoch": 0.91, "learning_rate": 3.481375675436265e-05, "loss": 0.7395, "step": 82290 }, { "epoch": 0.91, "learning_rate": 3.481283402722415e-05, "loss": 0.6663, "step": 82295 }, { "epoch": 0.91, "learning_rate": 3.4811911300085634e-05, "loss": 0.7036, "step": 82300 }, { "epoch": 0.91, "learning_rate": 3.4810988572947116e-05, "loss": 0.6855, "step": 82305 }, { "epoch": 0.91, "learning_rate": 3.4810065845808603e-05, "loss": 0.6746, "step": 82310 }, { "epoch": 0.91, "learning_rate": 3.48091431186701e-05, "loss": 0.7355, "step": 82315 }, { "epoch": 0.91, "learning_rate": 3.480822039153158e-05, "loss": 0.6604, "step": 82320 }, { "epoch": 0.91, "learning_rate": 3.480729766439307e-05, "loss": 0.7603, "step": 82325 }, { "epoch": 0.91, "learning_rate": 3.4806374937254555e-05, "loss": 0.6873, "step": 82330 }, { "epoch": 0.91, "learning_rate": 3.480545221011605e-05, "loss": 0.7027, "step": 82335 }, { "epoch": 0.91, "learning_rate": 3.480452948297753e-05, "loss": 0.6903, "step": 82340 }, { "epoch": 0.91, "learning_rate": 3.480360675583902e-05, "loss": 0.6617, "step": 82345 }, { "epoch": 0.91, "learning_rate": 3.4802684028700506e-05, "loss": 0.6711, "step": 82350 }, { "epoch": 0.91, "learning_rate": 3.4801761301561994e-05, "loss": 0.653, "step": 82355 }, { "epoch": 0.91, "learning_rate": 3.480083857442348e-05, "loss": 0.7193, "step": 82360 }, { "epoch": 0.91, "learning_rate": 3.479991584728497e-05, "loss": 0.707, "step": 82365 }, { "epoch": 0.91, "learning_rate": 3.479899312014646e-05, "loss": 0.7231, "step": 82370 }, { "epoch": 0.91, "learning_rate": 3.4798070393007945e-05, "loss": 0.6476, "step": 82375 }, { "epoch": 0.91, "learning_rate": 3.479714766586943e-05, "loss": 0.7868, "step": 82380 }, { "epoch": 0.91, "learning_rate": 3.479622493873092e-05, "loss": 0.7202, "step": 82385 }, { "epoch": 0.91, "learning_rate": 3.479530221159241e-05, "loss": 0.7767, "step": 82390 }, { "epoch": 0.91, "learning_rate": 3.479437948445389e-05, "loss": 0.6488, "step": 82395 }, { "epoch": 0.91, "learning_rate": 3.4793456757315385e-05, "loss": 0.7201, "step": 82400 }, { "epoch": 0.91, "learning_rate": 3.479253403017687e-05, "loss": 0.7014, "step": 82405 }, { "epoch": 0.91, "learning_rate": 3.479161130303836e-05, "loss": 0.7177, "step": 82410 }, { "epoch": 0.91, "learning_rate": 3.479068857589984e-05, "loss": 0.7083, "step": 82415 }, { "epoch": 0.91, "learning_rate": 3.4789765848761336e-05, "loss": 0.6901, "step": 82420 }, { "epoch": 0.91, "learning_rate": 3.4788843121622824e-05, "loss": 0.7028, "step": 82425 }, { "epoch": 0.91, "learning_rate": 3.4787920394484305e-05, "loss": 0.7018, "step": 82430 }, { "epoch": 0.91, "learning_rate": 3.478699766734579e-05, "loss": 0.7131, "step": 82435 }, { "epoch": 0.91, "learning_rate": 3.478607494020728e-05, "loss": 0.7513, "step": 82440 }, { "epoch": 0.91, "learning_rate": 3.4785152213068775e-05, "loss": 0.7253, "step": 82445 }, { "epoch": 0.91, "learning_rate": 3.4784229485930256e-05, "loss": 0.6951, "step": 82450 }, { "epoch": 0.91, "learning_rate": 3.4783306758791744e-05, "loss": 0.6829, "step": 82455 }, { "epoch": 0.91, "learning_rate": 3.478238403165323e-05, "loss": 0.7134, "step": 82460 }, { "epoch": 0.91, "learning_rate": 3.478146130451472e-05, "loss": 0.6648, "step": 82465 }, { "epoch": 0.91, "learning_rate": 3.478053857737621e-05, "loss": 0.6286, "step": 82470 }, { "epoch": 0.91, "learning_rate": 3.4779615850237696e-05, "loss": 0.7076, "step": 82475 }, { "epoch": 0.91, "learning_rate": 3.4778693123099183e-05, "loss": 0.6616, "step": 82480 }, { "epoch": 0.91, "learning_rate": 3.477777039596067e-05, "loss": 0.6929, "step": 82485 }, { "epoch": 0.91, "learning_rate": 3.477684766882216e-05, "loss": 0.6586, "step": 82490 }, { "epoch": 0.91, "learning_rate": 3.477592494168365e-05, "loss": 0.6946, "step": 82495 }, { "epoch": 0.91, "learning_rate": 3.4775002214545135e-05, "loss": 0.6993, "step": 82500 }, { "epoch": 0.91, "learning_rate": 3.4774079487406616e-05, "loss": 0.7132, "step": 82505 }, { "epoch": 0.91, "learning_rate": 3.477315676026811e-05, "loss": 0.6959, "step": 82510 }, { "epoch": 0.91, "learning_rate": 3.47722340331296e-05, "loss": 0.6743, "step": 82515 }, { "epoch": 0.91, "learning_rate": 3.4771311305991086e-05, "loss": 0.6583, "step": 82520 }, { "epoch": 0.91, "learning_rate": 3.477038857885257e-05, "loss": 0.6753, "step": 82525 }, { "epoch": 0.91, "learning_rate": 3.476946585171406e-05, "loss": 0.6816, "step": 82530 }, { "epoch": 0.91, "learning_rate": 3.476854312457555e-05, "loss": 0.7065, "step": 82535 }, { "epoch": 0.91, "learning_rate": 3.476762039743703e-05, "loss": 0.7472, "step": 82540 }, { "epoch": 0.91, "learning_rate": 3.476669767029852e-05, "loss": 0.6747, "step": 82545 }, { "epoch": 0.91, "learning_rate": 3.476577494316001e-05, "loss": 0.7165, "step": 82550 }, { "epoch": 0.91, "learning_rate": 3.47648522160215e-05, "loss": 0.7353, "step": 82555 }, { "epoch": 0.91, "learning_rate": 3.476392948888298e-05, "loss": 0.7242, "step": 82560 }, { "epoch": 0.91, "learning_rate": 3.476300676174447e-05, "loss": 0.7706, "step": 82565 }, { "epoch": 0.91, "learning_rate": 3.4762084034605965e-05, "loss": 0.6463, "step": 82570 }, { "epoch": 0.91, "learning_rate": 3.4761161307467446e-05, "loss": 0.6993, "step": 82575 }, { "epoch": 0.91, "learning_rate": 3.4760238580328934e-05, "loss": 0.6828, "step": 82580 }, { "epoch": 0.91, "learning_rate": 3.475931585319042e-05, "loss": 0.6701, "step": 82585 }, { "epoch": 0.91, "learning_rate": 3.475839312605191e-05, "loss": 0.6845, "step": 82590 }, { "epoch": 0.91, "learning_rate": 3.47574703989134e-05, "loss": 0.6678, "step": 82595 }, { "epoch": 0.91, "learning_rate": 3.4756547671774885e-05, "loss": 0.7019, "step": 82600 }, { "epoch": 0.91, "learning_rate": 3.475562494463637e-05, "loss": 0.7248, "step": 82605 }, { "epoch": 0.91, "learning_rate": 3.475470221749786e-05, "loss": 0.716, "step": 82610 }, { "epoch": 0.91, "learning_rate": 3.475377949035935e-05, "loss": 0.6698, "step": 82615 }, { "epoch": 0.91, "learning_rate": 3.4752856763220836e-05, "loss": 0.6697, "step": 82620 }, { "epoch": 0.91, "learning_rate": 3.4751934036082324e-05, "loss": 0.6947, "step": 82625 }, { "epoch": 0.91, "learning_rate": 3.475101130894381e-05, "loss": 0.7132, "step": 82630 }, { "epoch": 0.91, "learning_rate": 3.47500885818053e-05, "loss": 0.6558, "step": 82635 }, { "epoch": 0.92, "learning_rate": 3.474916585466679e-05, "loss": 0.7672, "step": 82640 }, { "epoch": 0.92, "learning_rate": 3.4748243127528276e-05, "loss": 0.67, "step": 82645 }, { "epoch": 0.92, "learning_rate": 3.474732040038976e-05, "loss": 0.7075, "step": 82650 }, { "epoch": 0.92, "learning_rate": 3.474639767325125e-05, "loss": 0.6912, "step": 82655 }, { "epoch": 0.92, "learning_rate": 3.474547494611274e-05, "loss": 0.656, "step": 82660 }, { "epoch": 0.92, "learning_rate": 3.474455221897423e-05, "loss": 0.7196, "step": 82665 }, { "epoch": 0.92, "learning_rate": 3.474362949183571e-05, "loss": 0.7304, "step": 82670 }, { "epoch": 0.92, "learning_rate": 3.4742706764697196e-05, "loss": 0.7141, "step": 82675 }, { "epoch": 0.92, "learning_rate": 3.474178403755869e-05, "loss": 0.6966, "step": 82680 }, { "epoch": 0.92, "learning_rate": 3.474086131042018e-05, "loss": 0.7351, "step": 82685 }, { "epoch": 0.92, "learning_rate": 3.473993858328166e-05, "loss": 0.6615, "step": 82690 }, { "epoch": 0.92, "learning_rate": 3.473901585614315e-05, "loss": 0.6119, "step": 82695 }, { "epoch": 0.92, "learning_rate": 3.473809312900464e-05, "loss": 0.7002, "step": 82700 }, { "epoch": 0.92, "learning_rate": 3.473717040186612e-05, "loss": 0.7524, "step": 82705 }, { "epoch": 0.92, "learning_rate": 3.473624767472761e-05, "loss": 0.6787, "step": 82710 }, { "epoch": 0.92, "learning_rate": 3.47353249475891e-05, "loss": 0.7026, "step": 82715 }, { "epoch": 0.92, "learning_rate": 3.4734402220450593e-05, "loss": 0.6599, "step": 82720 }, { "epoch": 0.92, "learning_rate": 3.4733479493312075e-05, "loss": 0.7288, "step": 82725 }, { "epoch": 0.92, "learning_rate": 3.473255676617356e-05, "loss": 0.6888, "step": 82730 }, { "epoch": 0.92, "learning_rate": 3.473163403903505e-05, "loss": 0.6363, "step": 82735 }, { "epoch": 0.92, "learning_rate": 3.473071131189654e-05, "loss": 0.6487, "step": 82740 }, { "epoch": 0.92, "learning_rate": 3.4729788584758026e-05, "loss": 0.6844, "step": 82745 }, { "epoch": 0.92, "learning_rate": 3.4728865857619514e-05, "loss": 0.6597, "step": 82750 }, { "epoch": 0.92, "learning_rate": 3.4727943130481e-05, "loss": 0.6504, "step": 82755 }, { "epoch": 0.92, "learning_rate": 3.472702040334249e-05, "loss": 0.7003, "step": 82760 }, { "epoch": 0.92, "learning_rate": 3.472609767620398e-05, "loss": 0.6823, "step": 82765 }, { "epoch": 0.92, "learning_rate": 3.4725174949065465e-05, "loss": 0.6946, "step": 82770 }, { "epoch": 0.92, "learning_rate": 3.472425222192695e-05, "loss": 0.7434, "step": 82775 }, { "epoch": 0.92, "learning_rate": 3.4723329494788434e-05, "loss": 0.7157, "step": 82780 }, { "epoch": 0.92, "learning_rate": 3.472240676764993e-05, "loss": 0.6776, "step": 82785 }, { "epoch": 0.92, "learning_rate": 3.4721484040511417e-05, "loss": 0.7353, "step": 82790 }, { "epoch": 0.92, "learning_rate": 3.4720561313372904e-05, "loss": 0.6969, "step": 82795 }, { "epoch": 0.92, "learning_rate": 3.4719638586234385e-05, "loss": 0.6541, "step": 82800 }, { "epoch": 0.92, "learning_rate": 3.471871585909588e-05, "loss": 0.7226, "step": 82805 }, { "epoch": 0.92, "learning_rate": 3.471779313195737e-05, "loss": 0.6693, "step": 82810 }, { "epoch": 0.92, "learning_rate": 3.471687040481885e-05, "loss": 0.6936, "step": 82815 }, { "epoch": 0.92, "learning_rate": 3.471594767768034e-05, "loss": 0.6913, "step": 82820 }, { "epoch": 0.92, "learning_rate": 3.4715024950541825e-05, "loss": 0.7075, "step": 82825 }, { "epoch": 0.92, "learning_rate": 3.471410222340332e-05, "loss": 0.6918, "step": 82830 }, { "epoch": 0.92, "learning_rate": 3.47131794962648e-05, "loss": 0.7284, "step": 82835 }, { "epoch": 0.92, "learning_rate": 3.471225676912629e-05, "loss": 0.6925, "step": 82840 }, { "epoch": 0.92, "learning_rate": 3.4711334041987776e-05, "loss": 0.718, "step": 82845 }, { "epoch": 0.92, "learning_rate": 3.4710411314849264e-05, "loss": 0.6873, "step": 82850 }, { "epoch": 0.92, "learning_rate": 3.470948858771075e-05, "loss": 0.7255, "step": 82855 }, { "epoch": 0.92, "learning_rate": 3.470856586057224e-05, "loss": 0.7154, "step": 82860 }, { "epoch": 0.92, "learning_rate": 3.470764313343373e-05, "loss": 0.7613, "step": 82865 }, { "epoch": 0.92, "learning_rate": 3.4706720406295215e-05, "loss": 0.7765, "step": 82870 }, { "epoch": 0.92, "learning_rate": 3.47057976791567e-05, "loss": 0.679, "step": 82875 }, { "epoch": 0.92, "learning_rate": 3.470487495201819e-05, "loss": 0.7412, "step": 82880 }, { "epoch": 0.92, "learning_rate": 3.470395222487968e-05, "loss": 0.716, "step": 82885 }, { "epoch": 0.92, "learning_rate": 3.470302949774116e-05, "loss": 0.6767, "step": 82890 }, { "epoch": 0.92, "learning_rate": 3.4702106770602655e-05, "loss": 0.6856, "step": 82895 }, { "epoch": 0.92, "learning_rate": 3.470118404346414e-05, "loss": 0.6682, "step": 82900 }, { "epoch": 0.92, "learning_rate": 3.470026131632563e-05, "loss": 0.7463, "step": 82905 }, { "epoch": 0.92, "learning_rate": 3.469933858918711e-05, "loss": 0.6618, "step": 82910 }, { "epoch": 0.92, "learning_rate": 3.4698415862048606e-05, "loss": 0.6791, "step": 82915 }, { "epoch": 0.92, "learning_rate": 3.4697493134910094e-05, "loss": 0.7242, "step": 82920 }, { "epoch": 0.92, "learning_rate": 3.4696570407771575e-05, "loss": 0.6962, "step": 82925 }, { "epoch": 0.92, "learning_rate": 3.469564768063306e-05, "loss": 0.6732, "step": 82930 }, { "epoch": 0.92, "learning_rate": 3.469472495349456e-05, "loss": 0.6622, "step": 82935 }, { "epoch": 0.92, "learning_rate": 3.4693802226356045e-05, "loss": 0.7583, "step": 82940 }, { "epoch": 0.92, "learning_rate": 3.4692879499217526e-05, "loss": 0.7013, "step": 82945 }, { "epoch": 0.92, "learning_rate": 3.4691956772079014e-05, "loss": 0.6755, "step": 82950 }, { "epoch": 0.92, "learning_rate": 3.469103404494051e-05, "loss": 0.6466, "step": 82955 }, { "epoch": 0.92, "learning_rate": 3.469011131780199e-05, "loss": 0.7409, "step": 82960 }, { "epoch": 0.92, "learning_rate": 3.468918859066348e-05, "loss": 0.6903, "step": 82965 }, { "epoch": 0.92, "learning_rate": 3.4688265863524966e-05, "loss": 0.713, "step": 82970 }, { "epoch": 0.92, "learning_rate": 3.468734313638645e-05, "loss": 0.7195, "step": 82975 }, { "epoch": 0.92, "learning_rate": 3.468642040924794e-05, "loss": 0.6883, "step": 82980 }, { "epoch": 0.92, "learning_rate": 3.468549768210943e-05, "loss": 0.6608, "step": 82985 }, { "epoch": 0.92, "learning_rate": 3.468457495497092e-05, "loss": 0.7059, "step": 82990 }, { "epoch": 0.92, "learning_rate": 3.4683652227832405e-05, "loss": 0.7591, "step": 82995 }, { "epoch": 0.92, "learning_rate": 3.468272950069389e-05, "loss": 0.6901, "step": 83000 }, { "epoch": 0.92, "eval_loss": 0.6699089407920837, "eval_runtime": 69.334, "eval_samples_per_second": 28.846, "eval_steps_per_second": 14.423, "step": 83000 }, { "epoch": 0.92, "learning_rate": 3.468180677355538e-05, "loss": 0.6198, "step": 83005 }, { "epoch": 0.92, "learning_rate": 3.468088404641687e-05, "loss": 0.6807, "step": 83010 }, { "epoch": 0.92, "learning_rate": 3.4679961319278356e-05, "loss": 0.6891, "step": 83015 }, { "epoch": 0.92, "learning_rate": 3.4679038592139844e-05, "loss": 0.7377, "step": 83020 }, { "epoch": 0.92, "learning_rate": 3.467811586500133e-05, "loss": 0.63, "step": 83025 }, { "epoch": 0.92, "learning_rate": 3.467719313786282e-05, "loss": 0.7528, "step": 83030 }, { "epoch": 0.92, "learning_rate": 3.46762704107243e-05, "loss": 0.6962, "step": 83035 }, { "epoch": 0.92, "learning_rate": 3.467534768358579e-05, "loss": 0.6971, "step": 83040 }, { "epoch": 0.92, "learning_rate": 3.467442495644728e-05, "loss": 0.6524, "step": 83045 }, { "epoch": 0.92, "learning_rate": 3.467350222930877e-05, "loss": 0.7258, "step": 83050 }, { "epoch": 0.92, "learning_rate": 3.467257950217025e-05, "loss": 0.7329, "step": 83055 }, { "epoch": 0.92, "learning_rate": 3.467165677503174e-05, "loss": 0.6858, "step": 83060 }, { "epoch": 0.92, "learning_rate": 3.4670734047893235e-05, "loss": 0.7058, "step": 83065 }, { "epoch": 0.92, "learning_rate": 3.466981132075472e-05, "loss": 0.6773, "step": 83070 }, { "epoch": 0.92, "learning_rate": 3.4668888593616204e-05, "loss": 0.7096, "step": 83075 }, { "epoch": 0.92, "learning_rate": 3.466796586647769e-05, "loss": 0.7266, "step": 83080 }, { "epoch": 0.92, "learning_rate": 3.4667043139339186e-05, "loss": 0.6675, "step": 83085 }, { "epoch": 0.92, "learning_rate": 3.466612041220067e-05, "loss": 0.7295, "step": 83090 }, { "epoch": 0.92, "learning_rate": 3.4665197685062155e-05, "loss": 0.6331, "step": 83095 }, { "epoch": 0.92, "learning_rate": 3.466427495792364e-05, "loss": 0.7289, "step": 83100 }, { "epoch": 0.92, "learning_rate": 3.466335223078514e-05, "loss": 0.7011, "step": 83105 }, { "epoch": 0.92, "learning_rate": 3.466242950364662e-05, "loss": 0.6822, "step": 83110 }, { "epoch": 0.92, "learning_rate": 3.4661506776508106e-05, "loss": 0.6727, "step": 83115 }, { "epoch": 0.92, "learning_rate": 3.4660584049369594e-05, "loss": 0.6512, "step": 83120 }, { "epoch": 0.92, "learning_rate": 3.465966132223108e-05, "loss": 0.6587, "step": 83125 }, { "epoch": 0.92, "learning_rate": 3.465873859509257e-05, "loss": 0.6782, "step": 83130 }, { "epoch": 0.92, "learning_rate": 3.465781586795406e-05, "loss": 0.7556, "step": 83135 }, { "epoch": 0.92, "learning_rate": 3.4656893140815546e-05, "loss": 0.7655, "step": 83140 }, { "epoch": 0.92, "learning_rate": 3.4655970413677033e-05, "loss": 0.7343, "step": 83145 }, { "epoch": 0.92, "learning_rate": 3.465504768653852e-05, "loss": 0.6442, "step": 83150 }, { "epoch": 0.92, "learning_rate": 3.465412495940001e-05, "loss": 0.6694, "step": 83155 }, { "epoch": 0.92, "learning_rate": 3.46532022322615e-05, "loss": 0.6907, "step": 83160 }, { "epoch": 0.92, "learning_rate": 3.465227950512298e-05, "loss": 0.7719, "step": 83165 }, { "epoch": 0.92, "learning_rate": 3.465135677798447e-05, "loss": 0.6872, "step": 83170 }, { "epoch": 0.92, "learning_rate": 3.465043405084596e-05, "loss": 0.6784, "step": 83175 }, { "epoch": 0.92, "learning_rate": 3.464951132370745e-05, "loss": 0.6627, "step": 83180 }, { "epoch": 0.92, "learning_rate": 3.464858859656893e-05, "loss": 0.698, "step": 83185 }, { "epoch": 0.92, "learning_rate": 3.464766586943042e-05, "loss": 0.7153, "step": 83190 }, { "epoch": 0.92, "learning_rate": 3.464674314229191e-05, "loss": 0.6949, "step": 83195 }, { "epoch": 0.92, "learning_rate": 3.464582041515339e-05, "loss": 0.6986, "step": 83200 }, { "epoch": 0.92, "learning_rate": 3.464489768801488e-05, "loss": 0.7437, "step": 83205 }, { "epoch": 0.92, "learning_rate": 3.464397496087637e-05, "loss": 0.699, "step": 83210 }, { "epoch": 0.92, "learning_rate": 3.464305223373786e-05, "loss": 0.6658, "step": 83215 }, { "epoch": 0.92, "learning_rate": 3.4642129506599344e-05, "loss": 0.7145, "step": 83220 }, { "epoch": 0.92, "learning_rate": 3.464120677946083e-05, "loss": 0.6704, "step": 83225 }, { "epoch": 0.92, "learning_rate": 3.464028405232232e-05, "loss": 0.6917, "step": 83230 }, { "epoch": 0.92, "learning_rate": 3.463936132518381e-05, "loss": 0.6684, "step": 83235 }, { "epoch": 0.92, "learning_rate": 3.4638438598045296e-05, "loss": 0.7158, "step": 83240 }, { "epoch": 0.92, "learning_rate": 3.4637515870906784e-05, "loss": 0.7354, "step": 83245 }, { "epoch": 0.92, "learning_rate": 3.463659314376827e-05, "loss": 0.6707, "step": 83250 }, { "epoch": 0.92, "learning_rate": 3.463567041662976e-05, "loss": 0.7013, "step": 83255 }, { "epoch": 0.92, "learning_rate": 3.463474768949125e-05, "loss": 0.6776, "step": 83260 }, { "epoch": 0.92, "learning_rate": 3.4633824962352735e-05, "loss": 0.7063, "step": 83265 }, { "epoch": 0.92, "learning_rate": 3.463290223521422e-05, "loss": 0.7204, "step": 83270 }, { "epoch": 0.92, "learning_rate": 3.4631979508075704e-05, "loss": 0.6769, "step": 83275 }, { "epoch": 0.92, "learning_rate": 3.46310567809372e-05, "loss": 0.6508, "step": 83280 }, { "epoch": 0.92, "learning_rate": 3.4630134053798686e-05, "loss": 0.7327, "step": 83285 }, { "epoch": 0.92, "learning_rate": 3.4629211326660174e-05, "loss": 0.7056, "step": 83290 }, { "epoch": 0.92, "learning_rate": 3.4628288599521655e-05, "loss": 0.6982, "step": 83295 }, { "epoch": 0.92, "learning_rate": 3.462736587238315e-05, "loss": 0.7038, "step": 83300 }, { "epoch": 0.92, "learning_rate": 3.462644314524464e-05, "loss": 0.6666, "step": 83305 }, { "epoch": 0.92, "learning_rate": 3.462552041810612e-05, "loss": 0.6704, "step": 83310 }, { "epoch": 0.92, "learning_rate": 3.462459769096761e-05, "loss": 0.7156, "step": 83315 }, { "epoch": 0.92, "learning_rate": 3.46236749638291e-05, "loss": 0.6862, "step": 83320 }, { "epoch": 0.92, "learning_rate": 3.462275223669059e-05, "loss": 0.6559, "step": 83325 }, { "epoch": 0.92, "learning_rate": 3.462182950955207e-05, "loss": 0.7517, "step": 83330 }, { "epoch": 0.92, "learning_rate": 3.462090678241356e-05, "loss": 0.6627, "step": 83335 }, { "epoch": 0.92, "learning_rate": 3.4619984055275046e-05, "loss": 0.7616, "step": 83340 }, { "epoch": 0.92, "learning_rate": 3.4619061328136534e-05, "loss": 0.7054, "step": 83345 }, { "epoch": 0.92, "learning_rate": 3.461813860099802e-05, "loss": 0.7225, "step": 83350 }, { "epoch": 0.92, "learning_rate": 3.461721587385951e-05, "loss": 0.6948, "step": 83355 }, { "epoch": 0.92, "learning_rate": 3.4616293146721e-05, "loss": 0.8039, "step": 83360 }, { "epoch": 0.92, "learning_rate": 3.4615370419582485e-05, "loss": 0.6841, "step": 83365 }, { "epoch": 0.92, "learning_rate": 3.461444769244397e-05, "loss": 0.6834, "step": 83370 }, { "epoch": 0.92, "learning_rate": 3.461352496530546e-05, "loss": 0.812, "step": 83375 }, { "epoch": 0.92, "learning_rate": 3.461260223816695e-05, "loss": 0.6858, "step": 83380 }, { "epoch": 0.92, "learning_rate": 3.461167951102844e-05, "loss": 0.7498, "step": 83385 }, { "epoch": 0.92, "learning_rate": 3.4610756783889924e-05, "loss": 0.7291, "step": 83390 }, { "epoch": 0.92, "learning_rate": 3.460983405675141e-05, "loss": 0.6601, "step": 83395 }, { "epoch": 0.92, "learning_rate": 3.46089113296129e-05, "loss": 0.6559, "step": 83400 }, { "epoch": 0.92, "learning_rate": 3.460798860247439e-05, "loss": 0.7653, "step": 83405 }, { "epoch": 0.92, "learning_rate": 3.4607065875335876e-05, "loss": 0.7355, "step": 83410 }, { "epoch": 0.92, "learning_rate": 3.4606143148197364e-05, "loss": 0.7121, "step": 83415 }, { "epoch": 0.92, "learning_rate": 3.460522042105885e-05, "loss": 0.6349, "step": 83420 }, { "epoch": 0.92, "learning_rate": 3.460429769392033e-05, "loss": 0.6755, "step": 83425 }, { "epoch": 0.92, "learning_rate": 3.460337496678183e-05, "loss": 0.6978, "step": 83430 }, { "epoch": 0.92, "learning_rate": 3.4602452239643315e-05, "loss": 0.6948, "step": 83435 }, { "epoch": 0.92, "learning_rate": 3.4601529512504796e-05, "loss": 0.668, "step": 83440 }, { "epoch": 0.92, "learning_rate": 3.4600606785366284e-05, "loss": 0.6655, "step": 83445 }, { "epoch": 0.92, "learning_rate": 3.459968405822778e-05, "loss": 0.7113, "step": 83450 }, { "epoch": 0.92, "learning_rate": 3.4598761331089267e-05, "loss": 0.707, "step": 83455 }, { "epoch": 0.92, "learning_rate": 3.459783860395075e-05, "loss": 0.7449, "step": 83460 }, { "epoch": 0.92, "learning_rate": 3.4596915876812235e-05, "loss": 0.7393, "step": 83465 }, { "epoch": 0.92, "learning_rate": 3.459599314967373e-05, "loss": 0.6935, "step": 83470 }, { "epoch": 0.92, "learning_rate": 3.459507042253521e-05, "loss": 0.7572, "step": 83475 }, { "epoch": 0.92, "learning_rate": 3.45941476953967e-05, "loss": 0.7043, "step": 83480 }, { "epoch": 0.92, "learning_rate": 3.459322496825819e-05, "loss": 0.6443, "step": 83485 }, { "epoch": 0.92, "learning_rate": 3.459230224111968e-05, "loss": 0.682, "step": 83490 }, { "epoch": 0.92, "learning_rate": 3.459137951398116e-05, "loss": 0.6527, "step": 83495 }, { "epoch": 0.92, "learning_rate": 3.459045678684265e-05, "loss": 0.6733, "step": 83500 }, { "epoch": 0.92, "learning_rate": 3.458953405970414e-05, "loss": 0.7275, "step": 83505 }, { "epoch": 0.92, "learning_rate": 3.4588611332565626e-05, "loss": 0.6599, "step": 83510 }, { "epoch": 0.92, "learning_rate": 3.4587688605427114e-05, "loss": 0.6619, "step": 83515 }, { "epoch": 0.92, "learning_rate": 3.45867658782886e-05, "loss": 0.7447, "step": 83520 }, { "epoch": 0.92, "learning_rate": 3.458584315115009e-05, "loss": 0.6586, "step": 83525 }, { "epoch": 0.92, "learning_rate": 3.458492042401158e-05, "loss": 0.6737, "step": 83530 }, { "epoch": 0.92, "learning_rate": 3.4583997696873065e-05, "loss": 0.7253, "step": 83535 }, { "epoch": 0.93, "learning_rate": 3.458307496973455e-05, "loss": 0.6452, "step": 83540 }, { "epoch": 0.93, "learning_rate": 3.458215224259604e-05, "loss": 0.7386, "step": 83545 }, { "epoch": 0.93, "learning_rate": 3.458122951545752e-05, "loss": 0.7132, "step": 83550 }, { "epoch": 0.93, "learning_rate": 3.458030678831902e-05, "loss": 0.673, "step": 83555 }, { "epoch": 0.93, "learning_rate": 3.4579384061180505e-05, "loss": 0.7045, "step": 83560 }, { "epoch": 0.93, "learning_rate": 3.457846133404199e-05, "loss": 0.7083, "step": 83565 }, { "epoch": 0.93, "learning_rate": 3.4577538606903473e-05, "loss": 0.6896, "step": 83570 }, { "epoch": 0.93, "learning_rate": 3.457661587976496e-05, "loss": 0.704, "step": 83575 }, { "epoch": 0.93, "learning_rate": 3.4575693152626456e-05, "loss": 0.6878, "step": 83580 }, { "epoch": 0.93, "learning_rate": 3.457477042548794e-05, "loss": 0.6565, "step": 83585 }, { "epoch": 0.93, "learning_rate": 3.4573847698349425e-05, "loss": 0.6892, "step": 83590 }, { "epoch": 0.93, "learning_rate": 3.457292497121091e-05, "loss": 0.6933, "step": 83595 }, { "epoch": 0.93, "learning_rate": 3.457200224407241e-05, "loss": 0.7033, "step": 83600 }, { "epoch": 0.93, "learning_rate": 3.457107951693389e-05, "loss": 0.6645, "step": 83605 }, { "epoch": 0.93, "learning_rate": 3.4570156789795376e-05, "loss": 0.684, "step": 83610 }, { "epoch": 0.93, "learning_rate": 3.4569234062656864e-05, "loss": 0.703, "step": 83615 }, { "epoch": 0.93, "learning_rate": 3.456831133551835e-05, "loss": 0.6628, "step": 83620 }, { "epoch": 0.93, "learning_rate": 3.456738860837984e-05, "loss": 0.7106, "step": 83625 }, { "epoch": 0.93, "learning_rate": 3.456646588124133e-05, "loss": 0.7155, "step": 83630 }, { "epoch": 0.93, "learning_rate": 3.4565543154102816e-05, "loss": 0.6735, "step": 83635 }, { "epoch": 0.93, "learning_rate": 3.45646204269643e-05, "loss": 0.6419, "step": 83640 }, { "epoch": 0.93, "learning_rate": 3.456369769982579e-05, "loss": 0.7313, "step": 83645 }, { "epoch": 0.93, "learning_rate": 3.456277497268728e-05, "loss": 0.6518, "step": 83650 }, { "epoch": 0.93, "learning_rate": 3.456185224554877e-05, "loss": 0.7356, "step": 83655 }, { "epoch": 0.93, "learning_rate": 3.456092951841025e-05, "loss": 0.7572, "step": 83660 }, { "epoch": 0.93, "learning_rate": 3.456000679127174e-05, "loss": 0.7047, "step": 83665 }, { "epoch": 0.93, "learning_rate": 3.455908406413323e-05, "loss": 0.7153, "step": 83670 }, { "epoch": 0.93, "learning_rate": 3.455816133699472e-05, "loss": 0.6949, "step": 83675 }, { "epoch": 0.93, "learning_rate": 3.45572386098562e-05, "loss": 0.6444, "step": 83680 }, { "epoch": 0.93, "learning_rate": 3.4556315882717694e-05, "loss": 0.6895, "step": 83685 }, { "epoch": 0.93, "learning_rate": 3.455539315557918e-05, "loss": 0.6445, "step": 83690 }, { "epoch": 0.93, "learning_rate": 3.455447042844066e-05, "loss": 0.7199, "step": 83695 }, { "epoch": 0.93, "learning_rate": 3.455354770130215e-05, "loss": 0.6758, "step": 83700 }, { "epoch": 0.93, "learning_rate": 3.4552624974163645e-05, "loss": 0.6487, "step": 83705 }, { "epoch": 0.93, "learning_rate": 3.455170224702513e-05, "loss": 0.7383, "step": 83710 }, { "epoch": 0.93, "learning_rate": 3.4550779519886614e-05, "loss": 0.6593, "step": 83715 }, { "epoch": 0.93, "learning_rate": 3.45498567927481e-05, "loss": 0.7313, "step": 83720 }, { "epoch": 0.93, "learning_rate": 3.454893406560959e-05, "loss": 0.6954, "step": 83725 }, { "epoch": 0.93, "learning_rate": 3.4548011338471085e-05, "loss": 0.7416, "step": 83730 }, { "epoch": 0.93, "learning_rate": 3.4547088611332566e-05, "loss": 0.6844, "step": 83735 }, { "epoch": 0.93, "learning_rate": 3.4546165884194054e-05, "loss": 0.6902, "step": 83740 }, { "epoch": 0.93, "learning_rate": 3.454524315705554e-05, "loss": 0.6761, "step": 83745 }, { "epoch": 0.93, "learning_rate": 3.454432042991703e-05, "loss": 0.6443, "step": 83750 }, { "epoch": 0.93, "learning_rate": 3.454339770277852e-05, "loss": 0.7541, "step": 83755 }, { "epoch": 0.93, "learning_rate": 3.4542474975640005e-05, "loss": 0.6829, "step": 83760 }, { "epoch": 0.93, "learning_rate": 3.454155224850149e-05, "loss": 0.7369, "step": 83765 }, { "epoch": 0.93, "learning_rate": 3.454062952136298e-05, "loss": 0.7512, "step": 83770 }, { "epoch": 0.93, "learning_rate": 3.453970679422447e-05, "loss": 0.6804, "step": 83775 }, { "epoch": 0.93, "learning_rate": 3.4538784067085956e-05, "loss": 0.703, "step": 83780 }, { "epoch": 0.93, "learning_rate": 3.4537861339947444e-05, "loss": 0.6988, "step": 83785 }, { "epoch": 0.93, "learning_rate": 3.453693861280893e-05, "loss": 0.6573, "step": 83790 }, { "epoch": 0.93, "learning_rate": 3.453601588567042e-05, "loss": 0.6779, "step": 83795 }, { "epoch": 0.93, "learning_rate": 3.453509315853191e-05, "loss": 0.708, "step": 83800 }, { "epoch": 0.93, "learning_rate": 3.4534170431393396e-05, "loss": 0.701, "step": 83805 }, { "epoch": 0.93, "learning_rate": 3.453324770425488e-05, "loss": 0.6777, "step": 83810 }, { "epoch": 0.93, "learning_rate": 3.453232497711637e-05, "loss": 0.7248, "step": 83815 }, { "epoch": 0.93, "learning_rate": 3.453140224997786e-05, "loss": 0.7237, "step": 83820 }, { "epoch": 0.93, "learning_rate": 3.453047952283934e-05, "loss": 0.6506, "step": 83825 }, { "epoch": 0.93, "learning_rate": 3.452955679570083e-05, "loss": 0.6761, "step": 83830 }, { "epoch": 0.93, "learning_rate": 3.452863406856232e-05, "loss": 0.6578, "step": 83835 }, { "epoch": 0.93, "learning_rate": 3.452771134142381e-05, "loss": 0.7323, "step": 83840 }, { "epoch": 0.93, "learning_rate": 3.452678861428529e-05, "loss": 0.7244, "step": 83845 }, { "epoch": 0.93, "learning_rate": 3.452586588714678e-05, "loss": 0.707, "step": 83850 }, { "epoch": 0.93, "learning_rate": 3.4524943160008274e-05, "loss": 0.6664, "step": 83855 }, { "epoch": 0.93, "learning_rate": 3.4524020432869755e-05, "loss": 0.6716, "step": 83860 }, { "epoch": 0.93, "learning_rate": 3.452309770573124e-05, "loss": 0.6844, "step": 83865 }, { "epoch": 0.93, "learning_rate": 3.452217497859273e-05, "loss": 0.6788, "step": 83870 }, { "epoch": 0.93, "learning_rate": 3.452125225145422e-05, "loss": 0.7051, "step": 83875 }, { "epoch": 0.93, "learning_rate": 3.4520329524315707e-05, "loss": 0.6789, "step": 83880 }, { "epoch": 0.93, "learning_rate": 3.4519406797177194e-05, "loss": 0.6487, "step": 83885 }, { "epoch": 0.93, "learning_rate": 3.451848407003868e-05, "loss": 0.6467, "step": 83890 }, { "epoch": 0.93, "learning_rate": 3.451756134290017e-05, "loss": 0.7038, "step": 83895 }, { "epoch": 0.93, "learning_rate": 3.451663861576166e-05, "loss": 0.6624, "step": 83900 }, { "epoch": 0.93, "learning_rate": 3.4515715888623146e-05, "loss": 0.6369, "step": 83905 }, { "epoch": 0.93, "learning_rate": 3.4514793161484634e-05, "loss": 0.7401, "step": 83910 }, { "epoch": 0.93, "learning_rate": 3.451387043434612e-05, "loss": 0.6994, "step": 83915 }, { "epoch": 0.93, "learning_rate": 3.451294770720761e-05, "loss": 0.6532, "step": 83920 }, { "epoch": 0.93, "learning_rate": 3.45120249800691e-05, "loss": 0.7487, "step": 83925 }, { "epoch": 0.93, "learning_rate": 3.4511102252930585e-05, "loss": 0.7014, "step": 83930 }, { "epoch": 0.93, "learning_rate": 3.4510179525792066e-05, "loss": 0.73, "step": 83935 }, { "epoch": 0.93, "learning_rate": 3.450925679865356e-05, "loss": 0.7231, "step": 83940 }, { "epoch": 0.93, "learning_rate": 3.450833407151505e-05, "loss": 0.677, "step": 83945 }, { "epoch": 0.93, "learning_rate": 3.4507411344376536e-05, "loss": 0.6353, "step": 83950 }, { "epoch": 0.93, "learning_rate": 3.450648861723802e-05, "loss": 0.7311, "step": 83955 }, { "epoch": 0.93, "learning_rate": 3.4505565890099505e-05, "loss": 0.6523, "step": 83960 }, { "epoch": 0.93, "learning_rate": 3.4504643162961e-05, "loss": 0.7516, "step": 83965 }, { "epoch": 0.93, "learning_rate": 3.450372043582248e-05, "loss": 0.7683, "step": 83970 }, { "epoch": 0.93, "learning_rate": 3.450279770868397e-05, "loss": 0.7095, "step": 83975 }, { "epoch": 0.93, "learning_rate": 3.450187498154546e-05, "loss": 0.6882, "step": 83980 }, { "epoch": 0.93, "learning_rate": 3.450095225440695e-05, "loss": 0.695, "step": 83985 }, { "epoch": 0.93, "learning_rate": 3.450002952726843e-05, "loss": 0.7635, "step": 83990 }, { "epoch": 0.93, "learning_rate": 3.449910680012992e-05, "loss": 0.7, "step": 83995 }, { "epoch": 0.93, "learning_rate": 3.449818407299141e-05, "loss": 0.6972, "step": 84000 }, { "epoch": 0.93, "eval_loss": 0.6455740928649902, "eval_runtime": 114.0169, "eval_samples_per_second": 17.541, "eval_steps_per_second": 8.771, "step": 84000 }, { "epoch": 0.93, "learning_rate": 3.4497261345852896e-05, "loss": 0.6498, "step": 84005 }, { "epoch": 0.93, "learning_rate": 3.4496338618714384e-05, "loss": 0.7118, "step": 84010 }, { "epoch": 0.93, "learning_rate": 3.449541589157587e-05, "loss": 0.6699, "step": 84015 }, { "epoch": 0.93, "learning_rate": 3.449449316443736e-05, "loss": 0.7064, "step": 84020 }, { "epoch": 0.93, "learning_rate": 3.449357043729885e-05, "loss": 0.6937, "step": 84025 }, { "epoch": 0.93, "learning_rate": 3.4492647710160335e-05, "loss": 0.7119, "step": 84030 }, { "epoch": 0.93, "learning_rate": 3.449172498302182e-05, "loss": 0.653, "step": 84035 }, { "epoch": 0.93, "learning_rate": 3.449080225588331e-05, "loss": 0.6729, "step": 84040 }, { "epoch": 0.93, "learning_rate": 3.448987952874479e-05, "loss": 0.7097, "step": 84045 }, { "epoch": 0.93, "learning_rate": 3.448895680160629e-05, "loss": 0.6285, "step": 84050 }, { "epoch": 0.93, "learning_rate": 3.4488034074467774e-05, "loss": 0.7084, "step": 84055 }, { "epoch": 0.93, "learning_rate": 3.448711134732926e-05, "loss": 0.6862, "step": 84060 }, { "epoch": 0.93, "learning_rate": 3.4486188620190743e-05, "loss": 0.665, "step": 84065 }, { "epoch": 0.93, "learning_rate": 3.448526589305224e-05, "loss": 0.7357, "step": 84070 }, { "epoch": 0.93, "learning_rate": 3.4484343165913726e-05, "loss": 0.6853, "step": 84075 }, { "epoch": 0.93, "learning_rate": 3.448342043877521e-05, "loss": 0.6599, "step": 84080 }, { "epoch": 0.93, "learning_rate": 3.4482497711636695e-05, "loss": 0.6755, "step": 84085 }, { "epoch": 0.93, "learning_rate": 3.448157498449819e-05, "loss": 0.7464, "step": 84090 }, { "epoch": 0.93, "learning_rate": 3.448065225735968e-05, "loss": 0.7186, "step": 84095 }, { "epoch": 0.93, "learning_rate": 3.447972953022116e-05, "loss": 0.7247, "step": 84100 }, { "epoch": 0.93, "learning_rate": 3.4478806803082646e-05, "loss": 0.754, "step": 84105 }, { "epoch": 0.93, "learning_rate": 3.4477884075944134e-05, "loss": 0.7022, "step": 84110 }, { "epoch": 0.93, "learning_rate": 3.447696134880563e-05, "loss": 0.7213, "step": 84115 }, { "epoch": 0.93, "learning_rate": 3.447603862166711e-05, "loss": 0.7563, "step": 84120 }, { "epoch": 0.93, "learning_rate": 3.44751158945286e-05, "loss": 0.72, "step": 84125 }, { "epoch": 0.93, "learning_rate": 3.4474193167390085e-05, "loss": 0.6421, "step": 84130 }, { "epoch": 0.93, "learning_rate": 3.447327044025157e-05, "loss": 0.7218, "step": 84135 }, { "epoch": 0.93, "learning_rate": 3.447234771311306e-05, "loss": 0.6813, "step": 84140 }, { "epoch": 0.93, "learning_rate": 3.447142498597455e-05, "loss": 0.7189, "step": 84145 }, { "epoch": 0.93, "learning_rate": 3.447050225883604e-05, "loss": 0.6721, "step": 84150 }, { "epoch": 0.93, "learning_rate": 3.4469579531697525e-05, "loss": 0.6521, "step": 84155 }, { "epoch": 0.93, "learning_rate": 3.446865680455901e-05, "loss": 0.7593, "step": 84160 }, { "epoch": 0.93, "learning_rate": 3.44677340774205e-05, "loss": 0.6694, "step": 84165 }, { "epoch": 0.93, "learning_rate": 3.446681135028199e-05, "loss": 0.6426, "step": 84170 }, { "epoch": 0.93, "learning_rate": 3.446588862314347e-05, "loss": 0.7047, "step": 84175 }, { "epoch": 0.93, "learning_rate": 3.4464965896004964e-05, "loss": 0.689, "step": 84180 }, { "epoch": 0.93, "learning_rate": 3.446404316886645e-05, "loss": 0.679, "step": 84185 }, { "epoch": 0.93, "learning_rate": 3.446312044172794e-05, "loss": 0.739, "step": 84190 }, { "epoch": 0.93, "learning_rate": 3.446219771458942e-05, "loss": 0.6469, "step": 84195 }, { "epoch": 0.93, "learning_rate": 3.4461274987450915e-05, "loss": 0.7238, "step": 84200 }, { "epoch": 0.93, "learning_rate": 3.44603522603124e-05, "loss": 0.6636, "step": 84205 }, { "epoch": 0.93, "learning_rate": 3.4459429533173884e-05, "loss": 0.7716, "step": 84210 }, { "epoch": 0.93, "learning_rate": 3.445850680603537e-05, "loss": 0.6954, "step": 84215 }, { "epoch": 0.93, "learning_rate": 3.445758407889687e-05, "loss": 0.7429, "step": 84220 }, { "epoch": 0.93, "learning_rate": 3.4456661351758355e-05, "loss": 0.7645, "step": 84225 }, { "epoch": 0.93, "learning_rate": 3.4455738624619836e-05, "loss": 0.6638, "step": 84230 }, { "epoch": 0.93, "learning_rate": 3.4454815897481323e-05, "loss": 0.7631, "step": 84235 }, { "epoch": 0.93, "learning_rate": 3.445389317034282e-05, "loss": 0.7116, "step": 84240 }, { "epoch": 0.93, "learning_rate": 3.44529704432043e-05, "loss": 0.6455, "step": 84245 }, { "epoch": 0.93, "learning_rate": 3.445204771606579e-05, "loss": 0.6756, "step": 84250 }, { "epoch": 0.93, "learning_rate": 3.4451124988927275e-05, "loss": 0.66, "step": 84255 }, { "epoch": 0.93, "learning_rate": 3.445020226178876e-05, "loss": 0.6898, "step": 84260 }, { "epoch": 0.93, "learning_rate": 3.444927953465025e-05, "loss": 0.6638, "step": 84265 }, { "epoch": 0.93, "learning_rate": 3.444835680751174e-05, "loss": 0.6653, "step": 84270 }, { "epoch": 0.93, "learning_rate": 3.4447434080373226e-05, "loss": 0.6981, "step": 84275 }, { "epoch": 0.93, "learning_rate": 3.4446511353234714e-05, "loss": 0.6388, "step": 84280 }, { "epoch": 0.93, "learning_rate": 3.44455886260962e-05, "loss": 0.7232, "step": 84285 }, { "epoch": 0.93, "learning_rate": 3.444466589895769e-05, "loss": 0.7089, "step": 84290 }, { "epoch": 0.93, "learning_rate": 3.444374317181918e-05, "loss": 0.7296, "step": 84295 }, { "epoch": 0.93, "learning_rate": 3.4442820444680666e-05, "loss": 0.7294, "step": 84300 }, { "epoch": 0.93, "learning_rate": 3.444189771754215e-05, "loss": 0.6939, "step": 84305 }, { "epoch": 0.93, "learning_rate": 3.444097499040364e-05, "loss": 0.6696, "step": 84310 }, { "epoch": 0.93, "learning_rate": 3.444005226326513e-05, "loss": 0.6766, "step": 84315 }, { "epoch": 0.93, "learning_rate": 3.443912953612661e-05, "loss": 0.6261, "step": 84320 }, { "epoch": 0.93, "learning_rate": 3.4438206808988105e-05, "loss": 0.6947, "step": 84325 }, { "epoch": 0.93, "learning_rate": 3.443728408184959e-05, "loss": 0.6777, "step": 84330 }, { "epoch": 0.93, "learning_rate": 3.443636135471108e-05, "loss": 0.7053, "step": 84335 }, { "epoch": 0.93, "learning_rate": 3.443543862757256e-05, "loss": 0.7105, "step": 84340 }, { "epoch": 0.93, "learning_rate": 3.443451590043405e-05, "loss": 0.6822, "step": 84345 }, { "epoch": 0.93, "learning_rate": 3.4433593173295544e-05, "loss": 0.6392, "step": 84350 }, { "epoch": 0.93, "learning_rate": 3.4432670446157025e-05, "loss": 0.65, "step": 84355 }, { "epoch": 0.93, "learning_rate": 3.443174771901851e-05, "loss": 0.669, "step": 84360 }, { "epoch": 0.93, "learning_rate": 3.443082499188e-05, "loss": 0.6545, "step": 84365 }, { "epoch": 0.93, "learning_rate": 3.4429902264741495e-05, "loss": 0.6666, "step": 84370 }, { "epoch": 0.93, "learning_rate": 3.4428979537602976e-05, "loss": 0.6961, "step": 84375 }, { "epoch": 0.93, "learning_rate": 3.4428056810464464e-05, "loss": 0.6876, "step": 84380 }, { "epoch": 0.93, "learning_rate": 3.442713408332595e-05, "loss": 0.7252, "step": 84385 }, { "epoch": 0.93, "learning_rate": 3.442621135618744e-05, "loss": 0.7118, "step": 84390 }, { "epoch": 0.93, "learning_rate": 3.442528862904893e-05, "loss": 0.6505, "step": 84395 }, { "epoch": 0.93, "learning_rate": 3.4424365901910416e-05, "loss": 0.6645, "step": 84400 }, { "epoch": 0.93, "learning_rate": 3.4423443174771904e-05, "loss": 0.6693, "step": 84405 }, { "epoch": 0.93, "learning_rate": 3.442252044763339e-05, "loss": 0.7259, "step": 84410 }, { "epoch": 0.93, "learning_rate": 3.442159772049488e-05, "loss": 0.716, "step": 84415 }, { "epoch": 0.93, "learning_rate": 3.442067499335637e-05, "loss": 0.6966, "step": 84420 }, { "epoch": 0.93, "learning_rate": 3.4419752266217855e-05, "loss": 0.6763, "step": 84425 }, { "epoch": 0.93, "learning_rate": 3.4418829539079336e-05, "loss": 0.7171, "step": 84430 }, { "epoch": 0.93, "learning_rate": 3.441790681194083e-05, "loss": 0.7516, "step": 84435 }, { "epoch": 0.93, "learning_rate": 3.441698408480232e-05, "loss": 0.6782, "step": 84440 }, { "epoch": 0.94, "learning_rate": 3.4416061357663806e-05, "loss": 0.7245, "step": 84445 }, { "epoch": 0.94, "learning_rate": 3.441513863052529e-05, "loss": 0.7204, "step": 84450 }, { "epoch": 0.94, "learning_rate": 3.441421590338678e-05, "loss": 0.6869, "step": 84455 }, { "epoch": 0.94, "learning_rate": 3.441329317624827e-05, "loss": 0.6913, "step": 84460 }, { "epoch": 0.94, "learning_rate": 3.441237044910975e-05, "loss": 0.7431, "step": 84465 }, { "epoch": 0.94, "learning_rate": 3.441144772197124e-05, "loss": 0.6706, "step": 84470 }, { "epoch": 0.94, "learning_rate": 3.4410524994832733e-05, "loss": 0.6991, "step": 84475 }, { "epoch": 0.94, "learning_rate": 3.440960226769422e-05, "loss": 0.6593, "step": 84480 }, { "epoch": 0.94, "learning_rate": 3.44086795405557e-05, "loss": 0.723, "step": 84485 }, { "epoch": 0.94, "learning_rate": 3.440775681341719e-05, "loss": 0.6773, "step": 84490 }, { "epoch": 0.94, "learning_rate": 3.440683408627868e-05, "loss": 0.6946, "step": 84495 }, { "epoch": 0.94, "learning_rate": 3.440591135914017e-05, "loss": 0.7262, "step": 84500 }, { "epoch": 0.94, "learning_rate": 3.4404988632001654e-05, "loss": 0.7251, "step": 84505 }, { "epoch": 0.94, "learning_rate": 3.440406590486314e-05, "loss": 0.6884, "step": 84510 }, { "epoch": 0.94, "learning_rate": 3.440314317772463e-05, "loss": 0.7099, "step": 84515 }, { "epoch": 0.94, "learning_rate": 3.440222045058612e-05, "loss": 0.7173, "step": 84520 }, { "epoch": 0.94, "learning_rate": 3.4401297723447605e-05, "loss": 0.6758, "step": 84525 }, { "epoch": 0.94, "learning_rate": 3.440037499630909e-05, "loss": 0.66, "step": 84530 }, { "epoch": 0.94, "learning_rate": 3.439945226917058e-05, "loss": 0.6734, "step": 84535 }, { "epoch": 0.94, "learning_rate": 3.439852954203207e-05, "loss": 0.6533, "step": 84540 }, { "epoch": 0.94, "learning_rate": 3.4397606814893557e-05, "loss": 0.7255, "step": 84545 }, { "epoch": 0.94, "learning_rate": 3.4396684087755044e-05, "loss": 0.6781, "step": 84550 }, { "epoch": 0.94, "learning_rate": 3.439576136061653e-05, "loss": 0.6844, "step": 84555 }, { "epoch": 0.94, "learning_rate": 3.439483863347801e-05, "loss": 0.6907, "step": 84560 }, { "epoch": 0.94, "learning_rate": 3.439391590633951e-05, "loss": 0.6793, "step": 84565 }, { "epoch": 0.94, "learning_rate": 3.4392993179200996e-05, "loss": 0.6579, "step": 84570 }, { "epoch": 0.94, "learning_rate": 3.4392070452062484e-05, "loss": 0.6952, "step": 84575 }, { "epoch": 0.94, "learning_rate": 3.4391147724923965e-05, "loss": 0.677, "step": 84580 }, { "epoch": 0.94, "learning_rate": 3.439022499778546e-05, "loss": 0.728, "step": 84585 }, { "epoch": 0.94, "learning_rate": 3.438930227064695e-05, "loss": 0.7433, "step": 84590 }, { "epoch": 0.94, "learning_rate": 3.438837954350843e-05, "loss": 0.7749, "step": 84595 }, { "epoch": 0.94, "learning_rate": 3.4387456816369916e-05, "loss": 0.6979, "step": 84600 }, { "epoch": 0.94, "learning_rate": 3.438653408923141e-05, "loss": 0.722, "step": 84605 }, { "epoch": 0.94, "learning_rate": 3.43856113620929e-05, "loss": 0.6824, "step": 84610 }, { "epoch": 0.94, "learning_rate": 3.438468863495438e-05, "loss": 0.6647, "step": 84615 }, { "epoch": 0.94, "learning_rate": 3.438376590781587e-05, "loss": 0.7234, "step": 84620 }, { "epoch": 0.94, "learning_rate": 3.438284318067736e-05, "loss": 0.7026, "step": 84625 }, { "epoch": 0.94, "learning_rate": 3.438192045353884e-05, "loss": 0.6707, "step": 84630 }, { "epoch": 0.94, "learning_rate": 3.438099772640033e-05, "loss": 0.6915, "step": 84635 }, { "epoch": 0.94, "learning_rate": 3.438007499926182e-05, "loss": 0.7265, "step": 84640 }, { "epoch": 0.94, "learning_rate": 3.437915227212331e-05, "loss": 0.665, "step": 84645 }, { "epoch": 0.94, "learning_rate": 3.4378229544984795e-05, "loss": 0.6409, "step": 84650 }, { "epoch": 0.94, "learning_rate": 3.437730681784628e-05, "loss": 0.6885, "step": 84655 }, { "epoch": 0.94, "learning_rate": 3.437638409070777e-05, "loss": 0.7794, "step": 84660 }, { "epoch": 0.94, "learning_rate": 3.437546136356926e-05, "loss": 0.6497, "step": 84665 }, { "epoch": 0.94, "learning_rate": 3.4374538636430746e-05, "loss": 0.6776, "step": 84670 }, { "epoch": 0.94, "learning_rate": 3.4373615909292234e-05, "loss": 0.6555, "step": 84675 }, { "epoch": 0.94, "learning_rate": 3.437269318215372e-05, "loss": 0.7301, "step": 84680 }, { "epoch": 0.94, "learning_rate": 3.437177045501521e-05, "loss": 0.6886, "step": 84685 }, { "epoch": 0.94, "learning_rate": 3.43708477278767e-05, "loss": 0.6781, "step": 84690 }, { "epoch": 0.94, "learning_rate": 3.4369925000738185e-05, "loss": 0.6943, "step": 84695 }, { "epoch": 0.94, "learning_rate": 3.436900227359967e-05, "loss": 0.672, "step": 84700 }, { "epoch": 0.94, "learning_rate": 3.4368079546461154e-05, "loss": 0.6831, "step": 84705 }, { "epoch": 0.94, "learning_rate": 3.436715681932264e-05, "loss": 0.7317, "step": 84710 }, { "epoch": 0.94, "learning_rate": 3.436623409218414e-05, "loss": 0.7098, "step": 84715 }, { "epoch": 0.94, "learning_rate": 3.4365311365045624e-05, "loss": 0.6417, "step": 84720 }, { "epoch": 0.94, "learning_rate": 3.4364388637907106e-05, "loss": 0.6912, "step": 84725 }, { "epoch": 0.94, "learning_rate": 3.4363465910768593e-05, "loss": 0.7002, "step": 84730 }, { "epoch": 0.94, "learning_rate": 3.436254318363009e-05, "loss": 0.702, "step": 84735 }, { "epoch": 0.94, "learning_rate": 3.436162045649157e-05, "loss": 0.6574, "step": 84740 }, { "epoch": 0.94, "learning_rate": 3.436069772935306e-05, "loss": 0.6811, "step": 84745 }, { "epoch": 0.94, "learning_rate": 3.4359775002214545e-05, "loss": 0.757, "step": 84750 }, { "epoch": 0.94, "learning_rate": 3.435885227507604e-05, "loss": 0.6507, "step": 84755 }, { "epoch": 0.94, "learning_rate": 3.435792954793752e-05, "loss": 0.6111, "step": 84760 }, { "epoch": 0.94, "learning_rate": 3.435700682079901e-05, "loss": 0.6689, "step": 84765 }, { "epoch": 0.94, "learning_rate": 3.4356084093660496e-05, "loss": 0.7301, "step": 84770 }, { "epoch": 0.94, "learning_rate": 3.4355161366521984e-05, "loss": 0.6773, "step": 84775 }, { "epoch": 0.94, "learning_rate": 3.435423863938347e-05, "loss": 0.6534, "step": 84780 }, { "epoch": 0.94, "learning_rate": 3.435331591224496e-05, "loss": 0.6596, "step": 84785 }, { "epoch": 0.94, "learning_rate": 3.435239318510645e-05, "loss": 0.6725, "step": 84790 }, { "epoch": 0.94, "learning_rate": 3.4351470457967935e-05, "loss": 0.7158, "step": 84795 }, { "epoch": 0.94, "learning_rate": 3.435054773082942e-05, "loss": 0.6297, "step": 84800 }, { "epoch": 0.94, "learning_rate": 3.434962500369091e-05, "loss": 0.6216, "step": 84805 }, { "epoch": 0.94, "learning_rate": 3.43487022765524e-05, "loss": 0.702, "step": 84810 }, { "epoch": 0.94, "learning_rate": 3.434777954941388e-05, "loss": 0.7175, "step": 84815 }, { "epoch": 0.94, "learning_rate": 3.4346856822275375e-05, "loss": 0.6656, "step": 84820 }, { "epoch": 0.94, "learning_rate": 3.434593409513686e-05, "loss": 0.6844, "step": 84825 }, { "epoch": 0.94, "learning_rate": 3.434501136799835e-05, "loss": 0.7374, "step": 84830 }, { "epoch": 0.94, "learning_rate": 3.434408864085983e-05, "loss": 0.6952, "step": 84835 }, { "epoch": 0.94, "learning_rate": 3.4343165913721326e-05, "loss": 0.7415, "step": 84840 }, { "epoch": 0.94, "learning_rate": 3.4342243186582814e-05, "loss": 0.6905, "step": 84845 }, { "epoch": 0.94, "learning_rate": 3.4341320459444295e-05, "loss": 0.7194, "step": 84850 }, { "epoch": 0.94, "learning_rate": 3.434039773230578e-05, "loss": 0.6533, "step": 84855 }, { "epoch": 0.94, "learning_rate": 3.433947500516727e-05, "loss": 0.7171, "step": 84860 }, { "epoch": 0.94, "learning_rate": 3.4338552278028765e-05, "loss": 0.6894, "step": 84865 }, { "epoch": 0.94, "learning_rate": 3.4337629550890246e-05, "loss": 0.6629, "step": 84870 }, { "epoch": 0.94, "learning_rate": 3.4336706823751734e-05, "loss": 0.6952, "step": 84875 }, { "epoch": 0.94, "learning_rate": 3.433578409661322e-05, "loss": 0.7532, "step": 84880 }, { "epoch": 0.94, "learning_rate": 3.433486136947472e-05, "loss": 0.6543, "step": 84885 }, { "epoch": 0.94, "learning_rate": 3.43339386423362e-05, "loss": 0.6838, "step": 84890 }, { "epoch": 0.94, "learning_rate": 3.4333015915197686e-05, "loss": 0.6996, "step": 84895 }, { "epoch": 0.94, "learning_rate": 3.4332093188059173e-05, "loss": 0.6782, "step": 84900 }, { "epoch": 0.94, "learning_rate": 3.433117046092066e-05, "loss": 0.6919, "step": 84905 }, { "epoch": 0.94, "learning_rate": 3.433024773378215e-05, "loss": 0.696, "step": 84910 }, { "epoch": 0.94, "learning_rate": 3.432932500664364e-05, "loss": 0.7316, "step": 84915 }, { "epoch": 0.94, "learning_rate": 3.4328402279505125e-05, "loss": 0.7172, "step": 84920 }, { "epoch": 0.94, "learning_rate": 3.432747955236661e-05, "loss": 0.6532, "step": 84925 }, { "epoch": 0.94, "learning_rate": 3.43265568252281e-05, "loss": 0.7333, "step": 84930 }, { "epoch": 0.94, "learning_rate": 3.432563409808959e-05, "loss": 0.6702, "step": 84935 }, { "epoch": 0.94, "learning_rate": 3.4324711370951076e-05, "loss": 0.6615, "step": 84940 }, { "epoch": 0.94, "learning_rate": 3.432378864381256e-05, "loss": 0.7517, "step": 84945 }, { "epoch": 0.94, "learning_rate": 3.432286591667405e-05, "loss": 0.6423, "step": 84950 }, { "epoch": 0.94, "learning_rate": 3.432194318953554e-05, "loss": 0.7558, "step": 84955 }, { "epoch": 0.94, "learning_rate": 3.432102046239703e-05, "loss": 0.7149, "step": 84960 }, { "epoch": 0.94, "learning_rate": 3.432009773525851e-05, "loss": 0.739, "step": 84965 }, { "epoch": 0.94, "learning_rate": 3.431917500812e-05, "loss": 0.6825, "step": 84970 }, { "epoch": 0.94, "learning_rate": 3.431825228098149e-05, "loss": 0.6879, "step": 84975 }, { "epoch": 0.94, "learning_rate": 3.431732955384297e-05, "loss": 0.6967, "step": 84980 }, { "epoch": 0.94, "learning_rate": 3.431640682670446e-05, "loss": 0.6763, "step": 84985 }, { "epoch": 0.94, "learning_rate": 3.4315484099565955e-05, "loss": 0.7627, "step": 84990 }, { "epoch": 0.94, "learning_rate": 3.431456137242744e-05, "loss": 0.7306, "step": 84995 }, { "epoch": 0.94, "learning_rate": 3.4313638645288924e-05, "loss": 0.7041, "step": 85000 }, { "epoch": 0.94, "eval_loss": 0.6497337818145752, "eval_runtime": 69.303, "eval_samples_per_second": 28.859, "eval_steps_per_second": 14.429, "step": 85000 }, { "epoch": 0.94, "learning_rate": 3.431271591815041e-05, "loss": 0.6861, "step": 85005 }, { "epoch": 0.94, "learning_rate": 3.43117931910119e-05, "loss": 0.7083, "step": 85010 }, { "epoch": 0.94, "learning_rate": 3.431087046387339e-05, "loss": 0.7527, "step": 85015 }, { "epoch": 0.94, "learning_rate": 3.4309947736734875e-05, "loss": 0.733, "step": 85020 }, { "epoch": 0.94, "learning_rate": 3.430902500959636e-05, "loss": 0.7097, "step": 85025 }, { "epoch": 0.94, "learning_rate": 3.430810228245785e-05, "loss": 0.6821, "step": 85030 }, { "epoch": 0.94, "learning_rate": 3.430717955531934e-05, "loss": 0.7245, "step": 85035 }, { "epoch": 0.94, "learning_rate": 3.4306256828180826e-05, "loss": 0.7203, "step": 85040 }, { "epoch": 0.94, "learning_rate": 3.4305334101042314e-05, "loss": 0.653, "step": 85045 }, { "epoch": 0.94, "learning_rate": 3.43044113739038e-05, "loss": 0.6557, "step": 85050 }, { "epoch": 0.94, "learning_rate": 3.430348864676529e-05, "loss": 0.7047, "step": 85055 }, { "epoch": 0.94, "learning_rate": 3.430256591962678e-05, "loss": 0.7084, "step": 85060 }, { "epoch": 0.94, "learning_rate": 3.4301643192488266e-05, "loss": 0.6895, "step": 85065 }, { "epoch": 0.94, "learning_rate": 3.4300720465349754e-05, "loss": 0.7636, "step": 85070 }, { "epoch": 0.94, "learning_rate": 3.429979773821124e-05, "loss": 0.6592, "step": 85075 }, { "epoch": 0.94, "learning_rate": 3.429887501107273e-05, "loss": 0.7394, "step": 85080 }, { "epoch": 0.94, "learning_rate": 3.429795228393422e-05, "loss": 0.6819, "step": 85085 }, { "epoch": 0.94, "learning_rate": 3.42970295567957e-05, "loss": 0.6519, "step": 85090 }, { "epoch": 0.94, "learning_rate": 3.4296106829657186e-05, "loss": 0.6595, "step": 85095 }, { "epoch": 0.94, "learning_rate": 3.429518410251868e-05, "loss": 0.658, "step": 85100 }, { "epoch": 0.94, "learning_rate": 3.429426137538017e-05, "loss": 0.6896, "step": 85105 }, { "epoch": 0.94, "learning_rate": 3.429333864824165e-05, "loss": 0.6437, "step": 85110 }, { "epoch": 0.94, "learning_rate": 3.429241592110314e-05, "loss": 0.6728, "step": 85115 }, { "epoch": 0.94, "learning_rate": 3.429149319396463e-05, "loss": 0.7231, "step": 85120 }, { "epoch": 0.94, "learning_rate": 3.429057046682611e-05, "loss": 0.6588, "step": 85125 }, { "epoch": 0.94, "learning_rate": 3.42896477396876e-05, "loss": 0.6983, "step": 85130 }, { "epoch": 0.94, "learning_rate": 3.428872501254909e-05, "loss": 0.6943, "step": 85135 }, { "epoch": 0.94, "learning_rate": 3.4287802285410583e-05, "loss": 0.6653, "step": 85140 }, { "epoch": 0.94, "learning_rate": 3.4286879558272065e-05, "loss": 0.7033, "step": 85145 }, { "epoch": 0.94, "learning_rate": 3.428595683113355e-05, "loss": 0.6486, "step": 85150 }, { "epoch": 0.94, "learning_rate": 3.428503410399504e-05, "loss": 0.7439, "step": 85155 }, { "epoch": 0.94, "learning_rate": 3.428411137685653e-05, "loss": 0.6671, "step": 85160 }, { "epoch": 0.94, "learning_rate": 3.4283188649718016e-05, "loss": 0.6878, "step": 85165 }, { "epoch": 0.94, "learning_rate": 3.4282265922579504e-05, "loss": 0.7466, "step": 85170 }, { "epoch": 0.94, "learning_rate": 3.428134319544099e-05, "loss": 0.674, "step": 85175 }, { "epoch": 0.94, "learning_rate": 3.428042046830248e-05, "loss": 0.6895, "step": 85180 }, { "epoch": 0.94, "learning_rate": 3.427949774116397e-05, "loss": 0.6611, "step": 85185 }, { "epoch": 0.94, "learning_rate": 3.4278575014025455e-05, "loss": 0.6736, "step": 85190 }, { "epoch": 0.94, "learning_rate": 3.427765228688694e-05, "loss": 0.674, "step": 85195 }, { "epoch": 0.94, "learning_rate": 3.4276729559748424e-05, "loss": 0.645, "step": 85200 }, { "epoch": 0.94, "learning_rate": 3.427580683260992e-05, "loss": 0.7011, "step": 85205 }, { "epoch": 0.94, "learning_rate": 3.4274884105471407e-05, "loss": 0.7511, "step": 85210 }, { "epoch": 0.94, "learning_rate": 3.4273961378332894e-05, "loss": 0.7219, "step": 85215 }, { "epoch": 0.94, "learning_rate": 3.4273038651194375e-05, "loss": 0.6813, "step": 85220 }, { "epoch": 0.94, "learning_rate": 3.427211592405587e-05, "loss": 0.6994, "step": 85225 }, { "epoch": 0.94, "learning_rate": 3.427119319691736e-05, "loss": 0.6629, "step": 85230 }, { "epoch": 0.94, "learning_rate": 3.427027046977884e-05, "loss": 0.7558, "step": 85235 }, { "epoch": 0.94, "learning_rate": 3.426934774264033e-05, "loss": 0.6913, "step": 85240 }, { "epoch": 0.94, "learning_rate": 3.4268425015501815e-05, "loss": 0.7214, "step": 85245 }, { "epoch": 0.94, "learning_rate": 3.426750228836331e-05, "loss": 0.7049, "step": 85250 }, { "epoch": 0.94, "learning_rate": 3.426657956122479e-05, "loss": 0.6963, "step": 85255 }, { "epoch": 0.94, "learning_rate": 3.426565683408628e-05, "loss": 0.6364, "step": 85260 }, { "epoch": 0.94, "learning_rate": 3.4264734106947766e-05, "loss": 0.6767, "step": 85265 }, { "epoch": 0.94, "learning_rate": 3.426381137980926e-05, "loss": 0.6677, "step": 85270 }, { "epoch": 0.94, "learning_rate": 3.426288865267074e-05, "loss": 0.7327, "step": 85275 }, { "epoch": 0.94, "learning_rate": 3.426196592553223e-05, "loss": 0.6645, "step": 85280 }, { "epoch": 0.94, "learning_rate": 3.426104319839372e-05, "loss": 0.6581, "step": 85285 }, { "epoch": 0.94, "learning_rate": 3.4260120471255205e-05, "loss": 0.713, "step": 85290 }, { "epoch": 0.94, "learning_rate": 3.425919774411669e-05, "loss": 0.76, "step": 85295 }, { "epoch": 0.94, "learning_rate": 3.425827501697818e-05, "loss": 0.6244, "step": 85300 }, { "epoch": 0.94, "learning_rate": 3.425735228983967e-05, "loss": 0.6556, "step": 85305 }, { "epoch": 0.94, "learning_rate": 3.425642956270116e-05, "loss": 0.6753, "step": 85310 }, { "epoch": 0.94, "learning_rate": 3.4255506835562645e-05, "loss": 0.7178, "step": 85315 }, { "epoch": 0.94, "learning_rate": 3.425458410842413e-05, "loss": 0.5981, "step": 85320 }, { "epoch": 0.94, "learning_rate": 3.425366138128562e-05, "loss": 0.7275, "step": 85325 }, { "epoch": 0.94, "learning_rate": 3.42527386541471e-05, "loss": 0.7118, "step": 85330 }, { "epoch": 0.94, "learning_rate": 3.4251815927008596e-05, "loss": 0.6896, "step": 85335 }, { "epoch": 0.94, "learning_rate": 3.4250893199870084e-05, "loss": 0.748, "step": 85340 }, { "epoch": 0.95, "learning_rate": 3.424997047273157e-05, "loss": 0.721, "step": 85345 }, { "epoch": 0.95, "learning_rate": 3.424904774559305e-05, "loss": 0.6981, "step": 85350 }, { "epoch": 0.95, "learning_rate": 3.424812501845455e-05, "loss": 0.6826, "step": 85355 }, { "epoch": 0.95, "learning_rate": 3.4247202291316035e-05, "loss": 0.664, "step": 85360 }, { "epoch": 0.95, "learning_rate": 3.4246279564177516e-05, "loss": 0.7369, "step": 85365 }, { "epoch": 0.95, "learning_rate": 3.4245356837039004e-05, "loss": 0.7149, "step": 85370 }, { "epoch": 0.95, "learning_rate": 3.42444341099005e-05, "loss": 0.7309, "step": 85375 }, { "epoch": 0.95, "learning_rate": 3.424351138276199e-05, "loss": 0.6774, "step": 85380 }, { "epoch": 0.95, "learning_rate": 3.424258865562347e-05, "loss": 0.6929, "step": 85385 }, { "epoch": 0.95, "learning_rate": 3.4241665928484956e-05, "loss": 0.6717, "step": 85390 }, { "epoch": 0.95, "learning_rate": 3.4240743201346443e-05, "loss": 0.6415, "step": 85395 }, { "epoch": 0.95, "learning_rate": 3.423982047420793e-05, "loss": 0.6996, "step": 85400 }, { "epoch": 0.95, "learning_rate": 3.423889774706942e-05, "loss": 0.7179, "step": 85405 }, { "epoch": 0.95, "learning_rate": 3.423797501993091e-05, "loss": 0.6894, "step": 85410 }, { "epoch": 0.95, "learning_rate": 3.4237052292792395e-05, "loss": 0.7132, "step": 85415 }, { "epoch": 0.95, "learning_rate": 3.423612956565388e-05, "loss": 0.6301, "step": 85420 }, { "epoch": 0.95, "learning_rate": 3.423520683851537e-05, "loss": 0.7347, "step": 85425 }, { "epoch": 0.95, "learning_rate": 3.423428411137686e-05, "loss": 0.7265, "step": 85430 }, { "epoch": 0.95, "learning_rate": 3.4233361384238346e-05, "loss": 0.7019, "step": 85435 }, { "epoch": 0.95, "learning_rate": 3.4232438657099834e-05, "loss": 0.6484, "step": 85440 }, { "epoch": 0.95, "learning_rate": 3.423151592996132e-05, "loss": 0.6958, "step": 85445 }, { "epoch": 0.95, "learning_rate": 3.423059320282281e-05, "loss": 0.7192, "step": 85450 }, { "epoch": 0.95, "learning_rate": 3.42296704756843e-05, "loss": 0.7201, "step": 85455 }, { "epoch": 0.95, "learning_rate": 3.4228747748545785e-05, "loss": 0.6933, "step": 85460 }, { "epoch": 0.95, "learning_rate": 3.422782502140727e-05, "loss": 0.7098, "step": 85465 }, { "epoch": 0.95, "learning_rate": 3.422690229426876e-05, "loss": 0.727, "step": 85470 }, { "epoch": 0.95, "learning_rate": 3.422597956713024e-05, "loss": 0.66, "step": 85475 }, { "epoch": 0.95, "learning_rate": 3.422505683999173e-05, "loss": 0.7078, "step": 85480 }, { "epoch": 0.95, "learning_rate": 3.4224134112853225e-05, "loss": 0.6527, "step": 85485 }, { "epoch": 0.95, "learning_rate": 3.422321138571471e-05, "loss": 0.7179, "step": 85490 }, { "epoch": 0.95, "learning_rate": 3.4222288658576194e-05, "loss": 0.7165, "step": 85495 }, { "epoch": 0.95, "learning_rate": 3.422136593143768e-05, "loss": 0.6791, "step": 85500 }, { "epoch": 0.95, "learning_rate": 3.4220443204299176e-05, "loss": 0.6876, "step": 85505 }, { "epoch": 0.95, "learning_rate": 3.421952047716066e-05, "loss": 0.726, "step": 85510 }, { "epoch": 0.95, "learning_rate": 3.4218597750022145e-05, "loss": 0.7845, "step": 85515 }, { "epoch": 0.95, "learning_rate": 3.421767502288363e-05, "loss": 0.6614, "step": 85520 }, { "epoch": 0.95, "learning_rate": 3.421675229574513e-05, "loss": 0.7393, "step": 85525 }, { "epoch": 0.95, "learning_rate": 3.421582956860661e-05, "loss": 0.6768, "step": 85530 }, { "epoch": 0.95, "learning_rate": 3.4214906841468096e-05, "loss": 0.6756, "step": 85535 }, { "epoch": 0.95, "learning_rate": 3.4213984114329584e-05, "loss": 0.6839, "step": 85540 }, { "epoch": 0.95, "learning_rate": 3.421306138719107e-05, "loss": 0.6913, "step": 85545 }, { "epoch": 0.95, "learning_rate": 3.421213866005256e-05, "loss": 0.744, "step": 85550 }, { "epoch": 0.95, "learning_rate": 3.421121593291405e-05, "loss": 0.6533, "step": 85555 }, { "epoch": 0.95, "learning_rate": 3.4210293205775536e-05, "loss": 0.714, "step": 85560 }, { "epoch": 0.95, "learning_rate": 3.4209370478637023e-05, "loss": 0.6761, "step": 85565 }, { "epoch": 0.95, "learning_rate": 3.420844775149851e-05, "loss": 0.71, "step": 85570 }, { "epoch": 0.95, "learning_rate": 3.420752502436e-05, "loss": 0.6345, "step": 85575 }, { "epoch": 0.95, "learning_rate": 3.420660229722149e-05, "loss": 0.7229, "step": 85580 }, { "epoch": 0.95, "learning_rate": 3.420567957008297e-05, "loss": 0.7117, "step": 85585 }, { "epoch": 0.95, "learning_rate": 3.420475684294446e-05, "loss": 0.6322, "step": 85590 }, { "epoch": 0.95, "learning_rate": 3.420383411580595e-05, "loss": 0.7318, "step": 85595 }, { "epoch": 0.95, "learning_rate": 3.420291138866744e-05, "loss": 0.7031, "step": 85600 }, { "epoch": 0.95, "learning_rate": 3.420198866152892e-05, "loss": 0.6917, "step": 85605 }, { "epoch": 0.95, "learning_rate": 3.4201065934390414e-05, "loss": 0.6941, "step": 85610 }, { "epoch": 0.95, "learning_rate": 3.42001432072519e-05, "loss": 0.6938, "step": 85615 }, { "epoch": 0.95, "learning_rate": 3.419922048011338e-05, "loss": 0.6862, "step": 85620 }, { "epoch": 0.95, "learning_rate": 3.419829775297487e-05, "loss": 0.7629, "step": 85625 }, { "epoch": 0.95, "learning_rate": 3.419737502583636e-05, "loss": 0.678, "step": 85630 }, { "epoch": 0.95, "learning_rate": 3.419645229869785e-05, "loss": 0.6994, "step": 85635 }, { "epoch": 0.95, "learning_rate": 3.4195529571559334e-05, "loss": 0.6774, "step": 85640 }, { "epoch": 0.95, "learning_rate": 3.419460684442082e-05, "loss": 0.7097, "step": 85645 }, { "epoch": 0.95, "learning_rate": 3.419368411728231e-05, "loss": 0.7179, "step": 85650 }, { "epoch": 0.95, "learning_rate": 3.4192761390143805e-05, "loss": 0.6629, "step": 85655 }, { "epoch": 0.95, "learning_rate": 3.4191838663005286e-05, "loss": 0.6395, "step": 85660 }, { "epoch": 0.95, "learning_rate": 3.4190915935866774e-05, "loss": 0.6829, "step": 85665 }, { "epoch": 0.95, "learning_rate": 3.418999320872826e-05, "loss": 0.6097, "step": 85670 }, { "epoch": 0.95, "learning_rate": 3.418907048158975e-05, "loss": 0.7434, "step": 85675 }, { "epoch": 0.95, "learning_rate": 3.418814775445124e-05, "loss": 0.6942, "step": 85680 }, { "epoch": 0.95, "learning_rate": 3.4187225027312725e-05, "loss": 0.7434, "step": 85685 }, { "epoch": 0.95, "learning_rate": 3.418630230017421e-05, "loss": 0.7201, "step": 85690 }, { "epoch": 0.95, "learning_rate": 3.41853795730357e-05, "loss": 0.7697, "step": 85695 }, { "epoch": 0.95, "learning_rate": 3.418445684589719e-05, "loss": 0.7245, "step": 85700 }, { "epoch": 0.95, "learning_rate": 3.4183534118758676e-05, "loss": 0.6768, "step": 85705 }, { "epoch": 0.95, "learning_rate": 3.4182611391620164e-05, "loss": 0.6898, "step": 85710 }, { "epoch": 0.95, "learning_rate": 3.4181688664481645e-05, "loss": 0.683, "step": 85715 }, { "epoch": 0.95, "learning_rate": 3.418076593734314e-05, "loss": 0.6415, "step": 85720 }, { "epoch": 0.95, "learning_rate": 3.417984321020463e-05, "loss": 0.7323, "step": 85725 }, { "epoch": 0.95, "learning_rate": 3.4178920483066116e-05, "loss": 0.6892, "step": 85730 }, { "epoch": 0.95, "learning_rate": 3.41779977559276e-05, "loss": 0.7111, "step": 85735 }, { "epoch": 0.95, "learning_rate": 3.417707502878909e-05, "loss": 0.7027, "step": 85740 }, { "epoch": 0.95, "learning_rate": 3.417615230165058e-05, "loss": 0.7059, "step": 85745 }, { "epoch": 0.95, "learning_rate": 3.417522957451206e-05, "loss": 0.6731, "step": 85750 }, { "epoch": 0.95, "learning_rate": 3.417430684737355e-05, "loss": 0.6427, "step": 85755 }, { "epoch": 0.95, "learning_rate": 3.417338412023504e-05, "loss": 0.6899, "step": 85760 }, { "epoch": 0.95, "learning_rate": 3.417246139309653e-05, "loss": 0.6842, "step": 85765 }, { "epoch": 0.95, "learning_rate": 3.417153866595801e-05, "loss": 0.7116, "step": 85770 }, { "epoch": 0.95, "learning_rate": 3.41706159388195e-05, "loss": 0.6688, "step": 85775 }, { "epoch": 0.95, "learning_rate": 3.416969321168099e-05, "loss": 0.6889, "step": 85780 }, { "epoch": 0.95, "learning_rate": 3.4168770484542475e-05, "loss": 0.7121, "step": 85785 }, { "epoch": 0.95, "learning_rate": 3.416784775740396e-05, "loss": 0.6753, "step": 85790 }, { "epoch": 0.95, "learning_rate": 3.416692503026545e-05, "loss": 0.69, "step": 85795 }, { "epoch": 0.95, "learning_rate": 3.416600230312694e-05, "loss": 0.6985, "step": 85800 }, { "epoch": 0.95, "learning_rate": 3.416507957598843e-05, "loss": 0.6929, "step": 85805 }, { "epoch": 0.95, "learning_rate": 3.4164156848849915e-05, "loss": 0.7101, "step": 85810 }, { "epoch": 0.95, "learning_rate": 3.41632341217114e-05, "loss": 0.6906, "step": 85815 }, { "epoch": 0.95, "learning_rate": 3.416231139457289e-05, "loss": 0.7689, "step": 85820 }, { "epoch": 0.95, "learning_rate": 3.416138866743438e-05, "loss": 0.6768, "step": 85825 }, { "epoch": 0.95, "learning_rate": 3.4160465940295866e-05, "loss": 0.7298, "step": 85830 }, { "epoch": 0.95, "learning_rate": 3.4159543213157354e-05, "loss": 0.7294, "step": 85835 }, { "epoch": 0.95, "learning_rate": 3.415862048601884e-05, "loss": 0.7303, "step": 85840 }, { "epoch": 0.95, "learning_rate": 3.415769775888032e-05, "loss": 0.6508, "step": 85845 }, { "epoch": 0.95, "learning_rate": 3.415677503174182e-05, "loss": 0.688, "step": 85850 }, { "epoch": 0.95, "learning_rate": 3.4155852304603305e-05, "loss": 0.6844, "step": 85855 }, { "epoch": 0.95, "learning_rate": 3.4154929577464786e-05, "loss": 0.5924, "step": 85860 }, { "epoch": 0.95, "learning_rate": 3.4154006850326274e-05, "loss": 0.6765, "step": 85865 }, { "epoch": 0.95, "learning_rate": 3.415308412318777e-05, "loss": 0.6604, "step": 85870 }, { "epoch": 0.95, "learning_rate": 3.4152161396049257e-05, "loss": 0.6131, "step": 85875 }, { "epoch": 0.95, "learning_rate": 3.415123866891074e-05, "loss": 0.6275, "step": 85880 }, { "epoch": 0.95, "learning_rate": 3.4150315941772225e-05, "loss": 0.7406, "step": 85885 }, { "epoch": 0.95, "learning_rate": 3.414939321463372e-05, "loss": 0.6835, "step": 85890 }, { "epoch": 0.95, "learning_rate": 3.41484704874952e-05, "loss": 0.6964, "step": 85895 }, { "epoch": 0.95, "learning_rate": 3.414754776035669e-05, "loss": 0.7543, "step": 85900 }, { "epoch": 0.95, "learning_rate": 3.414662503321818e-05, "loss": 0.7268, "step": 85905 }, { "epoch": 0.95, "learning_rate": 3.414570230607967e-05, "loss": 0.7141, "step": 85910 }, { "epoch": 0.95, "learning_rate": 3.414477957894115e-05, "loss": 0.6969, "step": 85915 }, { "epoch": 0.95, "learning_rate": 3.414385685180264e-05, "loss": 0.684, "step": 85920 }, { "epoch": 0.95, "learning_rate": 3.414293412466413e-05, "loss": 0.6616, "step": 85925 }, { "epoch": 0.95, "learning_rate": 3.4142011397525616e-05, "loss": 0.6619, "step": 85930 }, { "epoch": 0.95, "learning_rate": 3.4141088670387104e-05, "loss": 0.7371, "step": 85935 }, { "epoch": 0.95, "learning_rate": 3.414016594324859e-05, "loss": 0.7678, "step": 85940 }, { "epoch": 0.95, "learning_rate": 3.413924321611008e-05, "loss": 0.7054, "step": 85945 }, { "epoch": 0.95, "learning_rate": 3.413832048897157e-05, "loss": 0.6643, "step": 85950 }, { "epoch": 0.95, "learning_rate": 3.4137397761833055e-05, "loss": 0.7001, "step": 85955 }, { "epoch": 0.95, "learning_rate": 3.413647503469454e-05, "loss": 0.6909, "step": 85960 }, { "epoch": 0.95, "learning_rate": 3.413555230755603e-05, "loss": 0.6936, "step": 85965 }, { "epoch": 0.95, "learning_rate": 3.413462958041751e-05, "loss": 0.6901, "step": 85970 }, { "epoch": 0.95, "learning_rate": 3.413370685327901e-05, "loss": 0.6582, "step": 85975 }, { "epoch": 0.95, "learning_rate": 3.4132784126140495e-05, "loss": 0.643, "step": 85980 }, { "epoch": 0.95, "learning_rate": 3.413186139900198e-05, "loss": 0.6707, "step": 85985 }, { "epoch": 0.95, "learning_rate": 3.4130938671863464e-05, "loss": 0.7014, "step": 85990 }, { "epoch": 0.95, "learning_rate": 3.413001594472496e-05, "loss": 0.6651, "step": 85995 }, { "epoch": 0.95, "learning_rate": 3.4129093217586446e-05, "loss": 0.6864, "step": 86000 }, { "epoch": 0.95, "eval_loss": 0.6432331204414368, "eval_runtime": 69.2606, "eval_samples_per_second": 28.876, "eval_steps_per_second": 14.438, "step": 86000 }, { "epoch": 0.95, "learning_rate": 3.4128170490447934e-05, "loss": 0.6691, "step": 86005 }, { "epoch": 0.95, "learning_rate": 3.4127247763309415e-05, "loss": 0.7141, "step": 86010 }, { "epoch": 0.95, "learning_rate": 3.41263250361709e-05, "loss": 0.6598, "step": 86015 }, { "epoch": 0.95, "learning_rate": 3.41254023090324e-05, "loss": 0.6436, "step": 86020 }, { "epoch": 0.95, "learning_rate": 3.412447958189388e-05, "loss": 0.6502, "step": 86025 }, { "epoch": 0.95, "learning_rate": 3.4123556854755366e-05, "loss": 0.7042, "step": 86030 }, { "epoch": 0.95, "learning_rate": 3.4122634127616854e-05, "loss": 0.7635, "step": 86035 }, { "epoch": 0.95, "learning_rate": 3.412171140047835e-05, "loss": 0.6757, "step": 86040 }, { "epoch": 0.95, "learning_rate": 3.412078867333983e-05, "loss": 0.7431, "step": 86045 }, { "epoch": 0.95, "learning_rate": 3.411986594620132e-05, "loss": 0.6821, "step": 86050 }, { "epoch": 0.95, "learning_rate": 3.4118943219062806e-05, "loss": 0.655, "step": 86055 }, { "epoch": 0.95, "learning_rate": 3.4118020491924293e-05, "loss": 0.6605, "step": 86060 }, { "epoch": 0.95, "learning_rate": 3.411709776478578e-05, "loss": 0.6487, "step": 86065 }, { "epoch": 0.95, "learning_rate": 3.411617503764727e-05, "loss": 0.645, "step": 86070 }, { "epoch": 0.95, "learning_rate": 3.411525231050876e-05, "loss": 0.6799, "step": 86075 }, { "epoch": 0.95, "learning_rate": 3.4114329583370245e-05, "loss": 0.6967, "step": 86080 }, { "epoch": 0.95, "learning_rate": 3.411340685623173e-05, "loss": 0.6634, "step": 86085 }, { "epoch": 0.95, "learning_rate": 3.411248412909322e-05, "loss": 0.7282, "step": 86090 }, { "epoch": 0.95, "learning_rate": 3.411156140195471e-05, "loss": 0.7009, "step": 86095 }, { "epoch": 0.95, "learning_rate": 3.411063867481619e-05, "loss": 0.658, "step": 86100 }, { "epoch": 0.95, "learning_rate": 3.4109715947677684e-05, "loss": 0.657, "step": 86105 }, { "epoch": 0.95, "learning_rate": 3.410879322053917e-05, "loss": 0.6731, "step": 86110 }, { "epoch": 0.95, "learning_rate": 3.410787049340066e-05, "loss": 0.7187, "step": 86115 }, { "epoch": 0.95, "learning_rate": 3.410694776626214e-05, "loss": 0.6964, "step": 86120 }, { "epoch": 0.95, "learning_rate": 3.4106025039123635e-05, "loss": 0.6938, "step": 86125 }, { "epoch": 0.95, "learning_rate": 3.410510231198512e-05, "loss": 0.6864, "step": 86130 }, { "epoch": 0.95, "learning_rate": 3.4104179584846604e-05, "loss": 0.6551, "step": 86135 }, { "epoch": 0.95, "learning_rate": 3.410325685770809e-05, "loss": 0.6865, "step": 86140 }, { "epoch": 0.95, "learning_rate": 3.410233413056959e-05, "loss": 0.7015, "step": 86145 }, { "epoch": 0.95, "learning_rate": 3.4101411403431075e-05, "loss": 0.6054, "step": 86150 }, { "epoch": 0.95, "learning_rate": 3.4100488676292556e-05, "loss": 0.6623, "step": 86155 }, { "epoch": 0.95, "learning_rate": 3.4099565949154044e-05, "loss": 0.6776, "step": 86160 }, { "epoch": 0.95, "learning_rate": 3.409864322201553e-05, "loss": 0.7265, "step": 86165 }, { "epoch": 0.95, "learning_rate": 3.409772049487702e-05, "loss": 0.7076, "step": 86170 }, { "epoch": 0.95, "learning_rate": 3.409679776773851e-05, "loss": 0.7179, "step": 86175 }, { "epoch": 0.95, "learning_rate": 3.4095875040599995e-05, "loss": 0.6705, "step": 86180 }, { "epoch": 0.95, "learning_rate": 3.409495231346148e-05, "loss": 0.7216, "step": 86185 }, { "epoch": 0.95, "learning_rate": 3.409402958632297e-05, "loss": 0.632, "step": 86190 }, { "epoch": 0.95, "learning_rate": 3.409310685918446e-05, "loss": 0.7018, "step": 86195 }, { "epoch": 0.95, "learning_rate": 3.4092184132045946e-05, "loss": 0.7056, "step": 86200 }, { "epoch": 0.95, "learning_rate": 3.4091261404907434e-05, "loss": 0.6881, "step": 86205 }, { "epoch": 0.95, "learning_rate": 3.409033867776892e-05, "loss": 0.6816, "step": 86210 }, { "epoch": 0.95, "learning_rate": 3.408941595063041e-05, "loss": 0.6689, "step": 86215 }, { "epoch": 0.95, "learning_rate": 3.40884932234919e-05, "loss": 0.657, "step": 86220 }, { "epoch": 0.95, "learning_rate": 3.4087570496353386e-05, "loss": 0.6833, "step": 86225 }, { "epoch": 0.95, "learning_rate": 3.408664776921487e-05, "loss": 0.7179, "step": 86230 }, { "epoch": 0.95, "learning_rate": 3.408572504207636e-05, "loss": 0.701, "step": 86235 }, { "epoch": 0.95, "learning_rate": 3.408480231493785e-05, "loss": 0.6879, "step": 86240 }, { "epoch": 0.95, "learning_rate": 3.408387958779933e-05, "loss": 0.6606, "step": 86245 }, { "epoch": 0.96, "learning_rate": 3.408295686066082e-05, "loss": 0.6765, "step": 86250 }, { "epoch": 0.96, "learning_rate": 3.408203413352231e-05, "loss": 0.6816, "step": 86255 }, { "epoch": 0.96, "learning_rate": 3.40811114063838e-05, "loss": 0.7495, "step": 86260 }, { "epoch": 0.96, "learning_rate": 3.408018867924528e-05, "loss": 0.6688, "step": 86265 }, { "epoch": 0.96, "learning_rate": 3.407926595210677e-05, "loss": 0.7526, "step": 86270 }, { "epoch": 0.96, "learning_rate": 3.4078343224968264e-05, "loss": 0.7218, "step": 86275 }, { "epoch": 0.96, "learning_rate": 3.4077420497829745e-05, "loss": 0.6565, "step": 86280 }, { "epoch": 0.96, "learning_rate": 3.407649777069123e-05, "loss": 0.7034, "step": 86285 }, { "epoch": 0.96, "learning_rate": 3.407557504355272e-05, "loss": 0.6937, "step": 86290 }, { "epoch": 0.96, "learning_rate": 3.4074652316414215e-05, "loss": 0.695, "step": 86295 }, { "epoch": 0.96, "learning_rate": 3.4073729589275697e-05, "loss": 0.6807, "step": 86300 }, { "epoch": 0.96, "learning_rate": 3.4072806862137184e-05, "loss": 0.6911, "step": 86305 }, { "epoch": 0.96, "learning_rate": 3.407188413499867e-05, "loss": 0.692, "step": 86310 }, { "epoch": 0.96, "learning_rate": 3.407096140786016e-05, "loss": 0.6725, "step": 86315 }, { "epoch": 0.96, "learning_rate": 3.407003868072165e-05, "loss": 0.7309, "step": 86320 }, { "epoch": 0.96, "learning_rate": 3.4069115953583136e-05, "loss": 0.7181, "step": 86325 }, { "epoch": 0.96, "learning_rate": 3.4068193226444624e-05, "loss": 0.7185, "step": 86330 }, { "epoch": 0.96, "learning_rate": 3.406727049930611e-05, "loss": 0.6907, "step": 86335 }, { "epoch": 0.96, "learning_rate": 3.40663477721676e-05, "loss": 0.6184, "step": 86340 }, { "epoch": 0.96, "learning_rate": 3.406542504502909e-05, "loss": 0.7077, "step": 86345 }, { "epoch": 0.96, "learning_rate": 3.4064502317890575e-05, "loss": 0.6752, "step": 86350 }, { "epoch": 0.96, "learning_rate": 3.4063579590752056e-05, "loss": 0.7679, "step": 86355 }, { "epoch": 0.96, "learning_rate": 3.406265686361355e-05, "loss": 0.6792, "step": 86360 }, { "epoch": 0.96, "learning_rate": 3.406173413647504e-05, "loss": 0.7009, "step": 86365 }, { "epoch": 0.96, "learning_rate": 3.4060811409336526e-05, "loss": 0.6868, "step": 86370 }, { "epoch": 0.96, "learning_rate": 3.405988868219801e-05, "loss": 0.6713, "step": 86375 }, { "epoch": 0.96, "learning_rate": 3.4058965955059495e-05, "loss": 0.6048, "step": 86380 }, { "epoch": 0.96, "learning_rate": 3.405804322792099e-05, "loss": 0.6931, "step": 86385 }, { "epoch": 0.96, "learning_rate": 3.405712050078248e-05, "loss": 0.6928, "step": 86390 }, { "epoch": 0.96, "learning_rate": 3.405619777364396e-05, "loss": 0.7203, "step": 86395 }, { "epoch": 0.96, "learning_rate": 3.405527504650545e-05, "loss": 0.6767, "step": 86400 }, { "epoch": 0.96, "learning_rate": 3.405435231936694e-05, "loss": 0.7035, "step": 86405 }, { "epoch": 0.96, "learning_rate": 3.405342959222842e-05, "loss": 0.6898, "step": 86410 }, { "epoch": 0.96, "learning_rate": 3.405250686508991e-05, "loss": 0.7047, "step": 86415 }, { "epoch": 0.96, "learning_rate": 3.40515841379514e-05, "loss": 0.6799, "step": 86420 }, { "epoch": 0.96, "learning_rate": 3.405066141081289e-05, "loss": 0.6878, "step": 86425 }, { "epoch": 0.96, "learning_rate": 3.4049738683674374e-05, "loss": 0.7217, "step": 86430 }, { "epoch": 0.96, "learning_rate": 3.404881595653586e-05, "loss": 0.6209, "step": 86435 }, { "epoch": 0.96, "learning_rate": 3.404789322939735e-05, "loss": 0.694, "step": 86440 }, { "epoch": 0.96, "learning_rate": 3.404697050225884e-05, "loss": 0.6895, "step": 86445 }, { "epoch": 0.96, "learning_rate": 3.4046047775120325e-05, "loss": 0.6964, "step": 86450 }, { "epoch": 0.96, "learning_rate": 3.404512504798181e-05, "loss": 0.7208, "step": 86455 }, { "epoch": 0.96, "learning_rate": 3.40442023208433e-05, "loss": 0.7115, "step": 86460 }, { "epoch": 0.96, "learning_rate": 3.404327959370479e-05, "loss": 0.6846, "step": 86465 }, { "epoch": 0.96, "learning_rate": 3.404235686656628e-05, "loss": 0.7261, "step": 86470 }, { "epoch": 0.96, "learning_rate": 3.4041434139427764e-05, "loss": 0.7034, "step": 86475 }, { "epoch": 0.96, "learning_rate": 3.404051141228925e-05, "loss": 0.6778, "step": 86480 }, { "epoch": 0.96, "learning_rate": 3.4039588685150733e-05, "loss": 0.6733, "step": 86485 }, { "epoch": 0.96, "learning_rate": 3.403866595801223e-05, "loss": 0.7287, "step": 86490 }, { "epoch": 0.96, "learning_rate": 3.4037743230873716e-05, "loss": 0.6711, "step": 86495 }, { "epoch": 0.96, "learning_rate": 3.4036820503735204e-05, "loss": 0.6598, "step": 86500 }, { "epoch": 0.96, "learning_rate": 3.4035897776596685e-05, "loss": 0.6244, "step": 86505 }, { "epoch": 0.96, "learning_rate": 3.403497504945818e-05, "loss": 0.6436, "step": 86510 }, { "epoch": 0.96, "learning_rate": 3.403405232231967e-05, "loss": 0.6643, "step": 86515 }, { "epoch": 0.96, "learning_rate": 3.403312959518115e-05, "loss": 0.6475, "step": 86520 }, { "epoch": 0.96, "learning_rate": 3.4032206868042636e-05, "loss": 0.667, "step": 86525 }, { "epoch": 0.96, "learning_rate": 3.4031284140904124e-05, "loss": 0.7311, "step": 86530 }, { "epoch": 0.96, "learning_rate": 3.403036141376562e-05, "loss": 0.6573, "step": 86535 }, { "epoch": 0.96, "learning_rate": 3.40294386866271e-05, "loss": 0.6632, "step": 86540 }, { "epoch": 0.96, "learning_rate": 3.402851595948859e-05, "loss": 0.7208, "step": 86545 }, { "epoch": 0.96, "learning_rate": 3.4027593232350075e-05, "loss": 0.7373, "step": 86550 }, { "epoch": 0.96, "learning_rate": 3.402667050521156e-05, "loss": 0.6615, "step": 86555 }, { "epoch": 0.96, "learning_rate": 3.402574777807305e-05, "loss": 0.6865, "step": 86560 }, { "epoch": 0.96, "learning_rate": 3.402482505093454e-05, "loss": 0.6507, "step": 86565 }, { "epoch": 0.96, "learning_rate": 3.402390232379603e-05, "loss": 0.6932, "step": 86570 }, { "epoch": 0.96, "learning_rate": 3.4022979596657515e-05, "loss": 0.6966, "step": 86575 }, { "epoch": 0.96, "learning_rate": 3.4022056869519e-05, "loss": 0.6532, "step": 86580 }, { "epoch": 0.96, "learning_rate": 3.402113414238049e-05, "loss": 0.6582, "step": 86585 }, { "epoch": 0.96, "learning_rate": 3.402021141524198e-05, "loss": 0.6913, "step": 86590 }, { "epoch": 0.96, "learning_rate": 3.4019288688103466e-05, "loss": 0.7, "step": 86595 }, { "epoch": 0.96, "learning_rate": 3.4018365960964954e-05, "loss": 0.6659, "step": 86600 }, { "epoch": 0.96, "learning_rate": 3.401744323382644e-05, "loss": 0.6822, "step": 86605 }, { "epoch": 0.96, "learning_rate": 3.401652050668793e-05, "loss": 0.6819, "step": 86610 }, { "epoch": 0.96, "learning_rate": 3.401559777954941e-05, "loss": 0.6771, "step": 86615 }, { "epoch": 0.96, "learning_rate": 3.4014675052410905e-05, "loss": 0.703, "step": 86620 }, { "epoch": 0.96, "learning_rate": 3.401375232527239e-05, "loss": 0.6924, "step": 86625 }, { "epoch": 0.96, "learning_rate": 3.4012829598133874e-05, "loss": 0.736, "step": 86630 }, { "epoch": 0.96, "learning_rate": 3.401190687099536e-05, "loss": 0.6761, "step": 86635 }, { "epoch": 0.96, "learning_rate": 3.401098414385686e-05, "loss": 0.7312, "step": 86640 }, { "epoch": 0.96, "learning_rate": 3.4010061416718345e-05, "loss": 0.7234, "step": 86645 }, { "epoch": 0.96, "learning_rate": 3.4009138689579826e-05, "loss": 0.74, "step": 86650 }, { "epoch": 0.96, "learning_rate": 3.4008215962441313e-05, "loss": 0.7276, "step": 86655 }, { "epoch": 0.96, "learning_rate": 3.400729323530281e-05, "loss": 0.7321, "step": 86660 }, { "epoch": 0.96, "learning_rate": 3.400637050816429e-05, "loss": 0.7509, "step": 86665 }, { "epoch": 0.96, "learning_rate": 3.400544778102578e-05, "loss": 0.7794, "step": 86670 }, { "epoch": 0.96, "learning_rate": 3.4004525053887265e-05, "loss": 0.6861, "step": 86675 }, { "epoch": 0.96, "learning_rate": 3.400360232674875e-05, "loss": 0.6974, "step": 86680 }, { "epoch": 0.96, "learning_rate": 3.400267959961024e-05, "loss": 0.6573, "step": 86685 }, { "epoch": 0.96, "learning_rate": 3.400175687247173e-05, "loss": 0.6892, "step": 86690 }, { "epoch": 0.96, "learning_rate": 3.4000834145333216e-05, "loss": 0.6467, "step": 86695 }, { "epoch": 0.96, "learning_rate": 3.3999911418194704e-05, "loss": 0.6768, "step": 86700 }, { "epoch": 0.96, "learning_rate": 3.399898869105619e-05, "loss": 0.6717, "step": 86705 }, { "epoch": 0.96, "learning_rate": 3.399806596391768e-05, "loss": 0.662, "step": 86710 }, { "epoch": 0.96, "learning_rate": 3.399714323677917e-05, "loss": 0.7288, "step": 86715 }, { "epoch": 0.96, "learning_rate": 3.3996220509640656e-05, "loss": 0.7022, "step": 86720 }, { "epoch": 0.96, "learning_rate": 3.399529778250214e-05, "loss": 0.6455, "step": 86725 }, { "epoch": 0.96, "learning_rate": 3.399437505536363e-05, "loss": 0.6538, "step": 86730 }, { "epoch": 0.96, "learning_rate": 3.399345232822512e-05, "loss": 0.6552, "step": 86735 }, { "epoch": 0.96, "learning_rate": 3.39925296010866e-05, "loss": 0.7434, "step": 86740 }, { "epoch": 0.96, "learning_rate": 3.3991606873948095e-05, "loss": 0.6977, "step": 86745 }, { "epoch": 0.96, "learning_rate": 3.399068414680958e-05, "loss": 0.7424, "step": 86750 }, { "epoch": 0.96, "learning_rate": 3.398976141967107e-05, "loss": 0.6555, "step": 86755 }, { "epoch": 0.96, "learning_rate": 3.398883869253255e-05, "loss": 0.7244, "step": 86760 }, { "epoch": 0.96, "learning_rate": 3.398791596539404e-05, "loss": 0.6897, "step": 86765 }, { "epoch": 0.96, "learning_rate": 3.3986993238255534e-05, "loss": 0.6857, "step": 86770 }, { "epoch": 0.96, "learning_rate": 3.398607051111702e-05, "loss": 0.7169, "step": 86775 }, { "epoch": 0.96, "learning_rate": 3.39851477839785e-05, "loss": 0.6847, "step": 86780 }, { "epoch": 0.96, "learning_rate": 3.398422505683999e-05, "loss": 0.6599, "step": 86785 }, { "epoch": 0.96, "learning_rate": 3.3983302329701485e-05, "loss": 0.6945, "step": 86790 }, { "epoch": 0.96, "learning_rate": 3.3982379602562966e-05, "loss": 0.6949, "step": 86795 }, { "epoch": 0.96, "learning_rate": 3.3981456875424454e-05, "loss": 0.706, "step": 86800 }, { "epoch": 0.96, "learning_rate": 3.398053414828594e-05, "loss": 0.7476, "step": 86805 }, { "epoch": 0.96, "learning_rate": 3.397961142114744e-05, "loss": 0.6423, "step": 86810 }, { "epoch": 0.96, "learning_rate": 3.397868869400892e-05, "loss": 0.7101, "step": 86815 }, { "epoch": 0.96, "learning_rate": 3.3977765966870406e-05, "loss": 0.6945, "step": 86820 }, { "epoch": 0.96, "learning_rate": 3.3976843239731894e-05, "loss": 0.6979, "step": 86825 }, { "epoch": 0.96, "learning_rate": 3.397592051259338e-05, "loss": 0.6486, "step": 86830 }, { "epoch": 0.96, "learning_rate": 3.397499778545487e-05, "loss": 0.681, "step": 86835 }, { "epoch": 0.96, "learning_rate": 3.397407505831636e-05, "loss": 0.7907, "step": 86840 }, { "epoch": 0.96, "learning_rate": 3.3973152331177845e-05, "loss": 0.697, "step": 86845 }, { "epoch": 0.96, "learning_rate": 3.397222960403933e-05, "loss": 0.6971, "step": 86850 }, { "epoch": 0.96, "learning_rate": 3.397130687690082e-05, "loss": 0.7505, "step": 86855 }, { "epoch": 0.96, "learning_rate": 3.397038414976231e-05, "loss": 0.6662, "step": 86860 }, { "epoch": 0.96, "learning_rate": 3.3969461422623796e-05, "loss": 0.6797, "step": 86865 }, { "epoch": 0.96, "learning_rate": 3.396853869548528e-05, "loss": 0.6775, "step": 86870 }, { "epoch": 0.96, "learning_rate": 3.396761596834677e-05, "loss": 0.675, "step": 86875 }, { "epoch": 0.96, "learning_rate": 3.396669324120826e-05, "loss": 0.7308, "step": 86880 }, { "epoch": 0.96, "learning_rate": 3.396577051406975e-05, "loss": 0.6778, "step": 86885 }, { "epoch": 0.96, "learning_rate": 3.396484778693123e-05, "loss": 0.7227, "step": 86890 }, { "epoch": 0.96, "learning_rate": 3.3963925059792723e-05, "loss": 0.7204, "step": 86895 }, { "epoch": 0.96, "learning_rate": 3.396300233265421e-05, "loss": 0.6506, "step": 86900 }, { "epoch": 0.96, "learning_rate": 3.396207960551569e-05, "loss": 0.6707, "step": 86905 }, { "epoch": 0.96, "learning_rate": 3.396115687837718e-05, "loss": 0.7438, "step": 86910 }, { "epoch": 0.96, "learning_rate": 3.396023415123867e-05, "loss": 0.6757, "step": 86915 }, { "epoch": 0.96, "learning_rate": 3.395931142410016e-05, "loss": 0.731, "step": 86920 }, { "epoch": 0.96, "learning_rate": 3.3958388696961644e-05, "loss": 0.6621, "step": 86925 }, { "epoch": 0.96, "learning_rate": 3.395746596982313e-05, "loss": 0.6657, "step": 86930 }, { "epoch": 0.96, "learning_rate": 3.395654324268462e-05, "loss": 0.699, "step": 86935 }, { "epoch": 0.96, "learning_rate": 3.395562051554611e-05, "loss": 0.6752, "step": 86940 }, { "epoch": 0.96, "learning_rate": 3.3954697788407595e-05, "loss": 0.7048, "step": 86945 }, { "epoch": 0.96, "learning_rate": 3.395377506126908e-05, "loss": 0.7064, "step": 86950 }, { "epoch": 0.96, "learning_rate": 3.395285233413057e-05, "loss": 0.6946, "step": 86955 }, { "epoch": 0.96, "learning_rate": 3.395192960699206e-05, "loss": 0.664, "step": 86960 }, { "epoch": 0.96, "learning_rate": 3.3951006879853547e-05, "loss": 0.6698, "step": 86965 }, { "epoch": 0.96, "learning_rate": 3.3950084152715034e-05, "loss": 0.6678, "step": 86970 }, { "epoch": 0.96, "learning_rate": 3.394916142557652e-05, "loss": 0.6966, "step": 86975 }, { "epoch": 0.96, "learning_rate": 3.394823869843801e-05, "loss": 0.6629, "step": 86980 }, { "epoch": 0.96, "learning_rate": 3.39473159712995e-05, "loss": 0.7278, "step": 86985 }, { "epoch": 0.96, "learning_rate": 3.3946393244160986e-05, "loss": 0.6957, "step": 86990 }, { "epoch": 0.96, "learning_rate": 3.3945470517022474e-05, "loss": 0.6851, "step": 86995 }, { "epoch": 0.96, "learning_rate": 3.3944547789883955e-05, "loss": 0.7308, "step": 87000 }, { "epoch": 0.96, "eval_loss": 0.649683952331543, "eval_runtime": 69.3837, "eval_samples_per_second": 28.825, "eval_steps_per_second": 14.413, "step": 87000 }, { "epoch": 0.96, "learning_rate": 3.394362506274545e-05, "loss": 0.7236, "step": 87005 }, { "epoch": 0.96, "learning_rate": 3.394270233560694e-05, "loss": 0.6472, "step": 87010 }, { "epoch": 0.96, "learning_rate": 3.394177960846842e-05, "loss": 0.7196, "step": 87015 }, { "epoch": 0.96, "learning_rate": 3.3940856881329906e-05, "loss": 0.7226, "step": 87020 }, { "epoch": 0.96, "learning_rate": 3.39399341541914e-05, "loss": 0.682, "step": 87025 }, { "epoch": 0.96, "learning_rate": 3.393901142705289e-05, "loss": 0.6561, "step": 87030 }, { "epoch": 0.96, "learning_rate": 3.393808869991437e-05, "loss": 0.6758, "step": 87035 }, { "epoch": 0.96, "learning_rate": 3.393716597277586e-05, "loss": 0.7189, "step": 87040 }, { "epoch": 0.96, "learning_rate": 3.393624324563735e-05, "loss": 0.7145, "step": 87045 }, { "epoch": 0.96, "learning_rate": 3.393532051849883e-05, "loss": 0.7382, "step": 87050 }, { "epoch": 0.96, "learning_rate": 3.393439779136032e-05, "loss": 0.7148, "step": 87055 }, { "epoch": 0.96, "learning_rate": 3.393347506422181e-05, "loss": 0.6744, "step": 87060 }, { "epoch": 0.96, "learning_rate": 3.39325523370833e-05, "loss": 0.6701, "step": 87065 }, { "epoch": 0.96, "learning_rate": 3.3931629609944785e-05, "loss": 0.6884, "step": 87070 }, { "epoch": 0.96, "learning_rate": 3.393070688280627e-05, "loss": 0.7481, "step": 87075 }, { "epoch": 0.96, "learning_rate": 3.392978415566776e-05, "loss": 0.7008, "step": 87080 }, { "epoch": 0.96, "learning_rate": 3.392886142852925e-05, "loss": 0.7005, "step": 87085 }, { "epoch": 0.96, "learning_rate": 3.3927938701390736e-05, "loss": 0.671, "step": 87090 }, { "epoch": 0.96, "learning_rate": 3.3927015974252224e-05, "loss": 0.7041, "step": 87095 }, { "epoch": 0.96, "learning_rate": 3.392609324711371e-05, "loss": 0.6449, "step": 87100 }, { "epoch": 0.96, "learning_rate": 3.39251705199752e-05, "loss": 0.7334, "step": 87105 }, { "epoch": 0.96, "learning_rate": 3.392424779283669e-05, "loss": 0.7026, "step": 87110 }, { "epoch": 0.96, "learning_rate": 3.3923325065698175e-05, "loss": 0.7392, "step": 87115 }, { "epoch": 0.96, "learning_rate": 3.392240233855966e-05, "loss": 0.6943, "step": 87120 }, { "epoch": 0.96, "learning_rate": 3.3921479611421144e-05, "loss": 0.7156, "step": 87125 }, { "epoch": 0.96, "learning_rate": 3.392055688428264e-05, "loss": 0.6557, "step": 87130 }, { "epoch": 0.96, "learning_rate": 3.391963415714413e-05, "loss": 0.7117, "step": 87135 }, { "epoch": 0.96, "learning_rate": 3.3918711430005614e-05, "loss": 0.6326, "step": 87140 }, { "epoch": 0.96, "learning_rate": 3.3917788702867096e-05, "loss": 0.7682, "step": 87145 }, { "epoch": 0.96, "learning_rate": 3.3916865975728583e-05, "loss": 0.6949, "step": 87150 }, { "epoch": 0.97, "learning_rate": 3.391594324859008e-05, "loss": 0.672, "step": 87155 }, { "epoch": 0.97, "learning_rate": 3.3915020521451566e-05, "loss": 0.7819, "step": 87160 }, { "epoch": 0.97, "learning_rate": 3.391409779431305e-05, "loss": 0.6944, "step": 87165 }, { "epoch": 0.97, "learning_rate": 3.3913175067174535e-05, "loss": 0.663, "step": 87170 }, { "epoch": 0.97, "learning_rate": 3.391225234003603e-05, "loss": 0.6668, "step": 87175 }, { "epoch": 0.97, "learning_rate": 3.391132961289751e-05, "loss": 0.6596, "step": 87180 }, { "epoch": 0.97, "learning_rate": 3.3910406885759e-05, "loss": 0.7035, "step": 87185 }, { "epoch": 0.97, "learning_rate": 3.3909484158620486e-05, "loss": 0.7092, "step": 87190 }, { "epoch": 0.97, "learning_rate": 3.390856143148198e-05, "loss": 0.7236, "step": 87195 }, { "epoch": 0.97, "learning_rate": 3.390763870434346e-05, "loss": 0.7138, "step": 87200 }, { "epoch": 0.97, "learning_rate": 3.390671597720495e-05, "loss": 0.685, "step": 87205 }, { "epoch": 0.97, "learning_rate": 3.390579325006644e-05, "loss": 0.6552, "step": 87210 }, { "epoch": 0.97, "learning_rate": 3.3904870522927925e-05, "loss": 0.6867, "step": 87215 }, { "epoch": 0.97, "learning_rate": 3.390394779578941e-05, "loss": 0.6862, "step": 87220 }, { "epoch": 0.97, "learning_rate": 3.39030250686509e-05, "loss": 0.6679, "step": 87225 }, { "epoch": 0.97, "learning_rate": 3.390210234151239e-05, "loss": 0.6996, "step": 87230 }, { "epoch": 0.97, "learning_rate": 3.390117961437388e-05, "loss": 0.6752, "step": 87235 }, { "epoch": 0.97, "learning_rate": 3.3900256887235365e-05, "loss": 0.6466, "step": 87240 }, { "epoch": 0.97, "learning_rate": 3.389933416009685e-05, "loss": 0.6596, "step": 87245 }, { "epoch": 0.97, "learning_rate": 3.389841143295834e-05, "loss": 0.6419, "step": 87250 }, { "epoch": 0.97, "learning_rate": 3.389748870581982e-05, "loss": 0.7151, "step": 87255 }, { "epoch": 0.97, "learning_rate": 3.3896565978681316e-05, "loss": 0.7343, "step": 87260 }, { "epoch": 0.97, "learning_rate": 3.3895643251542804e-05, "loss": 0.6478, "step": 87265 }, { "epoch": 0.97, "learning_rate": 3.389472052440429e-05, "loss": 0.7246, "step": 87270 }, { "epoch": 0.97, "learning_rate": 3.389379779726577e-05, "loss": 0.7172, "step": 87275 }, { "epoch": 0.97, "learning_rate": 3.389287507012727e-05, "loss": 0.7024, "step": 87280 }, { "epoch": 0.97, "learning_rate": 3.3891952342988755e-05, "loss": 0.6736, "step": 87285 }, { "epoch": 0.97, "learning_rate": 3.3891029615850236e-05, "loss": 0.6628, "step": 87290 }, { "epoch": 0.97, "learning_rate": 3.3890106888711724e-05, "loss": 0.7262, "step": 87295 }, { "epoch": 0.97, "learning_rate": 3.388918416157321e-05, "loss": 0.6657, "step": 87300 }, { "epoch": 0.97, "learning_rate": 3.388826143443471e-05, "loss": 0.6845, "step": 87305 }, { "epoch": 0.97, "learning_rate": 3.388733870729619e-05, "loss": 0.6656, "step": 87310 }, { "epoch": 0.97, "learning_rate": 3.3886415980157676e-05, "loss": 0.6707, "step": 87315 }, { "epoch": 0.97, "learning_rate": 3.3885493253019163e-05, "loss": 0.6161, "step": 87320 }, { "epoch": 0.97, "learning_rate": 3.388457052588065e-05, "loss": 0.7131, "step": 87325 }, { "epoch": 0.97, "learning_rate": 3.388364779874214e-05, "loss": 0.7045, "step": 87330 }, { "epoch": 0.97, "learning_rate": 3.388272507160363e-05, "loss": 0.6403, "step": 87335 }, { "epoch": 0.97, "learning_rate": 3.3881802344465115e-05, "loss": 0.7553, "step": 87340 }, { "epoch": 0.97, "learning_rate": 3.38808796173266e-05, "loss": 0.6839, "step": 87345 }, { "epoch": 0.97, "learning_rate": 3.387995689018809e-05, "loss": 0.7257, "step": 87350 }, { "epoch": 0.97, "learning_rate": 3.387903416304958e-05, "loss": 0.7394, "step": 87355 }, { "epoch": 0.97, "learning_rate": 3.3878111435911066e-05, "loss": 0.6943, "step": 87360 }, { "epoch": 0.97, "learning_rate": 3.387718870877255e-05, "loss": 0.7177, "step": 87365 }, { "epoch": 0.97, "learning_rate": 3.387626598163404e-05, "loss": 0.6716, "step": 87370 }, { "epoch": 0.97, "learning_rate": 3.387534325449553e-05, "loss": 0.6603, "step": 87375 }, { "epoch": 0.97, "learning_rate": 3.387442052735702e-05, "loss": 0.6965, "step": 87380 }, { "epoch": 0.97, "learning_rate": 3.38734978002185e-05, "loss": 0.6678, "step": 87385 }, { "epoch": 0.97, "learning_rate": 3.387257507307999e-05, "loss": 0.666, "step": 87390 }, { "epoch": 0.97, "learning_rate": 3.387165234594148e-05, "loss": 0.7041, "step": 87395 }, { "epoch": 0.97, "learning_rate": 3.387072961880296e-05, "loss": 0.6517, "step": 87400 }, { "epoch": 0.97, "learning_rate": 3.386980689166445e-05, "loss": 0.6667, "step": 87405 }, { "epoch": 0.97, "learning_rate": 3.3868884164525945e-05, "loss": 0.7136, "step": 87410 }, { "epoch": 0.97, "learning_rate": 3.386796143738743e-05, "loss": 0.6496, "step": 87415 }, { "epoch": 0.97, "learning_rate": 3.3867038710248914e-05, "loss": 0.6441, "step": 87420 }, { "epoch": 0.97, "learning_rate": 3.38661159831104e-05, "loss": 0.6836, "step": 87425 }, { "epoch": 0.97, "learning_rate": 3.3865193255971896e-05, "loss": 0.7282, "step": 87430 }, { "epoch": 0.97, "learning_rate": 3.386427052883338e-05, "loss": 0.6605, "step": 87435 }, { "epoch": 0.97, "learning_rate": 3.3863347801694865e-05, "loss": 0.6134, "step": 87440 }, { "epoch": 0.97, "learning_rate": 3.386242507455635e-05, "loss": 0.6479, "step": 87445 }, { "epoch": 0.97, "learning_rate": 3.386150234741784e-05, "loss": 0.6656, "step": 87450 }, { "epoch": 0.97, "learning_rate": 3.386057962027933e-05, "loss": 0.7047, "step": 87455 }, { "epoch": 0.97, "learning_rate": 3.3859656893140816e-05, "loss": 0.657, "step": 87460 }, { "epoch": 0.97, "learning_rate": 3.3858734166002304e-05, "loss": 0.7128, "step": 87465 }, { "epoch": 0.97, "learning_rate": 3.385781143886379e-05, "loss": 0.7089, "step": 87470 }, { "epoch": 0.97, "learning_rate": 3.385688871172528e-05, "loss": 0.6809, "step": 87475 }, { "epoch": 0.97, "learning_rate": 3.385596598458677e-05, "loss": 0.6297, "step": 87480 }, { "epoch": 0.97, "learning_rate": 3.3855043257448256e-05, "loss": 0.6776, "step": 87485 }, { "epoch": 0.97, "learning_rate": 3.3854120530309744e-05, "loss": 0.6912, "step": 87490 }, { "epoch": 0.97, "learning_rate": 3.385319780317123e-05, "loss": 0.6946, "step": 87495 }, { "epoch": 0.97, "learning_rate": 3.385227507603272e-05, "loss": 0.6948, "step": 87500 }, { "epoch": 0.97, "learning_rate": 3.385135234889421e-05, "loss": 0.693, "step": 87505 }, { "epoch": 0.97, "learning_rate": 3.385042962175569e-05, "loss": 0.7119, "step": 87510 }, { "epoch": 0.97, "learning_rate": 3.3849506894617176e-05, "loss": 0.7217, "step": 87515 }, { "epoch": 0.97, "learning_rate": 3.384858416747867e-05, "loss": 0.6691, "step": 87520 }, { "epoch": 0.97, "learning_rate": 3.384766144034016e-05, "loss": 0.6452, "step": 87525 }, { "epoch": 0.97, "learning_rate": 3.384673871320164e-05, "loss": 0.6949, "step": 87530 }, { "epoch": 0.97, "learning_rate": 3.384581598606313e-05, "loss": 0.6959, "step": 87535 }, { "epoch": 0.97, "learning_rate": 3.384489325892462e-05, "loss": 0.6658, "step": 87540 }, { "epoch": 0.97, "learning_rate": 3.384397053178611e-05, "loss": 0.7003, "step": 87545 }, { "epoch": 0.97, "learning_rate": 3.384304780464759e-05, "loss": 0.6965, "step": 87550 }, { "epoch": 0.97, "learning_rate": 3.384212507750908e-05, "loss": 0.6808, "step": 87555 }, { "epoch": 0.97, "learning_rate": 3.3841202350370573e-05, "loss": 0.6396, "step": 87560 }, { "epoch": 0.97, "learning_rate": 3.3840279623232055e-05, "loss": 0.6309, "step": 87565 }, { "epoch": 0.97, "learning_rate": 3.383935689609354e-05, "loss": 0.7076, "step": 87570 }, { "epoch": 0.97, "learning_rate": 3.383843416895503e-05, "loss": 0.7044, "step": 87575 }, { "epoch": 0.97, "learning_rate": 3.3837511441816525e-05, "loss": 0.7248, "step": 87580 }, { "epoch": 0.97, "learning_rate": 3.3836588714678006e-05, "loss": 0.6552, "step": 87585 }, { "epoch": 0.97, "learning_rate": 3.3835665987539494e-05, "loss": 0.646, "step": 87590 }, { "epoch": 0.97, "learning_rate": 3.383474326040098e-05, "loss": 0.6849, "step": 87595 }, { "epoch": 0.97, "learning_rate": 3.383382053326247e-05, "loss": 0.6747, "step": 87600 }, { "epoch": 0.97, "learning_rate": 3.383289780612396e-05, "loss": 0.6629, "step": 87605 }, { "epoch": 0.97, "learning_rate": 3.3831975078985445e-05, "loss": 0.7292, "step": 87610 }, { "epoch": 0.97, "learning_rate": 3.383105235184693e-05, "loss": 0.7007, "step": 87615 }, { "epoch": 0.97, "learning_rate": 3.383012962470842e-05, "loss": 0.7069, "step": 87620 }, { "epoch": 0.97, "learning_rate": 3.382920689756991e-05, "loss": 0.7097, "step": 87625 }, { "epoch": 0.97, "learning_rate": 3.3828284170431397e-05, "loss": 0.6964, "step": 87630 }, { "epoch": 0.97, "learning_rate": 3.3827361443292884e-05, "loss": 0.6374, "step": 87635 }, { "epoch": 0.97, "learning_rate": 3.3826438716154365e-05, "loss": 0.6795, "step": 87640 }, { "epoch": 0.97, "learning_rate": 3.382551598901586e-05, "loss": 0.5763, "step": 87645 }, { "epoch": 0.97, "learning_rate": 3.382459326187735e-05, "loss": 0.6941, "step": 87650 }, { "epoch": 0.97, "learning_rate": 3.3823670534738836e-05, "loss": 0.6638, "step": 87655 }, { "epoch": 0.97, "learning_rate": 3.382274780760032e-05, "loss": 0.6859, "step": 87660 }, { "epoch": 0.97, "learning_rate": 3.382182508046181e-05, "loss": 0.6469, "step": 87665 }, { "epoch": 0.97, "learning_rate": 3.38209023533233e-05, "loss": 0.6639, "step": 87670 }, { "epoch": 0.97, "learning_rate": 3.381997962618478e-05, "loss": 0.6528, "step": 87675 }, { "epoch": 0.97, "learning_rate": 3.381905689904627e-05, "loss": 0.6092, "step": 87680 }, { "epoch": 0.97, "learning_rate": 3.3818134171907756e-05, "loss": 0.6622, "step": 87685 }, { "epoch": 0.97, "learning_rate": 3.381721144476925e-05, "loss": 0.69, "step": 87690 }, { "epoch": 0.97, "learning_rate": 3.381628871763073e-05, "loss": 0.6426, "step": 87695 }, { "epoch": 0.97, "learning_rate": 3.381536599049222e-05, "loss": 0.6898, "step": 87700 }, { "epoch": 0.97, "learning_rate": 3.381444326335371e-05, "loss": 0.636, "step": 87705 }, { "epoch": 0.97, "learning_rate": 3.3813520536215195e-05, "loss": 0.6594, "step": 87710 }, { "epoch": 0.97, "learning_rate": 3.381259780907668e-05, "loss": 0.7438, "step": 87715 }, { "epoch": 0.97, "learning_rate": 3.381167508193817e-05, "loss": 0.6738, "step": 87720 }, { "epoch": 0.97, "learning_rate": 3.381075235479966e-05, "loss": 0.7025, "step": 87725 }, { "epoch": 0.97, "learning_rate": 3.380982962766115e-05, "loss": 0.6663, "step": 87730 }, { "epoch": 0.97, "learning_rate": 3.3808906900522635e-05, "loss": 0.7136, "step": 87735 }, { "epoch": 0.97, "learning_rate": 3.380798417338412e-05, "loss": 0.6629, "step": 87740 }, { "epoch": 0.97, "learning_rate": 3.380706144624561e-05, "loss": 0.6917, "step": 87745 }, { "epoch": 0.97, "learning_rate": 3.380613871910709e-05, "loss": 0.7181, "step": 87750 }, { "epoch": 0.97, "learning_rate": 3.3805215991968586e-05, "loss": 0.6958, "step": 87755 }, { "epoch": 0.97, "learning_rate": 3.3804293264830074e-05, "loss": 0.7214, "step": 87760 }, { "epoch": 0.97, "learning_rate": 3.380337053769156e-05, "loss": 0.7253, "step": 87765 }, { "epoch": 0.97, "learning_rate": 3.380244781055304e-05, "loss": 0.6483, "step": 87770 }, { "epoch": 0.97, "learning_rate": 3.380152508341454e-05, "loss": 0.7128, "step": 87775 }, { "epoch": 0.97, "learning_rate": 3.3800602356276025e-05, "loss": 0.6248, "step": 87780 }, { "epoch": 0.97, "learning_rate": 3.3799679629137506e-05, "loss": 0.733, "step": 87785 }, { "epoch": 0.97, "learning_rate": 3.3798756901998994e-05, "loss": 0.7152, "step": 87790 }, { "epoch": 0.97, "learning_rate": 3.379783417486049e-05, "loss": 0.6943, "step": 87795 }, { "epoch": 0.97, "learning_rate": 3.379691144772198e-05, "loss": 0.6999, "step": 87800 }, { "epoch": 0.97, "learning_rate": 3.379598872058346e-05, "loss": 0.6148, "step": 87805 }, { "epoch": 0.97, "learning_rate": 3.3795065993444946e-05, "loss": 0.6976, "step": 87810 }, { "epoch": 0.97, "learning_rate": 3.379414326630644e-05, "loss": 0.6763, "step": 87815 }, { "epoch": 0.97, "learning_rate": 3.379322053916792e-05, "loss": 0.7973, "step": 87820 }, { "epoch": 0.97, "learning_rate": 3.379229781202941e-05, "loss": 0.6408, "step": 87825 }, { "epoch": 0.97, "learning_rate": 3.37913750848909e-05, "loss": 0.7249, "step": 87830 }, { "epoch": 0.97, "learning_rate": 3.3790452357752385e-05, "loss": 0.7047, "step": 87835 }, { "epoch": 0.97, "learning_rate": 3.378952963061387e-05, "loss": 0.6606, "step": 87840 }, { "epoch": 0.97, "learning_rate": 3.378860690347536e-05, "loss": 0.7289, "step": 87845 }, { "epoch": 0.97, "learning_rate": 3.378768417633685e-05, "loss": 0.7392, "step": 87850 }, { "epoch": 0.97, "learning_rate": 3.3786761449198336e-05, "loss": 0.6353, "step": 87855 }, { "epoch": 0.97, "learning_rate": 3.3785838722059824e-05, "loss": 0.7496, "step": 87860 }, { "epoch": 0.97, "learning_rate": 3.378491599492131e-05, "loss": 0.6869, "step": 87865 }, { "epoch": 0.97, "learning_rate": 3.37839932677828e-05, "loss": 0.7004, "step": 87870 }, { "epoch": 0.97, "learning_rate": 3.378307054064429e-05, "loss": 0.67, "step": 87875 }, { "epoch": 0.97, "learning_rate": 3.3782147813505775e-05, "loss": 0.6732, "step": 87880 }, { "epoch": 0.97, "learning_rate": 3.378122508636726e-05, "loss": 0.6534, "step": 87885 }, { "epoch": 0.97, "learning_rate": 3.378030235922875e-05, "loss": 0.6343, "step": 87890 }, { "epoch": 0.97, "learning_rate": 3.377937963209023e-05, "loss": 0.6843, "step": 87895 }, { "epoch": 0.97, "learning_rate": 3.377845690495172e-05, "loss": 0.6745, "step": 87900 }, { "epoch": 0.97, "learning_rate": 3.3777534177813215e-05, "loss": 0.7032, "step": 87905 }, { "epoch": 0.97, "learning_rate": 3.37766114506747e-05, "loss": 0.6706, "step": 87910 }, { "epoch": 0.97, "learning_rate": 3.3775688723536184e-05, "loss": 0.7108, "step": 87915 }, { "epoch": 0.97, "learning_rate": 3.377476599639767e-05, "loss": 0.7374, "step": 87920 }, { "epoch": 0.97, "learning_rate": 3.3773843269259166e-05, "loss": 0.6805, "step": 87925 }, { "epoch": 0.97, "learning_rate": 3.3772920542120654e-05, "loss": 0.7074, "step": 87930 }, { "epoch": 0.97, "learning_rate": 3.3771997814982135e-05, "loss": 0.6947, "step": 87935 }, { "epoch": 0.97, "learning_rate": 3.377107508784362e-05, "loss": 0.6809, "step": 87940 }, { "epoch": 0.97, "learning_rate": 3.377015236070512e-05, "loss": 0.708, "step": 87945 }, { "epoch": 0.97, "learning_rate": 3.37692296335666e-05, "loss": 0.6571, "step": 87950 }, { "epoch": 0.97, "learning_rate": 3.3768306906428086e-05, "loss": 0.6971, "step": 87955 }, { "epoch": 0.97, "learning_rate": 3.3767384179289574e-05, "loss": 0.7143, "step": 87960 }, { "epoch": 0.97, "learning_rate": 3.376646145215107e-05, "loss": 0.6822, "step": 87965 }, { "epoch": 0.97, "learning_rate": 3.376553872501255e-05, "loss": 0.6637, "step": 87970 }, { "epoch": 0.97, "learning_rate": 3.376461599787404e-05, "loss": 0.6949, "step": 87975 }, { "epoch": 0.97, "learning_rate": 3.3763693270735526e-05, "loss": 0.6585, "step": 87980 }, { "epoch": 0.97, "learning_rate": 3.3762770543597013e-05, "loss": 0.69, "step": 87985 }, { "epoch": 0.97, "learning_rate": 3.37618478164585e-05, "loss": 0.6719, "step": 87990 }, { "epoch": 0.97, "learning_rate": 3.376092508931999e-05, "loss": 0.7138, "step": 87995 }, { "epoch": 0.97, "learning_rate": 3.376000236218148e-05, "loss": 0.6886, "step": 88000 }, { "epoch": 0.97, "eval_loss": 0.6673852801322937, "eval_runtime": 69.2351, "eval_samples_per_second": 28.887, "eval_steps_per_second": 14.444, "step": 88000 }, { "epoch": 0.97, "learning_rate": 3.3759079635042965e-05, "loss": 0.6024, "step": 88005 }, { "epoch": 0.97, "learning_rate": 3.375815690790445e-05, "loss": 0.6917, "step": 88010 }, { "epoch": 0.97, "learning_rate": 3.375723418076594e-05, "loss": 0.6831, "step": 88015 }, { "epoch": 0.97, "learning_rate": 3.375631145362743e-05, "loss": 0.728, "step": 88020 }, { "epoch": 0.97, "learning_rate": 3.375538872648891e-05, "loss": 0.671, "step": 88025 }, { "epoch": 0.97, "learning_rate": 3.3754465999350404e-05, "loss": 0.6935, "step": 88030 }, { "epoch": 0.97, "learning_rate": 3.375354327221189e-05, "loss": 0.643, "step": 88035 }, { "epoch": 0.97, "learning_rate": 3.375262054507338e-05, "loss": 0.6646, "step": 88040 }, { "epoch": 0.97, "learning_rate": 3.375169781793486e-05, "loss": 0.7039, "step": 88045 }, { "epoch": 0.97, "learning_rate": 3.375077509079635e-05, "loss": 0.6742, "step": 88050 }, { "epoch": 0.98, "learning_rate": 3.374985236365784e-05, "loss": 0.7375, "step": 88055 }, { "epoch": 0.98, "learning_rate": 3.3748929636519324e-05, "loss": 0.672, "step": 88060 }, { "epoch": 0.98, "learning_rate": 3.374800690938081e-05, "loss": 0.7148, "step": 88065 }, { "epoch": 0.98, "learning_rate": 3.37470841822423e-05, "loss": 0.6941, "step": 88070 }, { "epoch": 0.98, "learning_rate": 3.3746161455103795e-05, "loss": 0.6629, "step": 88075 }, { "epoch": 0.98, "learning_rate": 3.3745238727965276e-05, "loss": 0.6678, "step": 88080 }, { "epoch": 0.98, "learning_rate": 3.3744316000826764e-05, "loss": 0.6654, "step": 88085 }, { "epoch": 0.98, "learning_rate": 3.374339327368825e-05, "loss": 0.6541, "step": 88090 }, { "epoch": 0.98, "learning_rate": 3.374247054654974e-05, "loss": 0.6965, "step": 88095 }, { "epoch": 0.98, "learning_rate": 3.374154781941123e-05, "loss": 0.6778, "step": 88100 }, { "epoch": 0.98, "learning_rate": 3.3740625092272715e-05, "loss": 0.6982, "step": 88105 }, { "epoch": 0.98, "learning_rate": 3.37397023651342e-05, "loss": 0.7145, "step": 88110 }, { "epoch": 0.98, "learning_rate": 3.373877963799569e-05, "loss": 0.6933, "step": 88115 }, { "epoch": 0.98, "learning_rate": 3.373785691085718e-05, "loss": 0.7228, "step": 88120 }, { "epoch": 0.98, "learning_rate": 3.3736934183718666e-05, "loss": 0.7488, "step": 88125 }, { "epoch": 0.98, "learning_rate": 3.3736011456580154e-05, "loss": 0.6816, "step": 88130 }, { "epoch": 0.98, "learning_rate": 3.3735088729441635e-05, "loss": 0.6397, "step": 88135 }, { "epoch": 0.98, "learning_rate": 3.373416600230313e-05, "loss": 0.6736, "step": 88140 }, { "epoch": 0.98, "learning_rate": 3.373324327516462e-05, "loss": 0.6541, "step": 88145 }, { "epoch": 0.98, "learning_rate": 3.3732320548026106e-05, "loss": 0.7597, "step": 88150 }, { "epoch": 0.98, "learning_rate": 3.373139782088759e-05, "loss": 0.6481, "step": 88155 }, { "epoch": 0.98, "learning_rate": 3.373047509374908e-05, "loss": 0.7173, "step": 88160 }, { "epoch": 0.98, "learning_rate": 3.372955236661057e-05, "loss": 0.6849, "step": 88165 }, { "epoch": 0.98, "learning_rate": 3.372862963947205e-05, "loss": 0.7201, "step": 88170 }, { "epoch": 0.98, "learning_rate": 3.372770691233354e-05, "loss": 0.6473, "step": 88175 }, { "epoch": 0.98, "learning_rate": 3.372678418519503e-05, "loss": 0.6527, "step": 88180 }, { "epoch": 0.98, "learning_rate": 3.372586145805652e-05, "loss": 0.673, "step": 88185 }, { "epoch": 0.98, "learning_rate": 3.3724938730918e-05, "loss": 0.6835, "step": 88190 }, { "epoch": 0.98, "learning_rate": 3.372401600377949e-05, "loss": 0.7552, "step": 88195 }, { "epoch": 0.98, "learning_rate": 3.372309327664098e-05, "loss": 0.6959, "step": 88200 }, { "epoch": 0.98, "learning_rate": 3.372217054950247e-05, "loss": 0.6317, "step": 88205 }, { "epoch": 0.98, "learning_rate": 3.372124782236395e-05, "loss": 0.6978, "step": 88210 }, { "epoch": 0.98, "learning_rate": 3.372032509522544e-05, "loss": 0.6366, "step": 88215 }, { "epoch": 0.98, "learning_rate": 3.371940236808693e-05, "loss": 0.6416, "step": 88220 }, { "epoch": 0.98, "learning_rate": 3.371847964094842e-05, "loss": 0.7227, "step": 88225 }, { "epoch": 0.98, "learning_rate": 3.3717556913809905e-05, "loss": 0.7326, "step": 88230 }, { "epoch": 0.98, "learning_rate": 3.371663418667139e-05, "loss": 0.6843, "step": 88235 }, { "epoch": 0.98, "learning_rate": 3.371571145953288e-05, "loss": 0.6964, "step": 88240 }, { "epoch": 0.98, "learning_rate": 3.371478873239437e-05, "loss": 0.6593, "step": 88245 }, { "epoch": 0.98, "learning_rate": 3.3713866005255856e-05, "loss": 0.6399, "step": 88250 }, { "epoch": 0.98, "learning_rate": 3.3712943278117344e-05, "loss": 0.7068, "step": 88255 }, { "epoch": 0.98, "learning_rate": 3.371202055097883e-05, "loss": 0.6873, "step": 88260 }, { "epoch": 0.98, "learning_rate": 3.371109782384032e-05, "loss": 0.6565, "step": 88265 }, { "epoch": 0.98, "learning_rate": 3.371017509670181e-05, "loss": 0.6608, "step": 88270 }, { "epoch": 0.98, "learning_rate": 3.3709252369563295e-05, "loss": 0.7082, "step": 88275 }, { "epoch": 0.98, "learning_rate": 3.370832964242478e-05, "loss": 0.7321, "step": 88280 }, { "epoch": 0.98, "learning_rate": 3.3707406915286264e-05, "loss": 0.7014, "step": 88285 }, { "epoch": 0.98, "learning_rate": 3.370648418814776e-05, "loss": 0.7597, "step": 88290 }, { "epoch": 0.98, "learning_rate": 3.3705561461009247e-05, "loss": 0.6514, "step": 88295 }, { "epoch": 0.98, "learning_rate": 3.370463873387073e-05, "loss": 0.6673, "step": 88300 }, { "epoch": 0.98, "learning_rate": 3.3703716006732215e-05, "loss": 0.7193, "step": 88305 }, { "epoch": 0.98, "learning_rate": 3.370279327959371e-05, "loss": 0.7133, "step": 88310 }, { "epoch": 0.98, "learning_rate": 3.37018705524552e-05, "loss": 0.7144, "step": 88315 }, { "epoch": 0.98, "learning_rate": 3.370094782531668e-05, "loss": 0.6913, "step": 88320 }, { "epoch": 0.98, "learning_rate": 3.370002509817817e-05, "loss": 0.6898, "step": 88325 }, { "epoch": 0.98, "learning_rate": 3.369910237103966e-05, "loss": 0.708, "step": 88330 }, { "epoch": 0.98, "learning_rate": 3.369817964390114e-05, "loss": 0.6899, "step": 88335 }, { "epoch": 0.98, "learning_rate": 3.369725691676263e-05, "loss": 0.6749, "step": 88340 }, { "epoch": 0.98, "learning_rate": 3.369633418962412e-05, "loss": 0.6536, "step": 88345 }, { "epoch": 0.98, "learning_rate": 3.3695411462485606e-05, "loss": 0.6701, "step": 88350 }, { "epoch": 0.98, "learning_rate": 3.3694488735347094e-05, "loss": 0.723, "step": 88355 }, { "epoch": 0.98, "learning_rate": 3.369356600820858e-05, "loss": 0.7215, "step": 88360 }, { "epoch": 0.98, "learning_rate": 3.369264328107007e-05, "loss": 0.6949, "step": 88365 }, { "epoch": 0.98, "learning_rate": 3.369172055393156e-05, "loss": 0.6928, "step": 88370 }, { "epoch": 0.98, "learning_rate": 3.3690797826793045e-05, "loss": 0.6727, "step": 88375 }, { "epoch": 0.98, "learning_rate": 3.368987509965453e-05, "loss": 0.7086, "step": 88380 }, { "epoch": 0.98, "learning_rate": 3.368895237251602e-05, "loss": 0.6856, "step": 88385 }, { "epoch": 0.98, "learning_rate": 3.368802964537751e-05, "loss": 0.6357, "step": 88390 }, { "epoch": 0.98, "learning_rate": 3.3687106918239e-05, "loss": 0.7012, "step": 88395 }, { "epoch": 0.98, "learning_rate": 3.3686184191100485e-05, "loss": 0.6805, "step": 88400 }, { "epoch": 0.98, "learning_rate": 3.368526146396197e-05, "loss": 0.73, "step": 88405 }, { "epoch": 0.98, "learning_rate": 3.3684338736823454e-05, "loss": 0.7661, "step": 88410 }, { "epoch": 0.98, "learning_rate": 3.368341600968495e-05, "loss": 0.6922, "step": 88415 }, { "epoch": 0.98, "learning_rate": 3.3682493282546436e-05, "loss": 0.7228, "step": 88420 }, { "epoch": 0.98, "learning_rate": 3.3681570555407924e-05, "loss": 0.6915, "step": 88425 }, { "epoch": 0.98, "learning_rate": 3.3680647828269405e-05, "loss": 0.7135, "step": 88430 }, { "epoch": 0.98, "learning_rate": 3.367972510113089e-05, "loss": 0.7599, "step": 88435 }, { "epoch": 0.98, "learning_rate": 3.367880237399239e-05, "loss": 0.6946, "step": 88440 }, { "epoch": 0.98, "learning_rate": 3.367787964685387e-05, "loss": 0.6703, "step": 88445 }, { "epoch": 0.98, "learning_rate": 3.3676956919715356e-05, "loss": 0.7141, "step": 88450 }, { "epoch": 0.98, "learning_rate": 3.3676034192576844e-05, "loss": 0.6199, "step": 88455 }, { "epoch": 0.98, "learning_rate": 3.367511146543834e-05, "loss": 0.6466, "step": 88460 }, { "epoch": 0.98, "learning_rate": 3.367418873829982e-05, "loss": 0.6717, "step": 88465 }, { "epoch": 0.98, "learning_rate": 3.367326601116131e-05, "loss": 0.7283, "step": 88470 }, { "epoch": 0.98, "learning_rate": 3.3672343284022796e-05, "loss": 0.6741, "step": 88475 }, { "epoch": 0.98, "learning_rate": 3.3671420556884283e-05, "loss": 0.7279, "step": 88480 }, { "epoch": 0.98, "learning_rate": 3.367049782974577e-05, "loss": 0.6748, "step": 88485 }, { "epoch": 0.98, "learning_rate": 3.366957510260726e-05, "loss": 0.6557, "step": 88490 }, { "epoch": 0.98, "learning_rate": 3.366865237546875e-05, "loss": 0.7387, "step": 88495 }, { "epoch": 0.98, "learning_rate": 3.3667729648330235e-05, "loss": 0.7321, "step": 88500 }, { "epoch": 0.98, "learning_rate": 3.366680692119172e-05, "loss": 0.6843, "step": 88505 }, { "epoch": 0.98, "learning_rate": 3.366588419405321e-05, "loss": 0.7315, "step": 88510 }, { "epoch": 0.98, "learning_rate": 3.36649614669147e-05, "loss": 0.6978, "step": 88515 }, { "epoch": 0.98, "learning_rate": 3.366403873977618e-05, "loss": 0.7184, "step": 88520 }, { "epoch": 0.98, "learning_rate": 3.3663116012637674e-05, "loss": 0.7203, "step": 88525 }, { "epoch": 0.98, "learning_rate": 3.366219328549916e-05, "loss": 0.7259, "step": 88530 }, { "epoch": 0.98, "learning_rate": 3.366127055836065e-05, "loss": 0.7235, "step": 88535 }, { "epoch": 0.98, "learning_rate": 3.366034783122213e-05, "loss": 0.7635, "step": 88540 }, { "epoch": 0.98, "learning_rate": 3.3659425104083625e-05, "loss": 0.6966, "step": 88545 }, { "epoch": 0.98, "learning_rate": 3.365850237694511e-05, "loss": 0.7484, "step": 88550 }, { "epoch": 0.98, "learning_rate": 3.3657579649806594e-05, "loss": 0.6797, "step": 88555 }, { "epoch": 0.98, "learning_rate": 3.365665692266808e-05, "loss": 0.727, "step": 88560 }, { "epoch": 0.98, "learning_rate": 3.365573419552958e-05, "loss": 0.6665, "step": 88565 }, { "epoch": 0.98, "learning_rate": 3.3654811468391065e-05, "loss": 0.6487, "step": 88570 }, { "epoch": 0.98, "learning_rate": 3.3653888741252546e-05, "loss": 0.6983, "step": 88575 }, { "epoch": 0.98, "learning_rate": 3.3652966014114034e-05, "loss": 0.7064, "step": 88580 }, { "epoch": 0.98, "learning_rate": 3.365204328697552e-05, "loss": 0.7034, "step": 88585 }, { "epoch": 0.98, "learning_rate": 3.3651120559837016e-05, "loss": 0.7143, "step": 88590 }, { "epoch": 0.98, "learning_rate": 3.36501978326985e-05, "loss": 0.6628, "step": 88595 }, { "epoch": 0.98, "learning_rate": 3.3649275105559985e-05, "loss": 0.7667, "step": 88600 }, { "epoch": 0.98, "learning_rate": 3.364835237842147e-05, "loss": 0.7205, "step": 88605 }, { "epoch": 0.98, "learning_rate": 3.364742965128296e-05, "loss": 0.6669, "step": 88610 }, { "epoch": 0.98, "learning_rate": 3.364650692414445e-05, "loss": 0.6576, "step": 88615 }, { "epoch": 0.98, "learning_rate": 3.3645584197005936e-05, "loss": 0.7205, "step": 88620 }, { "epoch": 0.98, "learning_rate": 3.3644661469867424e-05, "loss": 0.6744, "step": 88625 }, { "epoch": 0.98, "learning_rate": 3.364373874272891e-05, "loss": 0.6832, "step": 88630 }, { "epoch": 0.98, "learning_rate": 3.36428160155904e-05, "loss": 0.6731, "step": 88635 }, { "epoch": 0.98, "learning_rate": 3.364189328845189e-05, "loss": 0.6924, "step": 88640 }, { "epoch": 0.98, "learning_rate": 3.3640970561313376e-05, "loss": 0.7309, "step": 88645 }, { "epoch": 0.98, "learning_rate": 3.3640047834174863e-05, "loss": 0.7267, "step": 88650 }, { "epoch": 0.98, "learning_rate": 3.363912510703635e-05, "loss": 0.7003, "step": 88655 }, { "epoch": 0.98, "learning_rate": 3.363820237989784e-05, "loss": 0.6929, "step": 88660 }, { "epoch": 0.98, "learning_rate": 3.363727965275933e-05, "loss": 0.6654, "step": 88665 }, { "epoch": 0.98, "learning_rate": 3.363635692562081e-05, "loss": 0.6635, "step": 88670 }, { "epoch": 0.98, "learning_rate": 3.36354341984823e-05, "loss": 0.6952, "step": 88675 }, { "epoch": 0.98, "learning_rate": 3.363451147134379e-05, "loss": 0.6952, "step": 88680 }, { "epoch": 0.98, "learning_rate": 3.363358874420527e-05, "loss": 0.6763, "step": 88685 }, { "epoch": 0.98, "learning_rate": 3.363266601706676e-05, "loss": 0.6934, "step": 88690 }, { "epoch": 0.98, "learning_rate": 3.3631743289928254e-05, "loss": 0.6719, "step": 88695 }, { "epoch": 0.98, "learning_rate": 3.363082056278974e-05, "loss": 0.6473, "step": 88700 }, { "epoch": 0.98, "learning_rate": 3.362989783565122e-05, "loss": 0.6864, "step": 88705 }, { "epoch": 0.98, "learning_rate": 3.362897510851271e-05, "loss": 0.6951, "step": 88710 }, { "epoch": 0.98, "learning_rate": 3.3628052381374206e-05, "loss": 0.6661, "step": 88715 }, { "epoch": 0.98, "learning_rate": 3.3627129654235687e-05, "loss": 0.6827, "step": 88720 }, { "epoch": 0.98, "learning_rate": 3.3626206927097174e-05, "loss": 0.6413, "step": 88725 }, { "epoch": 0.98, "learning_rate": 3.362528419995866e-05, "loss": 0.7184, "step": 88730 }, { "epoch": 0.98, "learning_rate": 3.362436147282015e-05, "loss": 0.702, "step": 88735 }, { "epoch": 0.98, "learning_rate": 3.362343874568164e-05, "loss": 0.7071, "step": 88740 }, { "epoch": 0.98, "learning_rate": 3.3622516018543126e-05, "loss": 0.7049, "step": 88745 }, { "epoch": 0.98, "learning_rate": 3.3621593291404614e-05, "loss": 0.6507, "step": 88750 }, { "epoch": 0.98, "learning_rate": 3.36206705642661e-05, "loss": 0.6505, "step": 88755 }, { "epoch": 0.98, "learning_rate": 3.361974783712759e-05, "loss": 0.7014, "step": 88760 }, { "epoch": 0.98, "learning_rate": 3.361882510998908e-05, "loss": 0.6735, "step": 88765 }, { "epoch": 0.98, "learning_rate": 3.3617902382850565e-05, "loss": 0.715, "step": 88770 }, { "epoch": 0.98, "learning_rate": 3.361697965571205e-05, "loss": 0.6798, "step": 88775 }, { "epoch": 0.98, "learning_rate": 3.361605692857354e-05, "loss": 0.66, "step": 88780 }, { "epoch": 0.98, "learning_rate": 3.361513420143503e-05, "loss": 0.7137, "step": 88785 }, { "epoch": 0.98, "learning_rate": 3.3614211474296516e-05, "loss": 0.6832, "step": 88790 }, { "epoch": 0.98, "learning_rate": 3.3613288747158e-05, "loss": 0.6734, "step": 88795 }, { "epoch": 0.98, "learning_rate": 3.361236602001949e-05, "loss": 0.6578, "step": 88800 }, { "epoch": 0.98, "learning_rate": 3.361144329288098e-05, "loss": 0.6579, "step": 88805 }, { "epoch": 0.98, "learning_rate": 3.361052056574247e-05, "loss": 0.641, "step": 88810 }, { "epoch": 0.98, "learning_rate": 3.360959783860395e-05, "loss": 0.6586, "step": 88815 }, { "epoch": 0.98, "learning_rate": 3.360867511146544e-05, "loss": 0.7499, "step": 88820 }, { "epoch": 0.98, "learning_rate": 3.360775238432693e-05, "loss": 0.6858, "step": 88825 }, { "epoch": 0.98, "learning_rate": 3.360682965718841e-05, "loss": 0.6537, "step": 88830 }, { "epoch": 0.98, "learning_rate": 3.36059069300499e-05, "loss": 0.6989, "step": 88835 }, { "epoch": 0.98, "learning_rate": 3.360498420291139e-05, "loss": 0.6549, "step": 88840 }, { "epoch": 0.98, "learning_rate": 3.360406147577288e-05, "loss": 0.688, "step": 88845 }, { "epoch": 0.98, "learning_rate": 3.3603138748634364e-05, "loss": 0.6417, "step": 88850 }, { "epoch": 0.98, "learning_rate": 3.360221602149585e-05, "loss": 0.7436, "step": 88855 }, { "epoch": 0.98, "learning_rate": 3.360129329435734e-05, "loss": 0.7464, "step": 88860 }, { "epoch": 0.98, "learning_rate": 3.360037056721883e-05, "loss": 0.6463, "step": 88865 }, { "epoch": 0.98, "learning_rate": 3.3599447840080315e-05, "loss": 0.6695, "step": 88870 }, { "epoch": 0.98, "learning_rate": 3.35985251129418e-05, "loss": 0.7058, "step": 88875 }, { "epoch": 0.98, "learning_rate": 3.359760238580329e-05, "loss": 0.6374, "step": 88880 }, { "epoch": 0.98, "learning_rate": 3.359667965866478e-05, "loss": 0.6757, "step": 88885 }, { "epoch": 0.98, "learning_rate": 3.359575693152627e-05, "loss": 0.64, "step": 88890 }, { "epoch": 0.98, "learning_rate": 3.3594834204387755e-05, "loss": 0.6826, "step": 88895 }, { "epoch": 0.98, "learning_rate": 3.359391147724924e-05, "loss": 0.7445, "step": 88900 }, { "epoch": 0.98, "learning_rate": 3.3592988750110723e-05, "loss": 0.7335, "step": 88905 }, { "epoch": 0.98, "learning_rate": 3.359206602297222e-05, "loss": 0.7109, "step": 88910 }, { "epoch": 0.98, "learning_rate": 3.3591143295833706e-05, "loss": 0.6687, "step": 88915 }, { "epoch": 0.98, "learning_rate": 3.3590220568695194e-05, "loss": 0.6947, "step": 88920 }, { "epoch": 0.98, "learning_rate": 3.3589297841556675e-05, "loss": 0.7015, "step": 88925 }, { "epoch": 0.98, "learning_rate": 3.358837511441817e-05, "loss": 0.6818, "step": 88930 }, { "epoch": 0.98, "learning_rate": 3.358745238727966e-05, "loss": 0.746, "step": 88935 }, { "epoch": 0.98, "learning_rate": 3.358652966014114e-05, "loss": 0.7255, "step": 88940 }, { "epoch": 0.98, "learning_rate": 3.3585606933002626e-05, "loss": 0.6809, "step": 88945 }, { "epoch": 0.98, "learning_rate": 3.358468420586412e-05, "loss": 0.6894, "step": 88950 }, { "epoch": 0.98, "learning_rate": 3.358376147872561e-05, "loss": 0.674, "step": 88955 }, { "epoch": 0.99, "learning_rate": 3.358283875158709e-05, "loss": 0.7441, "step": 88960 }, { "epoch": 0.99, "learning_rate": 3.358191602444858e-05, "loss": 0.6422, "step": 88965 }, { "epoch": 0.99, "learning_rate": 3.3580993297310065e-05, "loss": 0.7194, "step": 88970 }, { "epoch": 0.99, "learning_rate": 3.358007057017156e-05, "loss": 0.6529, "step": 88975 }, { "epoch": 0.99, "learning_rate": 3.357914784303304e-05, "loss": 0.6129, "step": 88980 }, { "epoch": 0.99, "learning_rate": 3.357822511589453e-05, "loss": 0.7681, "step": 88985 }, { "epoch": 0.99, "learning_rate": 3.357730238875602e-05, "loss": 0.664, "step": 88990 }, { "epoch": 0.99, "learning_rate": 3.3576379661617505e-05, "loss": 0.6659, "step": 88995 }, { "epoch": 0.99, "learning_rate": 3.357545693447899e-05, "loss": 0.6947, "step": 89000 }, { "epoch": 0.99, "eval_loss": 0.6638471484184265, "eval_runtime": 69.2587, "eval_samples_per_second": 28.877, "eval_steps_per_second": 14.439, "step": 89000 }, { "epoch": 0.99, "learning_rate": 3.357453420734048e-05, "loss": 0.6076, "step": 89005 }, { "epoch": 0.99, "learning_rate": 3.357361148020197e-05, "loss": 0.7373, "step": 89010 }, { "epoch": 0.99, "learning_rate": 3.3572688753063456e-05, "loss": 0.696, "step": 89015 }, { "epoch": 0.99, "learning_rate": 3.3571766025924944e-05, "loss": 0.6479, "step": 89020 }, { "epoch": 0.99, "learning_rate": 3.357084329878643e-05, "loss": 0.6431, "step": 89025 }, { "epoch": 0.99, "learning_rate": 3.356992057164792e-05, "loss": 0.7062, "step": 89030 }, { "epoch": 0.99, "learning_rate": 3.35689978445094e-05, "loss": 0.665, "step": 89035 }, { "epoch": 0.99, "learning_rate": 3.3568075117370895e-05, "loss": 0.6849, "step": 89040 }, { "epoch": 0.99, "learning_rate": 3.356715239023238e-05, "loss": 0.7443, "step": 89045 }, { "epoch": 0.99, "learning_rate": 3.356622966309387e-05, "loss": 0.6584, "step": 89050 }, { "epoch": 0.99, "learning_rate": 3.356530693595535e-05, "loss": 0.6294, "step": 89055 }, { "epoch": 0.99, "learning_rate": 3.356438420881685e-05, "loss": 0.6297, "step": 89060 }, { "epoch": 0.99, "learning_rate": 3.3563461481678335e-05, "loss": 0.6964, "step": 89065 }, { "epoch": 0.99, "learning_rate": 3.3562538754539816e-05, "loss": 0.6916, "step": 89070 }, { "epoch": 0.99, "learning_rate": 3.3561616027401304e-05, "loss": 0.6518, "step": 89075 }, { "epoch": 0.99, "learning_rate": 3.35606933002628e-05, "loss": 0.7782, "step": 89080 }, { "epoch": 0.99, "learning_rate": 3.3559770573124286e-05, "loss": 0.7193, "step": 89085 }, { "epoch": 0.99, "learning_rate": 3.355884784598577e-05, "loss": 0.6723, "step": 89090 }, { "epoch": 0.99, "learning_rate": 3.3557925118847255e-05, "loss": 0.7047, "step": 89095 }, { "epoch": 0.99, "learning_rate": 3.355700239170875e-05, "loss": 0.7131, "step": 89100 }, { "epoch": 0.99, "learning_rate": 3.355607966457023e-05, "loss": 0.6712, "step": 89105 }, { "epoch": 0.99, "learning_rate": 3.355515693743172e-05, "loss": 0.6778, "step": 89110 }, { "epoch": 0.99, "learning_rate": 3.3554234210293206e-05, "loss": 0.6935, "step": 89115 }, { "epoch": 0.99, "learning_rate": 3.3553311483154694e-05, "loss": 0.739, "step": 89120 }, { "epoch": 0.99, "learning_rate": 3.355238875601618e-05, "loss": 0.624, "step": 89125 }, { "epoch": 0.99, "learning_rate": 3.355146602887767e-05, "loss": 0.7279, "step": 89130 }, { "epoch": 0.99, "learning_rate": 3.355054330173916e-05, "loss": 0.6322, "step": 89135 }, { "epoch": 0.99, "learning_rate": 3.3549620574600646e-05, "loss": 0.6574, "step": 89140 }, { "epoch": 0.99, "learning_rate": 3.3548697847462133e-05, "loss": 0.6699, "step": 89145 }, { "epoch": 0.99, "learning_rate": 3.354777512032362e-05, "loss": 0.7076, "step": 89150 }, { "epoch": 0.99, "learning_rate": 3.354685239318511e-05, "loss": 0.6526, "step": 89155 }, { "epoch": 0.99, "learning_rate": 3.35459296660466e-05, "loss": 0.6366, "step": 89160 }, { "epoch": 0.99, "learning_rate": 3.3545006938908085e-05, "loss": 0.6858, "step": 89165 }, { "epoch": 0.99, "learning_rate": 3.354408421176957e-05, "loss": 0.725, "step": 89170 }, { "epoch": 0.99, "learning_rate": 3.354316148463106e-05, "loss": 0.7105, "step": 89175 }, { "epoch": 0.99, "learning_rate": 3.354223875749254e-05, "loss": 0.7154, "step": 89180 }, { "epoch": 0.99, "learning_rate": 3.354131603035403e-05, "loss": 0.7029, "step": 89185 }, { "epoch": 0.99, "learning_rate": 3.3540393303215524e-05, "loss": 0.6601, "step": 89190 }, { "epoch": 0.99, "learning_rate": 3.353947057607701e-05, "loss": 0.612, "step": 89195 }, { "epoch": 0.99, "learning_rate": 3.353854784893849e-05, "loss": 0.7437, "step": 89200 }, { "epoch": 0.99, "learning_rate": 3.353762512179998e-05, "loss": 0.7396, "step": 89205 }, { "epoch": 0.99, "learning_rate": 3.3536702394661475e-05, "loss": 0.7874, "step": 89210 }, { "epoch": 0.99, "learning_rate": 3.3535779667522956e-05, "loss": 0.6334, "step": 89215 }, { "epoch": 0.99, "learning_rate": 3.3534856940384444e-05, "loss": 0.6927, "step": 89220 }, { "epoch": 0.99, "learning_rate": 3.353393421324593e-05, "loss": 0.6777, "step": 89225 }, { "epoch": 0.99, "learning_rate": 3.353301148610743e-05, "loss": 0.6695, "step": 89230 }, { "epoch": 0.99, "learning_rate": 3.353208875896891e-05, "loss": 0.7205, "step": 89235 }, { "epoch": 0.99, "learning_rate": 3.3531166031830396e-05, "loss": 0.6984, "step": 89240 }, { "epoch": 0.99, "learning_rate": 3.3530243304691884e-05, "loss": 0.704, "step": 89245 }, { "epoch": 0.99, "learning_rate": 3.352932057755337e-05, "loss": 0.6681, "step": 89250 }, { "epoch": 0.99, "learning_rate": 3.352839785041486e-05, "loss": 0.6752, "step": 89255 }, { "epoch": 0.99, "learning_rate": 3.352747512327635e-05, "loss": 0.6921, "step": 89260 }, { "epoch": 0.99, "learning_rate": 3.3526552396137835e-05, "loss": 0.641, "step": 89265 }, { "epoch": 0.99, "learning_rate": 3.352562966899932e-05, "loss": 0.6968, "step": 89270 }, { "epoch": 0.99, "learning_rate": 3.352470694186081e-05, "loss": 0.6835, "step": 89275 }, { "epoch": 0.99, "learning_rate": 3.35237842147223e-05, "loss": 0.7015, "step": 89280 }, { "epoch": 0.99, "learning_rate": 3.3522861487583786e-05, "loss": 0.6856, "step": 89285 }, { "epoch": 0.99, "learning_rate": 3.352193876044527e-05, "loss": 0.668, "step": 89290 }, { "epoch": 0.99, "learning_rate": 3.352101603330676e-05, "loss": 0.6568, "step": 89295 }, { "epoch": 0.99, "learning_rate": 3.352009330616825e-05, "loss": 0.7225, "step": 89300 }, { "epoch": 0.99, "learning_rate": 3.351917057902974e-05, "loss": 0.6639, "step": 89305 }, { "epoch": 0.99, "learning_rate": 3.351824785189122e-05, "loss": 0.7358, "step": 89310 }, { "epoch": 0.99, "learning_rate": 3.3517325124752713e-05, "loss": 0.614, "step": 89315 }, { "epoch": 0.99, "learning_rate": 3.35164023976142e-05, "loss": 0.6708, "step": 89320 }, { "epoch": 0.99, "learning_rate": 3.351547967047568e-05, "loss": 0.6911, "step": 89325 }, { "epoch": 0.99, "learning_rate": 3.351455694333717e-05, "loss": 0.7312, "step": 89330 }, { "epoch": 0.99, "learning_rate": 3.3513634216198665e-05, "loss": 0.7058, "step": 89335 }, { "epoch": 0.99, "learning_rate": 3.351271148906015e-05, "loss": 0.6211, "step": 89340 }, { "epoch": 0.99, "learning_rate": 3.3511788761921634e-05, "loss": 0.7033, "step": 89345 }, { "epoch": 0.99, "learning_rate": 3.351086603478312e-05, "loss": 0.6813, "step": 89350 }, { "epoch": 0.99, "learning_rate": 3.350994330764461e-05, "loss": 0.7264, "step": 89355 }, { "epoch": 0.99, "learning_rate": 3.3509020580506104e-05, "loss": 0.6679, "step": 89360 }, { "epoch": 0.99, "learning_rate": 3.3508097853367585e-05, "loss": 0.7046, "step": 89365 }, { "epoch": 0.99, "learning_rate": 3.350717512622907e-05, "loss": 0.7158, "step": 89370 }, { "epoch": 0.99, "learning_rate": 3.350625239909056e-05, "loss": 0.6712, "step": 89375 }, { "epoch": 0.99, "learning_rate": 3.350532967195205e-05, "loss": 0.7544, "step": 89380 }, { "epoch": 0.99, "learning_rate": 3.3504406944813537e-05, "loss": 0.7144, "step": 89385 }, { "epoch": 0.99, "learning_rate": 3.3503484217675024e-05, "loss": 0.6898, "step": 89390 }, { "epoch": 0.99, "learning_rate": 3.350256149053651e-05, "loss": 0.7194, "step": 89395 }, { "epoch": 0.99, "learning_rate": 3.3501638763398e-05, "loss": 0.7251, "step": 89400 }, { "epoch": 0.99, "learning_rate": 3.350071603625949e-05, "loss": 0.6732, "step": 89405 }, { "epoch": 0.99, "learning_rate": 3.3499793309120976e-05, "loss": 0.6651, "step": 89410 }, { "epoch": 0.99, "learning_rate": 3.3498870581982464e-05, "loss": 0.7105, "step": 89415 }, { "epoch": 0.99, "learning_rate": 3.3497947854843945e-05, "loss": 0.6837, "step": 89420 }, { "epoch": 0.99, "learning_rate": 3.349702512770544e-05, "loss": 0.6591, "step": 89425 }, { "epoch": 0.99, "learning_rate": 3.349610240056693e-05, "loss": 0.7283, "step": 89430 }, { "epoch": 0.99, "learning_rate": 3.3495179673428415e-05, "loss": 0.7591, "step": 89435 }, { "epoch": 0.99, "learning_rate": 3.3494256946289896e-05, "loss": 0.7401, "step": 89440 }, { "epoch": 0.99, "learning_rate": 3.349333421915139e-05, "loss": 0.7238, "step": 89445 }, { "epoch": 0.99, "learning_rate": 3.349241149201288e-05, "loss": 0.6809, "step": 89450 }, { "epoch": 0.99, "learning_rate": 3.349148876487436e-05, "loss": 0.6964, "step": 89455 }, { "epoch": 0.99, "learning_rate": 3.349056603773585e-05, "loss": 0.6457, "step": 89460 }, { "epoch": 0.99, "learning_rate": 3.348964331059734e-05, "loss": 0.7179, "step": 89465 }, { "epoch": 0.99, "learning_rate": 3.348872058345883e-05, "loss": 0.6633, "step": 89470 }, { "epoch": 0.99, "learning_rate": 3.348779785632031e-05, "loss": 0.7344, "step": 89475 }, { "epoch": 0.99, "learning_rate": 3.34868751291818e-05, "loss": 0.7157, "step": 89480 }, { "epoch": 0.99, "learning_rate": 3.3485952402043294e-05, "loss": 0.6585, "step": 89485 }, { "epoch": 0.99, "learning_rate": 3.3485029674904775e-05, "loss": 0.6567, "step": 89490 }, { "epoch": 0.99, "learning_rate": 3.348410694776626e-05, "loss": 0.707, "step": 89495 }, { "epoch": 0.99, "learning_rate": 3.348318422062775e-05, "loss": 0.673, "step": 89500 }, { "epoch": 0.99, "learning_rate": 3.348226149348924e-05, "loss": 0.6756, "step": 89505 }, { "epoch": 0.99, "learning_rate": 3.3481338766350726e-05, "loss": 0.7085, "step": 89510 }, { "epoch": 0.99, "learning_rate": 3.3480416039212214e-05, "loss": 0.6368, "step": 89515 }, { "epoch": 0.99, "learning_rate": 3.34794933120737e-05, "loss": 0.718, "step": 89520 }, { "epoch": 0.99, "learning_rate": 3.347857058493519e-05, "loss": 0.7363, "step": 89525 }, { "epoch": 0.99, "learning_rate": 3.347764785779668e-05, "loss": 0.6379, "step": 89530 }, { "epoch": 0.99, "learning_rate": 3.3476725130658165e-05, "loss": 0.6991, "step": 89535 }, { "epoch": 0.99, "learning_rate": 3.347580240351965e-05, "loss": 0.6691, "step": 89540 }, { "epoch": 0.99, "learning_rate": 3.347487967638114e-05, "loss": 0.692, "step": 89545 }, { "epoch": 0.99, "learning_rate": 3.347395694924263e-05, "loss": 0.6732, "step": 89550 }, { "epoch": 0.99, "learning_rate": 3.347303422210412e-05, "loss": 0.6864, "step": 89555 }, { "epoch": 0.99, "learning_rate": 3.3472111494965605e-05, "loss": 0.6937, "step": 89560 }, { "epoch": 0.99, "learning_rate": 3.3471188767827086e-05, "loss": 0.7512, "step": 89565 }, { "epoch": 0.99, "learning_rate": 3.3470266040688573e-05, "loss": 0.6996, "step": 89570 }, { "epoch": 0.99, "learning_rate": 3.346934331355007e-05, "loss": 0.6996, "step": 89575 }, { "epoch": 0.99, "learning_rate": 3.3468420586411556e-05, "loss": 0.7484, "step": 89580 }, { "epoch": 0.99, "learning_rate": 3.346749785927304e-05, "loss": 0.6571, "step": 89585 }, { "epoch": 0.99, "learning_rate": 3.3466575132134525e-05, "loss": 0.6995, "step": 89590 }, { "epoch": 0.99, "learning_rate": 3.346565240499602e-05, "loss": 0.6905, "step": 89595 }, { "epoch": 0.99, "learning_rate": 3.34647296778575e-05, "loss": 0.7438, "step": 89600 }, { "epoch": 0.99, "learning_rate": 3.346380695071899e-05, "loss": 0.658, "step": 89605 }, { "epoch": 0.99, "learning_rate": 3.3462884223580476e-05, "loss": 0.6483, "step": 89610 }, { "epoch": 0.99, "learning_rate": 3.346196149644197e-05, "loss": 0.6917, "step": 89615 }, { "epoch": 0.99, "learning_rate": 3.346103876930345e-05, "loss": 0.6453, "step": 89620 }, { "epoch": 0.99, "learning_rate": 3.346011604216494e-05, "loss": 0.6976, "step": 89625 }, { "epoch": 0.99, "learning_rate": 3.345919331502643e-05, "loss": 0.6595, "step": 89630 }, { "epoch": 0.99, "learning_rate": 3.3458270587887915e-05, "loss": 0.6597, "step": 89635 }, { "epoch": 0.99, "learning_rate": 3.34573478607494e-05, "loss": 0.6779, "step": 89640 }, { "epoch": 0.99, "learning_rate": 3.345642513361089e-05, "loss": 0.6925, "step": 89645 }, { "epoch": 0.99, "learning_rate": 3.345550240647238e-05, "loss": 0.6721, "step": 89650 }, { "epoch": 0.99, "learning_rate": 3.345457967933387e-05, "loss": 0.6791, "step": 89655 }, { "epoch": 0.99, "learning_rate": 3.3453656952195355e-05, "loss": 0.6694, "step": 89660 }, { "epoch": 0.99, "learning_rate": 3.345273422505684e-05, "loss": 0.7372, "step": 89665 }, { "epoch": 0.99, "learning_rate": 3.345181149791833e-05, "loss": 0.6821, "step": 89670 }, { "epoch": 0.99, "learning_rate": 3.345088877077981e-05, "loss": 0.738, "step": 89675 }, { "epoch": 0.99, "learning_rate": 3.3449966043641306e-05, "loss": 0.6803, "step": 89680 }, { "epoch": 0.99, "learning_rate": 3.3449043316502794e-05, "loss": 0.6982, "step": 89685 }, { "epoch": 0.99, "learning_rate": 3.344812058936428e-05, "loss": 0.6715, "step": 89690 }, { "epoch": 0.99, "learning_rate": 3.344719786222576e-05, "loss": 0.7144, "step": 89695 }, { "epoch": 0.99, "learning_rate": 3.344627513508726e-05, "loss": 0.6786, "step": 89700 }, { "epoch": 0.99, "learning_rate": 3.3445352407948745e-05, "loss": 0.6329, "step": 89705 }, { "epoch": 0.99, "learning_rate": 3.3444429680810226e-05, "loss": 0.6323, "step": 89710 }, { "epoch": 0.99, "learning_rate": 3.3443506953671714e-05, "loss": 0.6419, "step": 89715 }, { "epoch": 0.99, "learning_rate": 3.34425842265332e-05, "loss": 0.7023, "step": 89720 }, { "epoch": 0.99, "learning_rate": 3.34416614993947e-05, "loss": 0.676, "step": 89725 }, { "epoch": 0.99, "learning_rate": 3.344073877225618e-05, "loss": 0.6463, "step": 89730 }, { "epoch": 0.99, "learning_rate": 3.3439816045117666e-05, "loss": 0.6686, "step": 89735 }, { "epoch": 0.99, "learning_rate": 3.3438893317979154e-05, "loss": 0.7145, "step": 89740 }, { "epoch": 0.99, "learning_rate": 3.343797059084065e-05, "loss": 0.6575, "step": 89745 }, { "epoch": 0.99, "learning_rate": 3.343704786370213e-05, "loss": 0.6826, "step": 89750 }, { "epoch": 0.99, "learning_rate": 3.343612513656362e-05, "loss": 0.7065, "step": 89755 }, { "epoch": 0.99, "learning_rate": 3.3435202409425105e-05, "loss": 0.717, "step": 89760 }, { "epoch": 0.99, "learning_rate": 3.343427968228659e-05, "loss": 0.6911, "step": 89765 }, { "epoch": 0.99, "learning_rate": 3.343335695514808e-05, "loss": 0.6967, "step": 89770 }, { "epoch": 0.99, "learning_rate": 3.343243422800957e-05, "loss": 0.745, "step": 89775 }, { "epoch": 0.99, "learning_rate": 3.3431511500871056e-05, "loss": 0.7186, "step": 89780 }, { "epoch": 0.99, "learning_rate": 3.3430588773732544e-05, "loss": 0.6764, "step": 89785 }, { "epoch": 0.99, "learning_rate": 3.342966604659403e-05, "loss": 0.7302, "step": 89790 }, { "epoch": 0.99, "learning_rate": 3.342874331945552e-05, "loss": 0.6682, "step": 89795 }, { "epoch": 0.99, "learning_rate": 3.342782059231701e-05, "loss": 0.6772, "step": 89800 }, { "epoch": 0.99, "learning_rate": 3.342689786517849e-05, "loss": 0.6073, "step": 89805 }, { "epoch": 0.99, "learning_rate": 3.342597513803998e-05, "loss": 0.6396, "step": 89810 }, { "epoch": 0.99, "learning_rate": 3.342505241090147e-05, "loss": 0.6656, "step": 89815 }, { "epoch": 0.99, "learning_rate": 3.342412968376296e-05, "loss": 0.6924, "step": 89820 }, { "epoch": 0.99, "learning_rate": 3.342320695662444e-05, "loss": 0.7232, "step": 89825 }, { "epoch": 0.99, "learning_rate": 3.3422284229485935e-05, "loss": 0.6482, "step": 89830 }, { "epoch": 0.99, "learning_rate": 3.342136150234742e-05, "loss": 0.7089, "step": 89835 }, { "epoch": 0.99, "learning_rate": 3.3420438775208904e-05, "loss": 0.7147, "step": 89840 }, { "epoch": 0.99, "learning_rate": 3.341951604807039e-05, "loss": 0.6796, "step": 89845 }, { "epoch": 0.99, "learning_rate": 3.3418593320931886e-05, "loss": 0.6612, "step": 89850 }, { "epoch": 0.99, "learning_rate": 3.3417670593793374e-05, "loss": 0.6715, "step": 89855 }, { "epoch": 0.99, "learning_rate": 3.3416747866654855e-05, "loss": 0.7288, "step": 89860 }, { "epoch": 1.0, "learning_rate": 3.341582513951634e-05, "loss": 0.7205, "step": 89865 }, { "epoch": 1.0, "learning_rate": 3.341490241237783e-05, "loss": 0.6737, "step": 89870 }, { "epoch": 1.0, "learning_rate": 3.341397968523932e-05, "loss": 0.7179, "step": 89875 }, { "epoch": 1.0, "learning_rate": 3.3413056958100806e-05, "loss": 0.7266, "step": 89880 }, { "epoch": 1.0, "learning_rate": 3.3412134230962294e-05, "loss": 0.6594, "step": 89885 }, { "epoch": 1.0, "learning_rate": 3.341121150382378e-05, "loss": 0.6427, "step": 89890 }, { "epoch": 1.0, "learning_rate": 3.341028877668527e-05, "loss": 0.7137, "step": 89895 }, { "epoch": 1.0, "learning_rate": 3.340936604954676e-05, "loss": 0.6903, "step": 89900 }, { "epoch": 1.0, "learning_rate": 3.3408443322408246e-05, "loss": 0.7257, "step": 89905 }, { "epoch": 1.0, "learning_rate": 3.3407520595269734e-05, "loss": 0.6724, "step": 89910 }, { "epoch": 1.0, "learning_rate": 3.340659786813122e-05, "loss": 0.7221, "step": 89915 }, { "epoch": 1.0, "learning_rate": 3.340567514099271e-05, "loss": 0.696, "step": 89920 }, { "epoch": 1.0, "learning_rate": 3.34047524138542e-05, "loss": 0.6611, "step": 89925 }, { "epoch": 1.0, "learning_rate": 3.3403829686715685e-05, "loss": 0.6335, "step": 89930 }, { "epoch": 1.0, "learning_rate": 3.340290695957717e-05, "loss": 0.6745, "step": 89935 }, { "epoch": 1.0, "learning_rate": 3.340198423243866e-05, "loss": 0.7266, "step": 89940 }, { "epoch": 1.0, "learning_rate": 3.340106150530015e-05, "loss": 0.6944, "step": 89945 }, { "epoch": 1.0, "learning_rate": 3.340013877816163e-05, "loss": 0.6752, "step": 89950 }, { "epoch": 1.0, "learning_rate": 3.339921605102312e-05, "loss": 0.6913, "step": 89955 }, { "epoch": 1.0, "learning_rate": 3.339829332388461e-05, "loss": 0.6833, "step": 89960 }, { "epoch": 1.0, "learning_rate": 3.33973705967461e-05, "loss": 0.6821, "step": 89965 }, { "epoch": 1.0, "learning_rate": 3.339644786960758e-05, "loss": 0.6633, "step": 89970 }, { "epoch": 1.0, "learning_rate": 3.339552514246907e-05, "loss": 0.6732, "step": 89975 }, { "epoch": 1.0, "learning_rate": 3.3394602415330563e-05, "loss": 0.7408, "step": 89980 }, { "epoch": 1.0, "learning_rate": 3.3393679688192045e-05, "loss": 0.688, "step": 89985 }, { "epoch": 1.0, "learning_rate": 3.339275696105353e-05, "loss": 0.6733, "step": 89990 }, { "epoch": 1.0, "learning_rate": 3.339183423391502e-05, "loss": 0.6636, "step": 89995 }, { "epoch": 1.0, "learning_rate": 3.3390911506776515e-05, "loss": 0.6567, "step": 90000 }, { "epoch": 1.0, "eval_loss": 0.6242142915725708, "eval_runtime": 69.2353, "eval_samples_per_second": 28.887, "eval_steps_per_second": 14.444, "step": 90000 }, { "epoch": 1.0, "learning_rate": 3.3389988779637996e-05, "loss": 0.6996, "step": 90005 }, { "epoch": 1.0, "learning_rate": 3.3389066052499484e-05, "loss": 0.74, "step": 90010 }, { "epoch": 1.0, "learning_rate": 3.338814332536097e-05, "loss": 0.724, "step": 90015 }, { "epoch": 1.0, "learning_rate": 3.338722059822246e-05, "loss": 0.7316, "step": 90020 }, { "epoch": 1.0, "learning_rate": 3.338629787108395e-05, "loss": 0.7171, "step": 90025 }, { "epoch": 1.0, "learning_rate": 3.3385375143945435e-05, "loss": 0.6371, "step": 90030 }, { "epoch": 1.0, "learning_rate": 3.338445241680692e-05, "loss": 0.5937, "step": 90035 }, { "epoch": 1.0, "learning_rate": 3.338352968966841e-05, "loss": 0.6958, "step": 90040 }, { "epoch": 1.0, "learning_rate": 3.33826069625299e-05, "loss": 0.6772, "step": 90045 }, { "epoch": 1.0, "learning_rate": 3.3381684235391387e-05, "loss": 0.707, "step": 90050 }, { "epoch": 1.0, "learning_rate": 3.3380761508252874e-05, "loss": 0.6767, "step": 90055 }, { "epoch": 1.0, "learning_rate": 3.3379838781114355e-05, "loss": 0.6954, "step": 90060 }, { "epoch": 1.0, "learning_rate": 3.337891605397585e-05, "loss": 0.6726, "step": 90065 }, { "epoch": 1.0, "learning_rate": 3.337799332683734e-05, "loss": 0.6737, "step": 90070 }, { "epoch": 1.0, "learning_rate": 3.3377070599698826e-05, "loss": 0.6992, "step": 90075 }, { "epoch": 1.0, "learning_rate": 3.337614787256031e-05, "loss": 0.7028, "step": 90080 }, { "epoch": 1.0, "learning_rate": 3.33752251454218e-05, "loss": 0.7311, "step": 90085 }, { "epoch": 1.0, "learning_rate": 3.337430241828329e-05, "loss": 0.688, "step": 90090 }, { "epoch": 1.0, "learning_rate": 3.337337969114477e-05, "loss": 0.666, "step": 90095 }, { "epoch": 1.0, "learning_rate": 3.337245696400626e-05, "loss": 0.7086, "step": 90100 }, { "epoch": 1.0, "learning_rate": 3.3371534236867746e-05, "loss": 0.672, "step": 90105 }, { "epoch": 1.0, "learning_rate": 3.337061150972924e-05, "loss": 0.7093, "step": 90110 }, { "epoch": 1.0, "learning_rate": 3.336968878259072e-05, "loss": 0.7089, "step": 90115 }, { "epoch": 1.0, "learning_rate": 3.336876605545221e-05, "loss": 0.7291, "step": 90120 }, { "epoch": 1.0, "learning_rate": 3.33678433283137e-05, "loss": 0.683, "step": 90125 }, { "epoch": 1.0, "learning_rate": 3.336692060117519e-05, "loss": 0.6973, "step": 90130 }, { "epoch": 1.0, "learning_rate": 3.336599787403667e-05, "loss": 0.7413, "step": 90135 }, { "epoch": 1.0, "learning_rate": 3.336507514689816e-05, "loss": 0.6863, "step": 90140 }, { "epoch": 1.0, "learning_rate": 3.336415241975965e-05, "loss": 0.6883, "step": 90145 }, { "epoch": 1.0, "learning_rate": 3.336322969262114e-05, "loss": 0.6637, "step": 90150 }, { "epoch": 1.0, "learning_rate": 3.3362306965482625e-05, "loss": 0.7273, "step": 90155 }, { "epoch": 1.0, "learning_rate": 3.336138423834411e-05, "loss": 0.7361, "step": 90160 }, { "epoch": 1.0, "learning_rate": 3.33604615112056e-05, "loss": 0.6986, "step": 90165 }, { "epoch": 1.0, "learning_rate": 3.335953878406709e-05, "loss": 0.6575, "step": 90170 }, { "epoch": 1.0, "learning_rate": 3.3358616056928576e-05, "loss": 0.6833, "step": 90175 }, { "epoch": 1.0, "learning_rate": 3.3357693329790064e-05, "loss": 0.7201, "step": 90180 }, { "epoch": 1.0, "learning_rate": 3.335677060265155e-05, "loss": 0.6895, "step": 90185 }, { "epoch": 1.0, "learning_rate": 3.335584787551303e-05, "loss": 0.6583, "step": 90190 }, { "epoch": 1.0, "learning_rate": 3.335492514837453e-05, "loss": 0.6888, "step": 90195 }, { "epoch": 1.0, "learning_rate": 3.3354002421236015e-05, "loss": 0.6524, "step": 90200 }, { "epoch": 1.0, "learning_rate": 3.33530796940975e-05, "loss": 0.6828, "step": 90205 }, { "epoch": 1.0, "learning_rate": 3.3352156966958984e-05, "loss": 0.6947, "step": 90210 }, { "epoch": 1.0, "learning_rate": 3.335123423982048e-05, "loss": 0.7466, "step": 90215 }, { "epoch": 1.0, "learning_rate": 3.335031151268197e-05, "loss": 0.6979, "step": 90220 }, { "epoch": 1.0, "learning_rate": 3.334938878554345e-05, "loss": 0.6119, "step": 90225 }, { "epoch": 1.0, "learning_rate": 3.3348466058404936e-05, "loss": 0.6597, "step": 90230 }, { "epoch": 1.0, "learning_rate": 3.334754333126643e-05, "loss": 0.7593, "step": 90235 }, { "epoch": 1.0, "learning_rate": 3.334662060412792e-05, "loss": 0.6771, "step": 90240 }, { "epoch": 1.0, "learning_rate": 3.33456978769894e-05, "loss": 0.7527, "step": 90245 }, { "epoch": 1.0, "learning_rate": 3.334477514985089e-05, "loss": 0.7325, "step": 90250 }, { "epoch": 1.0, "learning_rate": 3.3343852422712375e-05, "loss": 0.7295, "step": 90255 }, { "epoch": 1.0, "learning_rate": 3.334292969557386e-05, "loss": 0.6199, "step": 90260 }, { "epoch": 1.0, "learning_rate": 3.334200696843535e-05, "loss": 0.6784, "step": 90265 }, { "epoch": 1.0, "learning_rate": 3.334108424129684e-05, "loss": 0.7196, "step": 90270 }, { "epoch": 1.0, "learning_rate": 3.3340161514158326e-05, "loss": 0.6889, "step": 90275 }, { "epoch": 1.0, "learning_rate": 3.3339238787019814e-05, "loss": 0.7387, "step": 90280 }, { "epoch": 1.0, "learning_rate": 3.33383160598813e-05, "loss": 0.6922, "step": 90285 }, { "epoch": 1.0, "learning_rate": 3.333739333274279e-05, "loss": 0.7579, "step": 90290 }, { "epoch": 1.0, "learning_rate": 3.333647060560428e-05, "loss": 0.7043, "step": 90295 }, { "epoch": 1.0, "learning_rate": 3.3335547878465765e-05, "loss": 0.6586, "step": 90300 }, { "epoch": 1.0, "learning_rate": 3.333462515132725e-05, "loss": 0.6937, "step": 90305 }, { "epoch": 1.0, "learning_rate": 3.333370242418874e-05, "loss": 0.6698, "step": 90310 }, { "epoch": 1.0, "learning_rate": 3.333277969705023e-05, "loss": 0.774, "step": 90315 }, { "epoch": 1.0, "learning_rate": 3.333185696991172e-05, "loss": 0.7051, "step": 90320 }, { "epoch": 1.0, "learning_rate": 3.3330934242773205e-05, "loss": 0.6771, "step": 90325 }, { "epoch": 1.0, "learning_rate": 3.333001151563469e-05, "loss": 0.6564, "step": 90330 }, { "epoch": 1.0, "learning_rate": 3.3329088788496174e-05, "loss": 0.6969, "step": 90335 }, { "epoch": 1.0, "learning_rate": 3.332816606135766e-05, "loss": 0.685, "step": 90340 }, { "epoch": 1.0, "learning_rate": 3.3327243334219156e-05, "loss": 0.6829, "step": 90345 }, { "epoch": 1.0, "learning_rate": 3.3326320607080644e-05, "loss": 0.6997, "step": 90350 }, { "epoch": 1.0, "learning_rate": 3.3325397879942125e-05, "loss": 0.7009, "step": 90355 }, { "epoch": 1.0, "learning_rate": 3.332447515280361e-05, "loss": 0.6902, "step": 90360 }, { "epoch": 1.0, "learning_rate": 3.332355242566511e-05, "loss": 0.6677, "step": 90365 }, { "epoch": 1.0, "learning_rate": 3.332262969852659e-05, "loss": 0.6593, "step": 90370 }, { "epoch": 1.0, "learning_rate": 3.3321706971388076e-05, "loss": 0.6283, "step": 90375 }, { "epoch": 1.0, "learning_rate": 3.3320784244249564e-05, "loss": 0.6754, "step": 90380 }, { "epoch": 1.0, "learning_rate": 3.331986151711106e-05, "loss": 0.6905, "step": 90385 }, { "epoch": 1.0, "learning_rate": 3.331893878997254e-05, "loss": 0.702, "step": 90390 }, { "epoch": 1.0, "learning_rate": 3.331801606283403e-05, "loss": 0.6388, "step": 90395 }, { "epoch": 1.0, "learning_rate": 3.3317093335695516e-05, "loss": 0.722, "step": 90400 }, { "epoch": 1.0, "learning_rate": 3.3316170608557003e-05, "loss": 0.6996, "step": 90405 }, { "epoch": 1.0, "learning_rate": 3.331524788141849e-05, "loss": 0.7538, "step": 90410 }, { "epoch": 1.0, "learning_rate": 3.331432515427998e-05, "loss": 0.6281, "step": 90415 }, { "epoch": 1.0, "learning_rate": 3.331340242714147e-05, "loss": 0.691, "step": 90420 }, { "epoch": 1.0, "learning_rate": 3.3312479700002955e-05, "loss": 0.6454, "step": 90425 }, { "epoch": 1.0, "learning_rate": 3.331155697286444e-05, "loss": 0.6571, "step": 90430 }, { "epoch": 1.0, "learning_rate": 3.331063424572593e-05, "loss": 0.6676, "step": 90435 }, { "epoch": 1.0, "learning_rate": 3.330971151858742e-05, "loss": 0.7256, "step": 90440 }, { "epoch": 1.0, "learning_rate": 3.33087887914489e-05, "loss": 0.6686, "step": 90445 }, { "epoch": 1.0, "learning_rate": 3.3307866064310394e-05, "loss": 0.6998, "step": 90450 }, { "epoch": 1.0, "learning_rate": 3.330694333717188e-05, "loss": 0.6816, "step": 90455 }, { "epoch": 1.0, "learning_rate": 3.330602061003337e-05, "loss": 0.7315, "step": 90460 }, { "epoch": 1.0, "learning_rate": 3.330509788289485e-05, "loss": 0.6597, "step": 90465 }, { "epoch": 1.0, "learning_rate": 3.3304175155756346e-05, "loss": 0.6965, "step": 90470 }, { "epoch": 1.0, "learning_rate": 3.330325242861783e-05, "loss": 0.7165, "step": 90475 }, { "epoch": 1.0, "learning_rate": 3.330232970147932e-05, "loss": 0.6892, "step": 90480 }, { "epoch": 1.0, "learning_rate": 3.33014069743408e-05, "loss": 0.6841, "step": 90485 }, { "epoch": 1.0, "learning_rate": 3.330048424720229e-05, "loss": 0.6786, "step": 90490 }, { "epoch": 1.0, "learning_rate": 3.3299561520063785e-05, "loss": 0.6561, "step": 90495 }, { "epoch": 1.0, "learning_rate": 3.3298638792925266e-05, "loss": 0.6802, "step": 90500 }, { "epoch": 1.0, "learning_rate": 3.3297716065786754e-05, "loss": 0.6653, "step": 90505 }, { "epoch": 1.0, "learning_rate": 3.329679333864824e-05, "loss": 0.6877, "step": 90510 }, { "epoch": 1.0, "learning_rate": 3.3295870611509736e-05, "loss": 0.6808, "step": 90515 }, { "epoch": 1.0, "learning_rate": 3.329494788437122e-05, "loss": 0.6831, "step": 90520 }, { "epoch": 1.0, "learning_rate": 3.3294025157232705e-05, "loss": 0.7039, "step": 90525 }, { "epoch": 1.0, "learning_rate": 3.329310243009419e-05, "loss": 0.7074, "step": 90530 }, { "epoch": 1.0, "learning_rate": 3.329217970295568e-05, "loss": 0.6988, "step": 90535 }, { "epoch": 1.0, "learning_rate": 3.329125697581717e-05, "loss": 0.7135, "step": 90540 }, { "epoch": 1.0, "learning_rate": 3.3290334248678656e-05, "loss": 0.6404, "step": 90545 }, { "epoch": 1.0, "learning_rate": 3.3289411521540144e-05, "loss": 0.6761, "step": 90550 }, { "epoch": 1.0, "learning_rate": 3.328848879440163e-05, "loss": 0.6948, "step": 90555 }, { "epoch": 1.0, "learning_rate": 3.328756606726312e-05, "loss": 0.758, "step": 90560 }, { "epoch": 1.0, "learning_rate": 3.328664334012461e-05, "loss": 0.6936, "step": 90565 }, { "epoch": 1.0, "learning_rate": 3.3285720612986096e-05, "loss": 0.6642, "step": 90570 }, { "epoch": 1.0, "learning_rate": 3.328479788584758e-05, "loss": 0.6918, "step": 90575 }, { "epoch": 1.0, "learning_rate": 3.328387515870907e-05, "loss": 0.5939, "step": 90580 }, { "epoch": 1.0, "learning_rate": 3.328295243157056e-05, "loss": 0.7426, "step": 90585 }, { "epoch": 1.0, "learning_rate": 3.328202970443205e-05, "loss": 0.6816, "step": 90590 }, { "epoch": 1.0, "learning_rate": 3.328110697729353e-05, "loss": 0.6321, "step": 90595 }, { "epoch": 1.0, "learning_rate": 3.328018425015502e-05, "loss": 0.7209, "step": 90600 }, { "epoch": 1.0, "learning_rate": 3.327926152301651e-05, "loss": 0.659, "step": 90605 }, { "epoch": 1.0, "learning_rate": 3.327833879587799e-05, "loss": 0.6998, "step": 90610 }, { "epoch": 1.0, "learning_rate": 3.327741606873948e-05, "loss": 0.6765, "step": 90615 }, { "epoch": 1.0, "learning_rate": 3.3276493341600974e-05, "loss": 0.6918, "step": 90620 }, { "epoch": 1.0, "learning_rate": 3.327557061446246e-05, "loss": 0.6673, "step": 90625 }, { "epoch": 1.0, "learning_rate": 3.327464788732394e-05, "loss": 0.6576, "step": 90630 }, { "epoch": 1.0, "learning_rate": 3.327372516018543e-05, "loss": 0.6373, "step": 90635 }, { "epoch": 1.0, "learning_rate": 3.327280243304692e-05, "loss": 0.6483, "step": 90640 }, { "epoch": 1.0, "learning_rate": 3.327187970590841e-05, "loss": 0.6576, "step": 90645 }, { "epoch": 1.0, "learning_rate": 3.3270956978769895e-05, "loss": 0.7778, "step": 90650 }, { "epoch": 1.0, "learning_rate": 3.327003425163138e-05, "loss": 0.6321, "step": 90655 }, { "epoch": 1.0, "learning_rate": 3.326911152449287e-05, "loss": 0.7056, "step": 90660 }, { "epoch": 1.0, "learning_rate": 3.326818879735436e-05, "loss": 0.6438, "step": 90665 }, { "epoch": 1.0, "learning_rate": 3.3267266070215846e-05, "loss": 0.7224, "step": 90670 }, { "epoch": 1.0, "learning_rate": 3.3266343343077334e-05, "loss": 0.7091, "step": 90675 }, { "epoch": 1.0, "learning_rate": 3.326542061593882e-05, "loss": 0.6726, "step": 90680 }, { "epoch": 1.0, "learning_rate": 3.326449788880031e-05, "loss": 0.7069, "step": 90685 }, { "epoch": 1.0, "learning_rate": 3.32635751616618e-05, "loss": 0.6817, "step": 90690 }, { "epoch": 1.0, "learning_rate": 3.3262652434523285e-05, "loss": 0.745, "step": 90695 }, { "epoch": 1.0, "learning_rate": 3.326172970738477e-05, "loss": 0.6767, "step": 90700 }, { "epoch": 1.0, "learning_rate": 3.3260806980246254e-05, "loss": 0.636, "step": 90705 }, { "epoch": 1.0, "learning_rate": 3.325988425310775e-05, "loss": 0.6768, "step": 90710 }, { "epoch": 1.0, "learning_rate": 3.3258961525969237e-05, "loss": 0.7086, "step": 90715 }, { "epoch": 1.0, "learning_rate": 3.325803879883072e-05, "loss": 0.6967, "step": 90720 }, { "epoch": 1.0, "learning_rate": 3.3257116071692205e-05, "loss": 0.6755, "step": 90725 }, { "epoch": 1.0, "learning_rate": 3.32561933445537e-05, "loss": 0.6939, "step": 90730 }, { "epoch": 1.0, "learning_rate": 3.325527061741519e-05, "loss": 0.6783, "step": 90735 }, { "epoch": 1.0, "learning_rate": 3.325434789027667e-05, "loss": 0.7425, "step": 90740 }, { "epoch": 1.0, "learning_rate": 3.325342516313816e-05, "loss": 0.7156, "step": 90745 }, { "epoch": 1.0, "learning_rate": 3.325250243599965e-05, "loss": 0.6204, "step": 90750 }, { "epoch": 1.0, "learning_rate": 3.325157970886113e-05, "loss": 0.6597, "step": 90755 }, { "epoch": 1.0, "learning_rate": 3.325065698172262e-05, "loss": 0.739, "step": 90760 }, { "epoch": 1.01, "learning_rate": 3.324973425458411e-05, "loss": 0.7211, "step": 90765 }, { "epoch": 1.01, "learning_rate": 3.32488115274456e-05, "loss": 0.6145, "step": 90770 }, { "epoch": 1.01, "learning_rate": 3.3247888800307084e-05, "loss": 0.6409, "step": 90775 }, { "epoch": 1.01, "learning_rate": 3.324696607316857e-05, "loss": 0.6898, "step": 90780 }, { "epoch": 1.01, "learning_rate": 3.324604334603006e-05, "loss": 0.7136, "step": 90785 }, { "epoch": 1.01, "learning_rate": 3.324512061889155e-05, "loss": 0.7008, "step": 90790 }, { "epoch": 1.01, "learning_rate": 3.3244197891753035e-05, "loss": 0.7291, "step": 90795 }, { "epoch": 1.01, "learning_rate": 3.324327516461452e-05, "loss": 0.6973, "step": 90800 }, { "epoch": 1.01, "learning_rate": 3.324235243747601e-05, "loss": 0.6766, "step": 90805 }, { "epoch": 1.01, "learning_rate": 3.32414297103375e-05, "loss": 0.7059, "step": 90810 }, { "epoch": 1.01, "learning_rate": 3.324050698319899e-05, "loss": 0.7235, "step": 90815 }, { "epoch": 1.01, "learning_rate": 3.3239584256060475e-05, "loss": 0.7117, "step": 90820 }, { "epoch": 1.01, "learning_rate": 3.323866152892196e-05, "loss": 0.678, "step": 90825 }, { "epoch": 1.01, "learning_rate": 3.3237738801783444e-05, "loss": 0.6778, "step": 90830 }, { "epoch": 1.01, "learning_rate": 3.323681607464494e-05, "loss": 0.7022, "step": 90835 }, { "epoch": 1.01, "learning_rate": 3.3235893347506426e-05, "loss": 0.7248, "step": 90840 }, { "epoch": 1.01, "learning_rate": 3.3234970620367914e-05, "loss": 0.7059, "step": 90845 }, { "epoch": 1.01, "learning_rate": 3.3234047893229395e-05, "loss": 0.6912, "step": 90850 }, { "epoch": 1.01, "learning_rate": 3.323312516609088e-05, "loss": 0.6319, "step": 90855 }, { "epoch": 1.01, "learning_rate": 3.323220243895238e-05, "loss": 0.7385, "step": 90860 }, { "epoch": 1.01, "learning_rate": 3.3231279711813865e-05, "loss": 0.7229, "step": 90865 }, { "epoch": 1.01, "learning_rate": 3.3230356984675346e-05, "loss": 0.6555, "step": 90870 }, { "epoch": 1.01, "learning_rate": 3.3229434257536834e-05, "loss": 0.7057, "step": 90875 }, { "epoch": 1.01, "learning_rate": 3.322851153039833e-05, "loss": 0.6809, "step": 90880 }, { "epoch": 1.01, "learning_rate": 3.322758880325981e-05, "loss": 0.6608, "step": 90885 }, { "epoch": 1.01, "learning_rate": 3.32266660761213e-05, "loss": 0.6782, "step": 90890 }, { "epoch": 1.01, "learning_rate": 3.3225743348982786e-05, "loss": 0.7317, "step": 90895 }, { "epoch": 1.01, "learning_rate": 3.322482062184428e-05, "loss": 0.6795, "step": 90900 }, { "epoch": 1.01, "learning_rate": 3.322389789470576e-05, "loss": 0.6585, "step": 90905 }, { "epoch": 1.01, "learning_rate": 3.322297516756725e-05, "loss": 0.6653, "step": 90910 }, { "epoch": 1.01, "learning_rate": 3.322205244042874e-05, "loss": 0.6781, "step": 90915 }, { "epoch": 1.01, "learning_rate": 3.3221129713290225e-05, "loss": 0.7074, "step": 90920 }, { "epoch": 1.01, "learning_rate": 3.322020698615171e-05, "loss": 0.67, "step": 90925 }, { "epoch": 1.01, "learning_rate": 3.32192842590132e-05, "loss": 0.6933, "step": 90930 }, { "epoch": 1.01, "learning_rate": 3.321836153187469e-05, "loss": 0.7156, "step": 90935 }, { "epoch": 1.01, "learning_rate": 3.3217438804736176e-05, "loss": 0.704, "step": 90940 }, { "epoch": 1.01, "learning_rate": 3.3216516077597664e-05, "loss": 0.7139, "step": 90945 }, { "epoch": 1.01, "learning_rate": 3.321559335045915e-05, "loss": 0.657, "step": 90950 }, { "epoch": 1.01, "learning_rate": 3.321467062332064e-05, "loss": 0.6908, "step": 90955 }, { "epoch": 1.01, "learning_rate": 3.321374789618212e-05, "loss": 0.6986, "step": 90960 }, { "epoch": 1.01, "learning_rate": 3.3212825169043615e-05, "loss": 0.6696, "step": 90965 }, { "epoch": 1.01, "learning_rate": 3.32119024419051e-05, "loss": 0.7198, "step": 90970 }, { "epoch": 1.01, "learning_rate": 3.321097971476659e-05, "loss": 0.7008, "step": 90975 }, { "epoch": 1.01, "learning_rate": 3.321005698762807e-05, "loss": 0.6806, "step": 90980 }, { "epoch": 1.01, "learning_rate": 3.320913426048957e-05, "loss": 0.6749, "step": 90985 }, { "epoch": 1.01, "learning_rate": 3.3208211533351055e-05, "loss": 0.6389, "step": 90990 }, { "epoch": 1.01, "learning_rate": 3.3207288806212536e-05, "loss": 0.6783, "step": 90995 }, { "epoch": 1.01, "learning_rate": 3.3206366079074024e-05, "loss": 0.7185, "step": 91000 }, { "epoch": 1.01, "eval_loss": 0.6704195737838745, "eval_runtime": 69.8725, "eval_samples_per_second": 28.624, "eval_steps_per_second": 14.312, "step": 91000 }, { "epoch": 1.01, "learning_rate": 3.320544335193552e-05, "loss": 0.637, "step": 91005 }, { "epoch": 1.01, "learning_rate": 3.3204520624797006e-05, "loss": 0.699, "step": 91010 }, { "epoch": 1.01, "learning_rate": 3.320359789765849e-05, "loss": 0.7257, "step": 91015 }, { "epoch": 1.01, "learning_rate": 3.3202675170519975e-05, "loss": 0.6568, "step": 91020 }, { "epoch": 1.01, "learning_rate": 3.320175244338146e-05, "loss": 0.6627, "step": 91025 }, { "epoch": 1.01, "learning_rate": 3.320082971624295e-05, "loss": 0.6846, "step": 91030 }, { "epoch": 1.01, "learning_rate": 3.319990698910444e-05, "loss": 0.6812, "step": 91035 }, { "epoch": 1.01, "learning_rate": 3.3198984261965926e-05, "loss": 0.6573, "step": 91040 }, { "epoch": 1.01, "learning_rate": 3.3198061534827414e-05, "loss": 0.6296, "step": 91045 }, { "epoch": 1.01, "learning_rate": 3.31971388076889e-05, "loss": 0.715, "step": 91050 }, { "epoch": 1.01, "learning_rate": 3.319621608055039e-05, "loss": 0.6916, "step": 91055 }, { "epoch": 1.01, "learning_rate": 3.319529335341188e-05, "loss": 0.7095, "step": 91060 }, { "epoch": 1.01, "learning_rate": 3.3194370626273366e-05, "loss": 0.7337, "step": 91065 }, { "epoch": 1.01, "learning_rate": 3.3193447899134853e-05, "loss": 0.6816, "step": 91070 }, { "epoch": 1.01, "learning_rate": 3.319252517199634e-05, "loss": 0.6432, "step": 91075 }, { "epoch": 1.01, "learning_rate": 3.319160244485783e-05, "loss": 0.7023, "step": 91080 }, { "epoch": 1.01, "learning_rate": 3.319067971771932e-05, "loss": 0.6528, "step": 91085 }, { "epoch": 1.01, "learning_rate": 3.31897569905808e-05, "loss": 0.7077, "step": 91090 }, { "epoch": 1.01, "learning_rate": 3.318883426344229e-05, "loss": 0.6989, "step": 91095 }, { "epoch": 1.01, "learning_rate": 3.318791153630378e-05, "loss": 0.7, "step": 91100 }, { "epoch": 1.01, "learning_rate": 3.318698880916526e-05, "loss": 0.6685, "step": 91105 }, { "epoch": 1.01, "learning_rate": 3.318606608202675e-05, "loss": 0.6929, "step": 91110 }, { "epoch": 1.01, "learning_rate": 3.3185143354888244e-05, "loss": 0.6487, "step": 91115 }, { "epoch": 1.01, "learning_rate": 3.318422062774973e-05, "loss": 0.6776, "step": 91120 }, { "epoch": 1.01, "learning_rate": 3.318329790061121e-05, "loss": 0.6428, "step": 91125 }, { "epoch": 1.01, "learning_rate": 3.31823751734727e-05, "loss": 0.6166, "step": 91130 }, { "epoch": 1.01, "learning_rate": 3.3181452446334196e-05, "loss": 0.7112, "step": 91135 }, { "epoch": 1.01, "learning_rate": 3.3180529719195677e-05, "loss": 0.6026, "step": 91140 }, { "epoch": 1.01, "learning_rate": 3.3179606992057164e-05, "loss": 0.7702, "step": 91145 }, { "epoch": 1.01, "learning_rate": 3.317868426491865e-05, "loss": 0.7388, "step": 91150 }, { "epoch": 1.01, "learning_rate": 3.317776153778015e-05, "loss": 0.6614, "step": 91155 }, { "epoch": 1.01, "learning_rate": 3.317683881064163e-05, "loss": 0.6994, "step": 91160 }, { "epoch": 1.01, "learning_rate": 3.3175916083503116e-05, "loss": 0.7217, "step": 91165 }, { "epoch": 1.01, "learning_rate": 3.3174993356364604e-05, "loss": 0.7739, "step": 91170 }, { "epoch": 1.01, "learning_rate": 3.317407062922609e-05, "loss": 0.7008, "step": 91175 }, { "epoch": 1.01, "learning_rate": 3.317314790208758e-05, "loss": 0.6726, "step": 91180 }, { "epoch": 1.01, "learning_rate": 3.317222517494907e-05, "loss": 0.7129, "step": 91185 }, { "epoch": 1.01, "learning_rate": 3.3171302447810555e-05, "loss": 0.6528, "step": 91190 }, { "epoch": 1.01, "learning_rate": 3.317037972067204e-05, "loss": 0.6468, "step": 91195 }, { "epoch": 1.01, "learning_rate": 3.316945699353353e-05, "loss": 0.6651, "step": 91200 }, { "epoch": 1.01, "learning_rate": 3.316853426639502e-05, "loss": 0.6657, "step": 91205 }, { "epoch": 1.01, "learning_rate": 3.3167611539256506e-05, "loss": 0.6386, "step": 91210 }, { "epoch": 1.01, "learning_rate": 3.316668881211799e-05, "loss": 0.6948, "step": 91215 }, { "epoch": 1.01, "learning_rate": 3.316576608497948e-05, "loss": 0.6897, "step": 91220 }, { "epoch": 1.01, "learning_rate": 3.316484335784097e-05, "loss": 0.6991, "step": 91225 }, { "epoch": 1.01, "learning_rate": 3.316392063070246e-05, "loss": 0.7305, "step": 91230 }, { "epoch": 1.01, "learning_rate": 3.316299790356394e-05, "loss": 0.6846, "step": 91235 }, { "epoch": 1.01, "learning_rate": 3.316207517642543e-05, "loss": 0.6425, "step": 91240 }, { "epoch": 1.01, "learning_rate": 3.316115244928692e-05, "loss": 0.7234, "step": 91245 }, { "epoch": 1.01, "learning_rate": 3.316022972214841e-05, "loss": 0.6612, "step": 91250 }, { "epoch": 1.01, "learning_rate": 3.315930699500989e-05, "loss": 0.7393, "step": 91255 }, { "epoch": 1.01, "learning_rate": 3.315838426787138e-05, "loss": 0.7107, "step": 91260 }, { "epoch": 1.01, "learning_rate": 3.315746154073287e-05, "loss": 0.7152, "step": 91265 }, { "epoch": 1.01, "learning_rate": 3.3156538813594354e-05, "loss": 0.6265, "step": 91270 }, { "epoch": 1.01, "learning_rate": 3.315561608645584e-05, "loss": 0.7211, "step": 91275 }, { "epoch": 1.01, "learning_rate": 3.315469335931733e-05, "loss": 0.6414, "step": 91280 }, { "epoch": 1.01, "learning_rate": 3.3153770632178824e-05, "loss": 0.6361, "step": 91285 }, { "epoch": 1.01, "learning_rate": 3.3152847905040305e-05, "loss": 0.7289, "step": 91290 }, { "epoch": 1.01, "learning_rate": 3.315192517790179e-05, "loss": 0.6716, "step": 91295 }, { "epoch": 1.01, "learning_rate": 3.315100245076328e-05, "loss": 0.7218, "step": 91300 }, { "epoch": 1.01, "learning_rate": 3.315007972362477e-05, "loss": 0.6801, "step": 91305 }, { "epoch": 1.01, "learning_rate": 3.314915699648626e-05, "loss": 0.7205, "step": 91310 }, { "epoch": 1.01, "learning_rate": 3.3148234269347745e-05, "loss": 0.7267, "step": 91315 }, { "epoch": 1.01, "learning_rate": 3.314731154220923e-05, "loss": 0.7508, "step": 91320 }, { "epoch": 1.01, "learning_rate": 3.314638881507072e-05, "loss": 0.7333, "step": 91325 }, { "epoch": 1.01, "learning_rate": 3.314546608793221e-05, "loss": 0.6924, "step": 91330 }, { "epoch": 1.01, "learning_rate": 3.3144543360793696e-05, "loss": 0.6906, "step": 91335 }, { "epoch": 1.01, "learning_rate": 3.3143620633655184e-05, "loss": 0.6949, "step": 91340 }, { "epoch": 1.01, "learning_rate": 3.3142697906516665e-05, "loss": 0.6872, "step": 91345 }, { "epoch": 1.01, "learning_rate": 3.314177517937816e-05, "loss": 0.6636, "step": 91350 }, { "epoch": 1.01, "learning_rate": 3.314085245223965e-05, "loss": 0.6724, "step": 91355 }, { "epoch": 1.01, "learning_rate": 3.3139929725101135e-05, "loss": 0.6551, "step": 91360 }, { "epoch": 1.01, "learning_rate": 3.3139006997962616e-05, "loss": 0.6817, "step": 91365 }, { "epoch": 1.01, "learning_rate": 3.313808427082411e-05, "loss": 0.7193, "step": 91370 }, { "epoch": 1.01, "learning_rate": 3.31371615436856e-05, "loss": 0.649, "step": 91375 }, { "epoch": 1.01, "learning_rate": 3.313623881654708e-05, "loss": 0.6949, "step": 91380 }, { "epoch": 1.01, "learning_rate": 3.313531608940857e-05, "loss": 0.6414, "step": 91385 }, { "epoch": 1.01, "learning_rate": 3.3134393362270055e-05, "loss": 0.729, "step": 91390 }, { "epoch": 1.01, "learning_rate": 3.313347063513155e-05, "loss": 0.6561, "step": 91395 }, { "epoch": 1.01, "learning_rate": 3.313254790799303e-05, "loss": 0.6311, "step": 91400 }, { "epoch": 1.01, "learning_rate": 3.313162518085452e-05, "loss": 0.7314, "step": 91405 }, { "epoch": 1.01, "learning_rate": 3.313070245371601e-05, "loss": 0.732, "step": 91410 }, { "epoch": 1.01, "learning_rate": 3.3129779726577495e-05, "loss": 0.6787, "step": 91415 }, { "epoch": 1.01, "learning_rate": 3.312885699943898e-05, "loss": 0.6803, "step": 91420 }, { "epoch": 1.01, "learning_rate": 3.312793427230047e-05, "loss": 0.6279, "step": 91425 }, { "epoch": 1.01, "learning_rate": 3.312701154516196e-05, "loss": 0.7144, "step": 91430 }, { "epoch": 1.01, "learning_rate": 3.3126088818023446e-05, "loss": 0.6661, "step": 91435 }, { "epoch": 1.01, "learning_rate": 3.3125166090884934e-05, "loss": 0.6777, "step": 91440 }, { "epoch": 1.01, "learning_rate": 3.312424336374642e-05, "loss": 0.6485, "step": 91445 }, { "epoch": 1.01, "learning_rate": 3.312332063660791e-05, "loss": 0.6509, "step": 91450 }, { "epoch": 1.01, "learning_rate": 3.31223979094694e-05, "loss": 0.6732, "step": 91455 }, { "epoch": 1.01, "learning_rate": 3.3121475182330885e-05, "loss": 0.6944, "step": 91460 }, { "epoch": 1.01, "learning_rate": 3.312055245519237e-05, "loss": 0.7015, "step": 91465 }, { "epoch": 1.01, "learning_rate": 3.311962972805386e-05, "loss": 0.6943, "step": 91470 }, { "epoch": 1.01, "learning_rate": 3.311870700091534e-05, "loss": 0.6647, "step": 91475 }, { "epoch": 1.01, "learning_rate": 3.311778427377684e-05, "loss": 0.692, "step": 91480 }, { "epoch": 1.01, "learning_rate": 3.3116861546638325e-05, "loss": 0.6919, "step": 91485 }, { "epoch": 1.01, "learning_rate": 3.3115938819499806e-05, "loss": 0.7147, "step": 91490 }, { "epoch": 1.01, "learning_rate": 3.3115016092361294e-05, "loss": 0.7027, "step": 91495 }, { "epoch": 1.01, "learning_rate": 3.311409336522279e-05, "loss": 0.6943, "step": 91500 }, { "epoch": 1.01, "learning_rate": 3.3113170638084276e-05, "loss": 0.6676, "step": 91505 }, { "epoch": 1.01, "learning_rate": 3.311224791094576e-05, "loss": 0.6322, "step": 91510 }, { "epoch": 1.01, "learning_rate": 3.3111325183807245e-05, "loss": 0.6887, "step": 91515 }, { "epoch": 1.01, "learning_rate": 3.311040245666874e-05, "loss": 0.6643, "step": 91520 }, { "epoch": 1.01, "learning_rate": 3.310947972953022e-05, "loss": 0.7026, "step": 91525 }, { "epoch": 1.01, "learning_rate": 3.310855700239171e-05, "loss": 0.6494, "step": 91530 }, { "epoch": 1.01, "learning_rate": 3.3107634275253196e-05, "loss": 0.624, "step": 91535 }, { "epoch": 1.01, "learning_rate": 3.3106711548114684e-05, "loss": 0.7295, "step": 91540 }, { "epoch": 1.01, "learning_rate": 3.310578882097617e-05, "loss": 0.6521, "step": 91545 }, { "epoch": 1.01, "learning_rate": 3.310486609383766e-05, "loss": 0.6841, "step": 91550 }, { "epoch": 1.01, "learning_rate": 3.310394336669915e-05, "loss": 0.6586, "step": 91555 }, { "epoch": 1.01, "learning_rate": 3.3103020639560636e-05, "loss": 0.6566, "step": 91560 }, { "epoch": 1.01, "learning_rate": 3.3102097912422123e-05, "loss": 0.6472, "step": 91565 }, { "epoch": 1.01, "learning_rate": 3.310117518528361e-05, "loss": 0.6224, "step": 91570 }, { "epoch": 1.01, "learning_rate": 3.31002524581451e-05, "loss": 0.6605, "step": 91575 }, { "epoch": 1.01, "learning_rate": 3.309932973100659e-05, "loss": 0.6796, "step": 91580 }, { "epoch": 1.01, "learning_rate": 3.3098407003868075e-05, "loss": 0.703, "step": 91585 }, { "epoch": 1.01, "learning_rate": 3.309748427672956e-05, "loss": 0.6677, "step": 91590 }, { "epoch": 1.01, "learning_rate": 3.309656154959105e-05, "loss": 0.6846, "step": 91595 }, { "epoch": 1.01, "learning_rate": 3.309563882245253e-05, "loss": 0.75, "step": 91600 }, { "epoch": 1.01, "learning_rate": 3.3094716095314026e-05, "loss": 0.6309, "step": 91605 }, { "epoch": 1.01, "learning_rate": 3.3093793368175514e-05, "loss": 0.6999, "step": 91610 }, { "epoch": 1.01, "learning_rate": 3.3092870641037e-05, "loss": 0.7175, "step": 91615 }, { "epoch": 1.01, "learning_rate": 3.309194791389848e-05, "loss": 0.6435, "step": 91620 }, { "epoch": 1.01, "learning_rate": 3.309102518675997e-05, "loss": 0.6897, "step": 91625 }, { "epoch": 1.01, "learning_rate": 3.3090102459621465e-05, "loss": 0.7301, "step": 91630 }, { "epoch": 1.01, "learning_rate": 3.308917973248295e-05, "loss": 0.713, "step": 91635 }, { "epoch": 1.01, "learning_rate": 3.3088257005344434e-05, "loss": 0.6721, "step": 91640 }, { "epoch": 1.01, "learning_rate": 3.308733427820592e-05, "loss": 0.6657, "step": 91645 }, { "epoch": 1.01, "learning_rate": 3.308641155106742e-05, "loss": 0.6946, "step": 91650 }, { "epoch": 1.01, "learning_rate": 3.30854888239289e-05, "loss": 0.7093, "step": 91655 }, { "epoch": 1.01, "learning_rate": 3.3084566096790386e-05, "loss": 0.7172, "step": 91660 }, { "epoch": 1.01, "learning_rate": 3.3083643369651874e-05, "loss": 0.6695, "step": 91665 }, { "epoch": 1.02, "learning_rate": 3.308272064251337e-05, "loss": 0.6552, "step": 91670 }, { "epoch": 1.02, "learning_rate": 3.308179791537485e-05, "loss": 0.7132, "step": 91675 }, { "epoch": 1.02, "learning_rate": 3.308087518823634e-05, "loss": 0.7488, "step": 91680 }, { "epoch": 1.02, "learning_rate": 3.3079952461097825e-05, "loss": 0.7306, "step": 91685 }, { "epoch": 1.02, "learning_rate": 3.307902973395931e-05, "loss": 0.6846, "step": 91690 }, { "epoch": 1.02, "learning_rate": 3.30781070068208e-05, "loss": 0.6785, "step": 91695 }, { "epoch": 1.02, "learning_rate": 3.307718427968229e-05, "loss": 0.6599, "step": 91700 }, { "epoch": 1.02, "learning_rate": 3.3076261552543776e-05, "loss": 0.7048, "step": 91705 }, { "epoch": 1.02, "learning_rate": 3.3075338825405264e-05, "loss": 0.694, "step": 91710 }, { "epoch": 1.02, "learning_rate": 3.307441609826675e-05, "loss": 0.6723, "step": 91715 }, { "epoch": 1.02, "learning_rate": 3.307349337112824e-05, "loss": 0.6735, "step": 91720 }, { "epoch": 1.02, "learning_rate": 3.307257064398973e-05, "loss": 0.6717, "step": 91725 }, { "epoch": 1.02, "learning_rate": 3.307164791685121e-05, "loss": 0.6932, "step": 91730 }, { "epoch": 1.02, "learning_rate": 3.3070725189712703e-05, "loss": 0.6715, "step": 91735 }, { "epoch": 1.02, "learning_rate": 3.306980246257419e-05, "loss": 0.6414, "step": 91740 }, { "epoch": 1.02, "learning_rate": 3.306887973543568e-05, "loss": 0.6397, "step": 91745 }, { "epoch": 1.02, "learning_rate": 3.306795700829716e-05, "loss": 0.6547, "step": 91750 }, { "epoch": 1.02, "learning_rate": 3.3067034281158655e-05, "loss": 0.7072, "step": 91755 }, { "epoch": 1.02, "learning_rate": 3.306611155402014e-05, "loss": 0.7175, "step": 91760 }, { "epoch": 1.02, "learning_rate": 3.3065188826881624e-05, "loss": 0.7325, "step": 91765 }, { "epoch": 1.02, "learning_rate": 3.306426609974311e-05, "loss": 0.673, "step": 91770 }, { "epoch": 1.02, "learning_rate": 3.30633433726046e-05, "loss": 0.6844, "step": 91775 }, { "epoch": 1.02, "learning_rate": 3.3062420645466094e-05, "loss": 0.7138, "step": 91780 }, { "epoch": 1.02, "learning_rate": 3.3061497918327575e-05, "loss": 0.6903, "step": 91785 }, { "epoch": 1.02, "learning_rate": 3.306057519118906e-05, "loss": 0.682, "step": 91790 }, { "epoch": 1.02, "learning_rate": 3.305965246405055e-05, "loss": 0.7099, "step": 91795 }, { "epoch": 1.02, "learning_rate": 3.305872973691204e-05, "loss": 0.699, "step": 91800 }, { "epoch": 1.02, "learning_rate": 3.3057807009773527e-05, "loss": 0.7385, "step": 91805 }, { "epoch": 1.02, "learning_rate": 3.3056884282635014e-05, "loss": 0.6521, "step": 91810 }, { "epoch": 1.02, "learning_rate": 3.30559615554965e-05, "loss": 0.6636, "step": 91815 }, { "epoch": 1.02, "learning_rate": 3.305503882835799e-05, "loss": 0.6851, "step": 91820 }, { "epoch": 1.02, "learning_rate": 3.305411610121948e-05, "loss": 0.68, "step": 91825 }, { "epoch": 1.02, "learning_rate": 3.3053193374080966e-05, "loss": 0.6893, "step": 91830 }, { "epoch": 1.02, "learning_rate": 3.3052270646942454e-05, "loss": 0.6687, "step": 91835 }, { "epoch": 1.02, "learning_rate": 3.305134791980394e-05, "loss": 0.6714, "step": 91840 }, { "epoch": 1.02, "learning_rate": 3.305042519266543e-05, "loss": 0.6463, "step": 91845 }, { "epoch": 1.02, "learning_rate": 3.304950246552692e-05, "loss": 0.6816, "step": 91850 }, { "epoch": 1.02, "learning_rate": 3.3048579738388405e-05, "loss": 0.6165, "step": 91855 }, { "epoch": 1.02, "learning_rate": 3.3047657011249886e-05, "loss": 0.6627, "step": 91860 }, { "epoch": 1.02, "learning_rate": 3.304673428411138e-05, "loss": 0.7282, "step": 91865 }, { "epoch": 1.02, "learning_rate": 3.304581155697287e-05, "loss": 0.6962, "step": 91870 }, { "epoch": 1.02, "learning_rate": 3.304488882983435e-05, "loss": 0.6315, "step": 91875 }, { "epoch": 1.02, "learning_rate": 3.304396610269584e-05, "loss": 0.6701, "step": 91880 }, { "epoch": 1.02, "learning_rate": 3.304304337555733e-05, "loss": 0.6572, "step": 91885 }, { "epoch": 1.02, "learning_rate": 3.304212064841882e-05, "loss": 0.7028, "step": 91890 }, { "epoch": 1.02, "learning_rate": 3.30411979212803e-05, "loss": 0.7078, "step": 91895 }, { "epoch": 1.02, "learning_rate": 3.304027519414179e-05, "loss": 0.6939, "step": 91900 }, { "epoch": 1.02, "learning_rate": 3.3039352467003284e-05, "loss": 0.6669, "step": 91905 }, { "epoch": 1.02, "learning_rate": 3.3038429739864765e-05, "loss": 0.6586, "step": 91910 }, { "epoch": 1.02, "learning_rate": 3.303750701272625e-05, "loss": 0.6844, "step": 91915 }, { "epoch": 1.02, "learning_rate": 3.303658428558774e-05, "loss": 0.7147, "step": 91920 }, { "epoch": 1.02, "learning_rate": 3.303566155844923e-05, "loss": 0.6536, "step": 91925 }, { "epoch": 1.02, "learning_rate": 3.3034738831310716e-05, "loss": 0.6876, "step": 91930 }, { "epoch": 1.02, "learning_rate": 3.3033816104172204e-05, "loss": 0.6624, "step": 91935 }, { "epoch": 1.02, "learning_rate": 3.303289337703369e-05, "loss": 0.678, "step": 91940 }, { "epoch": 1.02, "learning_rate": 3.303197064989518e-05, "loss": 0.6521, "step": 91945 }, { "epoch": 1.02, "learning_rate": 3.303104792275667e-05, "loss": 0.6677, "step": 91950 }, { "epoch": 1.02, "learning_rate": 3.3030125195618155e-05, "loss": 0.6811, "step": 91955 }, { "epoch": 1.02, "learning_rate": 3.302920246847964e-05, "loss": 0.6551, "step": 91960 }, { "epoch": 1.02, "learning_rate": 3.302827974134113e-05, "loss": 0.6426, "step": 91965 }, { "epoch": 1.02, "learning_rate": 3.302735701420262e-05, "loss": 0.635, "step": 91970 }, { "epoch": 1.02, "learning_rate": 3.302643428706411e-05, "loss": 0.659, "step": 91975 }, { "epoch": 1.02, "learning_rate": 3.3025511559925595e-05, "loss": 0.7485, "step": 91980 }, { "epoch": 1.02, "learning_rate": 3.3024588832787076e-05, "loss": 0.6842, "step": 91985 }, { "epoch": 1.02, "learning_rate": 3.302366610564857e-05, "loss": 0.7072, "step": 91990 }, { "epoch": 1.02, "learning_rate": 3.302274337851006e-05, "loss": 0.6595, "step": 91995 }, { "epoch": 1.02, "learning_rate": 3.3021820651371546e-05, "loss": 0.7435, "step": 92000 }, { "epoch": 1.02, "eval_loss": 0.6680505275726318, "eval_runtime": 69.6077, "eval_samples_per_second": 28.732, "eval_steps_per_second": 14.366, "step": 92000 }, { "epoch": 1.02, "learning_rate": 3.302089792423303e-05, "loss": 0.6323, "step": 92005 }, { "epoch": 1.02, "learning_rate": 3.3019975197094515e-05, "loss": 0.7155, "step": 92010 }, { "epoch": 1.02, "learning_rate": 3.301905246995601e-05, "loss": 0.6677, "step": 92015 }, { "epoch": 1.02, "learning_rate": 3.30181297428175e-05, "loss": 0.6746, "step": 92020 }, { "epoch": 1.02, "learning_rate": 3.301720701567898e-05, "loss": 0.6387, "step": 92025 }, { "epoch": 1.02, "learning_rate": 3.3016284288540466e-05, "loss": 0.6342, "step": 92030 }, { "epoch": 1.02, "learning_rate": 3.301536156140196e-05, "loss": 0.6412, "step": 92035 }, { "epoch": 1.02, "learning_rate": 3.301443883426344e-05, "loss": 0.6818, "step": 92040 }, { "epoch": 1.02, "learning_rate": 3.301351610712493e-05, "loss": 0.7378, "step": 92045 }, { "epoch": 1.02, "learning_rate": 3.301259337998642e-05, "loss": 0.705, "step": 92050 }, { "epoch": 1.02, "learning_rate": 3.301167065284791e-05, "loss": 0.7209, "step": 92055 }, { "epoch": 1.02, "learning_rate": 3.301074792570939e-05, "loss": 0.6884, "step": 92060 }, { "epoch": 1.02, "learning_rate": 3.300982519857088e-05, "loss": 0.6539, "step": 92065 }, { "epoch": 1.02, "learning_rate": 3.300890247143237e-05, "loss": 0.6931, "step": 92070 }, { "epoch": 1.02, "learning_rate": 3.300797974429386e-05, "loss": 0.7085, "step": 92075 }, { "epoch": 1.02, "learning_rate": 3.3007057017155345e-05, "loss": 0.668, "step": 92080 }, { "epoch": 1.02, "learning_rate": 3.300613429001683e-05, "loss": 0.6543, "step": 92085 }, { "epoch": 1.02, "learning_rate": 3.300521156287832e-05, "loss": 0.6654, "step": 92090 }, { "epoch": 1.02, "learning_rate": 3.300428883573981e-05, "loss": 0.6981, "step": 92095 }, { "epoch": 1.02, "learning_rate": 3.3003366108601296e-05, "loss": 0.7275, "step": 92100 }, { "epoch": 1.02, "learning_rate": 3.3002443381462784e-05, "loss": 0.6964, "step": 92105 }, { "epoch": 1.02, "learning_rate": 3.300152065432427e-05, "loss": 0.6876, "step": 92110 }, { "epoch": 1.02, "learning_rate": 3.300059792718575e-05, "loss": 0.633, "step": 92115 }, { "epoch": 1.02, "learning_rate": 3.299967520004725e-05, "loss": 0.6755, "step": 92120 }, { "epoch": 1.02, "learning_rate": 3.2998752472908735e-05, "loss": 0.6478, "step": 92125 }, { "epoch": 1.02, "learning_rate": 3.299782974577022e-05, "loss": 0.6309, "step": 92130 }, { "epoch": 1.02, "learning_rate": 3.2996907018631704e-05, "loss": 0.703, "step": 92135 }, { "epoch": 1.02, "learning_rate": 3.29959842914932e-05, "loss": 0.6848, "step": 92140 }, { "epoch": 1.02, "learning_rate": 3.299506156435469e-05, "loss": 0.7439, "step": 92145 }, { "epoch": 1.02, "learning_rate": 3.299413883721617e-05, "loss": 0.6874, "step": 92150 }, { "epoch": 1.02, "learning_rate": 3.2993216110077656e-05, "loss": 0.7025, "step": 92155 }, { "epoch": 1.02, "learning_rate": 3.2992293382939144e-05, "loss": 0.7212, "step": 92160 }, { "epoch": 1.02, "learning_rate": 3.299137065580064e-05, "loss": 0.6426, "step": 92165 }, { "epoch": 1.02, "learning_rate": 3.299044792866212e-05, "loss": 0.6798, "step": 92170 }, { "epoch": 1.02, "learning_rate": 3.298952520152361e-05, "loss": 0.6529, "step": 92175 }, { "epoch": 1.02, "learning_rate": 3.2988602474385095e-05, "loss": 0.6661, "step": 92180 }, { "epoch": 1.02, "learning_rate": 3.298767974724658e-05, "loss": 0.7506, "step": 92185 }, { "epoch": 1.02, "learning_rate": 3.298675702010807e-05, "loss": 0.6836, "step": 92190 }, { "epoch": 1.02, "learning_rate": 3.298583429296956e-05, "loss": 0.6842, "step": 92195 }, { "epoch": 1.02, "learning_rate": 3.2984911565831046e-05, "loss": 0.6814, "step": 92200 }, { "epoch": 1.02, "learning_rate": 3.2983988838692534e-05, "loss": 0.6921, "step": 92205 }, { "epoch": 1.02, "learning_rate": 3.298306611155402e-05, "loss": 0.6279, "step": 92210 }, { "epoch": 1.02, "learning_rate": 3.298214338441551e-05, "loss": 0.6708, "step": 92215 }, { "epoch": 1.02, "learning_rate": 3.2981220657277e-05, "loss": 0.7318, "step": 92220 }, { "epoch": 1.02, "learning_rate": 3.298029793013848e-05, "loss": 0.7261, "step": 92225 }, { "epoch": 1.02, "learning_rate": 3.2979375202999973e-05, "loss": 0.6294, "step": 92230 }, { "epoch": 1.02, "learning_rate": 3.297845247586146e-05, "loss": 0.6631, "step": 92235 }, { "epoch": 1.02, "learning_rate": 3.297752974872295e-05, "loss": 0.5933, "step": 92240 }, { "epoch": 1.02, "learning_rate": 3.297660702158443e-05, "loss": 0.6567, "step": 92245 }, { "epoch": 1.02, "learning_rate": 3.2975684294445925e-05, "loss": 0.6762, "step": 92250 }, { "epoch": 1.02, "learning_rate": 3.297476156730741e-05, "loss": 0.6263, "step": 92255 }, { "epoch": 1.02, "learning_rate": 3.2973838840168894e-05, "loss": 0.6393, "step": 92260 }, { "epoch": 1.02, "learning_rate": 3.297291611303038e-05, "loss": 0.6668, "step": 92265 }, { "epoch": 1.02, "learning_rate": 3.2971993385891876e-05, "loss": 0.6221, "step": 92270 }, { "epoch": 1.02, "learning_rate": 3.2971070658753364e-05, "loss": 0.6808, "step": 92275 }, { "epoch": 1.02, "learning_rate": 3.2970147931614845e-05, "loss": 0.6125, "step": 92280 }, { "epoch": 1.02, "learning_rate": 3.296922520447633e-05, "loss": 0.6917, "step": 92285 }, { "epoch": 1.02, "learning_rate": 3.296830247733783e-05, "loss": 0.6477, "step": 92290 }, { "epoch": 1.02, "learning_rate": 3.296737975019931e-05, "loss": 0.7318, "step": 92295 }, { "epoch": 1.02, "learning_rate": 3.2966457023060796e-05, "loss": 0.6316, "step": 92300 }, { "epoch": 1.02, "learning_rate": 3.2965534295922284e-05, "loss": 0.7067, "step": 92305 }, { "epoch": 1.02, "learning_rate": 3.296461156878377e-05, "loss": 0.7594, "step": 92310 }, { "epoch": 1.02, "learning_rate": 3.296368884164526e-05, "loss": 0.6877, "step": 92315 }, { "epoch": 1.02, "learning_rate": 3.296276611450675e-05, "loss": 0.6656, "step": 92320 }, { "epoch": 1.02, "learning_rate": 3.2961843387368236e-05, "loss": 0.6376, "step": 92325 }, { "epoch": 1.02, "learning_rate": 3.2960920660229724e-05, "loss": 0.6561, "step": 92330 }, { "epoch": 1.02, "learning_rate": 3.295999793309121e-05, "loss": 0.6634, "step": 92335 }, { "epoch": 1.02, "learning_rate": 3.29590752059527e-05, "loss": 0.6536, "step": 92340 }, { "epoch": 1.02, "learning_rate": 3.295815247881419e-05, "loss": 0.7445, "step": 92345 }, { "epoch": 1.02, "learning_rate": 3.2957229751675675e-05, "loss": 0.6845, "step": 92350 }, { "epoch": 1.02, "learning_rate": 3.295630702453716e-05, "loss": 0.6864, "step": 92355 }, { "epoch": 1.02, "learning_rate": 3.295538429739865e-05, "loss": 0.6986, "step": 92360 }, { "epoch": 1.02, "learning_rate": 3.295446157026014e-05, "loss": 0.6683, "step": 92365 }, { "epoch": 1.02, "learning_rate": 3.295353884312162e-05, "loss": 0.6822, "step": 92370 }, { "epoch": 1.02, "learning_rate": 3.295261611598311e-05, "loss": 0.6748, "step": 92375 }, { "epoch": 1.02, "learning_rate": 3.29516933888446e-05, "loss": 0.671, "step": 92380 }, { "epoch": 1.02, "learning_rate": 3.295077066170609e-05, "loss": 0.7407, "step": 92385 }, { "epoch": 1.02, "learning_rate": 3.294984793456757e-05, "loss": 0.6684, "step": 92390 }, { "epoch": 1.02, "learning_rate": 3.294892520742906e-05, "loss": 0.7065, "step": 92395 }, { "epoch": 1.02, "learning_rate": 3.2948002480290553e-05, "loss": 0.6669, "step": 92400 }, { "epoch": 1.02, "learning_rate": 3.294707975315204e-05, "loss": 0.7387, "step": 92405 }, { "epoch": 1.02, "learning_rate": 3.294615702601352e-05, "loss": 0.6729, "step": 92410 }, { "epoch": 1.02, "learning_rate": 3.294523429887501e-05, "loss": 0.6392, "step": 92415 }, { "epoch": 1.02, "learning_rate": 3.2944311571736505e-05, "loss": 0.6307, "step": 92420 }, { "epoch": 1.02, "learning_rate": 3.2943388844597986e-05, "loss": 0.6411, "step": 92425 }, { "epoch": 1.02, "learning_rate": 3.2942466117459474e-05, "loss": 0.6632, "step": 92430 }, { "epoch": 1.02, "learning_rate": 3.294154339032096e-05, "loss": 0.6718, "step": 92435 }, { "epoch": 1.02, "learning_rate": 3.2940620663182456e-05, "loss": 0.6342, "step": 92440 }, { "epoch": 1.02, "learning_rate": 3.293969793604394e-05, "loss": 0.6725, "step": 92445 }, { "epoch": 1.02, "learning_rate": 3.2938775208905425e-05, "loss": 0.6991, "step": 92450 }, { "epoch": 1.02, "learning_rate": 3.293785248176691e-05, "loss": 0.6616, "step": 92455 }, { "epoch": 1.02, "learning_rate": 3.29369297546284e-05, "loss": 0.7275, "step": 92460 }, { "epoch": 1.02, "learning_rate": 3.293600702748989e-05, "loss": 0.735, "step": 92465 }, { "epoch": 1.02, "learning_rate": 3.2935084300351377e-05, "loss": 0.6535, "step": 92470 }, { "epoch": 1.02, "learning_rate": 3.2934161573212864e-05, "loss": 0.6942, "step": 92475 }, { "epoch": 1.02, "learning_rate": 3.293323884607435e-05, "loss": 0.6417, "step": 92480 }, { "epoch": 1.02, "learning_rate": 3.293231611893584e-05, "loss": 0.6774, "step": 92485 }, { "epoch": 1.02, "learning_rate": 3.293139339179733e-05, "loss": 0.659, "step": 92490 }, { "epoch": 1.02, "learning_rate": 3.2930470664658816e-05, "loss": 0.6791, "step": 92495 }, { "epoch": 1.02, "learning_rate": 3.29295479375203e-05, "loss": 0.7117, "step": 92500 }, { "epoch": 1.02, "learning_rate": 3.292862521038179e-05, "loss": 0.7051, "step": 92505 }, { "epoch": 1.02, "learning_rate": 3.292770248324328e-05, "loss": 0.691, "step": 92510 }, { "epoch": 1.02, "learning_rate": 3.292677975610477e-05, "loss": 0.7376, "step": 92515 }, { "epoch": 1.02, "learning_rate": 3.292585702896625e-05, "loss": 0.6741, "step": 92520 }, { "epoch": 1.02, "learning_rate": 3.2924934301827736e-05, "loss": 0.7413, "step": 92525 }, { "epoch": 1.02, "learning_rate": 3.292401157468923e-05, "loss": 0.7243, "step": 92530 }, { "epoch": 1.02, "learning_rate": 3.292308884755071e-05, "loss": 0.6741, "step": 92535 }, { "epoch": 1.02, "learning_rate": 3.29221661204122e-05, "loss": 0.6495, "step": 92540 }, { "epoch": 1.02, "learning_rate": 3.292124339327369e-05, "loss": 0.6859, "step": 92545 }, { "epoch": 1.02, "learning_rate": 3.292032066613518e-05, "loss": 0.6517, "step": 92550 }, { "epoch": 1.02, "learning_rate": 3.291939793899666e-05, "loss": 0.7167, "step": 92555 }, { "epoch": 1.02, "learning_rate": 3.291847521185815e-05, "loss": 0.6171, "step": 92560 }, { "epoch": 1.02, "learning_rate": 3.291755248471964e-05, "loss": 0.7106, "step": 92565 }, { "epoch": 1.03, "learning_rate": 3.291662975758113e-05, "loss": 0.6458, "step": 92570 }, { "epoch": 1.03, "learning_rate": 3.2915707030442615e-05, "loss": 0.6657, "step": 92575 }, { "epoch": 1.03, "learning_rate": 3.29147843033041e-05, "loss": 0.6136, "step": 92580 }, { "epoch": 1.03, "learning_rate": 3.291386157616559e-05, "loss": 0.6591, "step": 92585 }, { "epoch": 1.03, "learning_rate": 3.291293884902708e-05, "loss": 0.7006, "step": 92590 }, { "epoch": 1.03, "learning_rate": 3.2912016121888566e-05, "loss": 0.6529, "step": 92595 }, { "epoch": 1.03, "learning_rate": 3.2911093394750054e-05, "loss": 0.7466, "step": 92600 }, { "epoch": 1.03, "learning_rate": 3.291017066761154e-05, "loss": 0.6969, "step": 92605 }, { "epoch": 1.03, "learning_rate": 3.290924794047302e-05, "loss": 0.6524, "step": 92610 }, { "epoch": 1.03, "learning_rate": 3.290832521333452e-05, "loss": 0.6791, "step": 92615 }, { "epoch": 1.03, "learning_rate": 3.2907402486196005e-05, "loss": 0.6675, "step": 92620 }, { "epoch": 1.03, "learning_rate": 3.290647975905749e-05, "loss": 0.7394, "step": 92625 }, { "epoch": 1.03, "learning_rate": 3.2905557031918974e-05, "loss": 0.7051, "step": 92630 }, { "epoch": 1.03, "learning_rate": 3.290463430478047e-05, "loss": 0.6836, "step": 92635 }, { "epoch": 1.03, "learning_rate": 3.290371157764196e-05, "loss": 0.6997, "step": 92640 }, { "epoch": 1.03, "learning_rate": 3.290278885050344e-05, "loss": 0.6981, "step": 92645 }, { "epoch": 1.03, "learning_rate": 3.2901866123364926e-05, "loss": 0.6472, "step": 92650 }, { "epoch": 1.03, "learning_rate": 3.290094339622642e-05, "loss": 0.6278, "step": 92655 }, { "epoch": 1.03, "learning_rate": 3.290002066908791e-05, "loss": 0.7031, "step": 92660 }, { "epoch": 1.03, "learning_rate": 3.289909794194939e-05, "loss": 0.6654, "step": 92665 }, { "epoch": 1.03, "learning_rate": 3.289817521481088e-05, "loss": 0.6273, "step": 92670 }, { "epoch": 1.03, "learning_rate": 3.289725248767237e-05, "loss": 0.6924, "step": 92675 }, { "epoch": 1.03, "learning_rate": 3.289632976053386e-05, "loss": 0.6453, "step": 92680 }, { "epoch": 1.03, "learning_rate": 3.289540703339534e-05, "loss": 0.6513, "step": 92685 }, { "epoch": 1.03, "learning_rate": 3.289448430625683e-05, "loss": 0.7238, "step": 92690 }, { "epoch": 1.03, "learning_rate": 3.2893561579118316e-05, "loss": 0.7413, "step": 92695 }, { "epoch": 1.03, "learning_rate": 3.2892638851979804e-05, "loss": 0.6659, "step": 92700 }, { "epoch": 1.03, "learning_rate": 3.289171612484129e-05, "loss": 0.6496, "step": 92705 }, { "epoch": 1.03, "learning_rate": 3.289079339770278e-05, "loss": 0.6472, "step": 92710 }, { "epoch": 1.03, "learning_rate": 3.288987067056427e-05, "loss": 0.6981, "step": 92715 }, { "epoch": 1.03, "learning_rate": 3.2888947943425755e-05, "loss": 0.6663, "step": 92720 }, { "epoch": 1.03, "learning_rate": 3.288802521628724e-05, "loss": 0.7004, "step": 92725 }, { "epoch": 1.03, "learning_rate": 3.288710248914873e-05, "loss": 0.6499, "step": 92730 }, { "epoch": 1.03, "learning_rate": 3.288617976201022e-05, "loss": 0.6946, "step": 92735 }, { "epoch": 1.03, "learning_rate": 3.288525703487171e-05, "loss": 0.677, "step": 92740 }, { "epoch": 1.03, "learning_rate": 3.2884334307733195e-05, "loss": 0.6657, "step": 92745 }, { "epoch": 1.03, "learning_rate": 3.288341158059468e-05, "loss": 0.6428, "step": 92750 }, { "epoch": 1.03, "learning_rate": 3.288248885345617e-05, "loss": 0.7087, "step": 92755 }, { "epoch": 1.03, "learning_rate": 3.288156612631765e-05, "loss": 0.6947, "step": 92760 }, { "epoch": 1.03, "learning_rate": 3.2880643399179146e-05, "loss": 0.6588, "step": 92765 }, { "epoch": 1.03, "learning_rate": 3.2879720672040634e-05, "loss": 0.7145, "step": 92770 }, { "epoch": 1.03, "learning_rate": 3.2878797944902115e-05, "loss": 0.6633, "step": 92775 }, { "epoch": 1.03, "learning_rate": 3.28778752177636e-05, "loss": 0.7343, "step": 92780 }, { "epoch": 1.03, "learning_rate": 3.28769524906251e-05, "loss": 0.6973, "step": 92785 }, { "epoch": 1.03, "learning_rate": 3.2876029763486585e-05, "loss": 0.6611, "step": 92790 }, { "epoch": 1.03, "learning_rate": 3.2875107036348066e-05, "loss": 0.6536, "step": 92795 }, { "epoch": 1.03, "learning_rate": 3.2874184309209554e-05, "loss": 0.6819, "step": 92800 }, { "epoch": 1.03, "learning_rate": 3.287326158207105e-05, "loss": 0.6705, "step": 92805 }, { "epoch": 1.03, "learning_rate": 3.287233885493253e-05, "loss": 0.7037, "step": 92810 }, { "epoch": 1.03, "learning_rate": 3.287141612779402e-05, "loss": 0.7058, "step": 92815 }, { "epoch": 1.03, "learning_rate": 3.2870493400655506e-05, "loss": 0.6796, "step": 92820 }, { "epoch": 1.03, "learning_rate": 3.2869570673517e-05, "loss": 0.6164, "step": 92825 }, { "epoch": 1.03, "learning_rate": 3.286864794637848e-05, "loss": 0.666, "step": 92830 }, { "epoch": 1.03, "learning_rate": 3.286772521923997e-05, "loss": 0.7317, "step": 92835 }, { "epoch": 1.03, "learning_rate": 3.286680249210146e-05, "loss": 0.6819, "step": 92840 }, { "epoch": 1.03, "learning_rate": 3.2865879764962945e-05, "loss": 0.7452, "step": 92845 }, { "epoch": 1.03, "learning_rate": 3.286495703782443e-05, "loss": 0.6641, "step": 92850 }, { "epoch": 1.03, "learning_rate": 3.286403431068592e-05, "loss": 0.7018, "step": 92855 }, { "epoch": 1.03, "learning_rate": 3.286311158354741e-05, "loss": 0.6409, "step": 92860 }, { "epoch": 1.03, "learning_rate": 3.2862188856408896e-05, "loss": 0.6601, "step": 92865 }, { "epoch": 1.03, "learning_rate": 3.2861266129270384e-05, "loss": 0.6582, "step": 92870 }, { "epoch": 1.03, "learning_rate": 3.286034340213187e-05, "loss": 0.6556, "step": 92875 }, { "epoch": 1.03, "learning_rate": 3.285942067499336e-05, "loss": 0.7073, "step": 92880 }, { "epoch": 1.03, "learning_rate": 3.285849794785484e-05, "loss": 0.6736, "step": 92885 }, { "epoch": 1.03, "learning_rate": 3.2857575220716336e-05, "loss": 0.6777, "step": 92890 }, { "epoch": 1.03, "learning_rate": 3.285665249357782e-05, "loss": 0.7138, "step": 92895 }, { "epoch": 1.03, "learning_rate": 3.285572976643931e-05, "loss": 0.7262, "step": 92900 }, { "epoch": 1.03, "learning_rate": 3.285480703930079e-05, "loss": 0.7045, "step": 92905 }, { "epoch": 1.03, "learning_rate": 3.285388431216228e-05, "loss": 0.7061, "step": 92910 }, { "epoch": 1.03, "learning_rate": 3.2852961585023775e-05, "loss": 0.6961, "step": 92915 }, { "epoch": 1.03, "learning_rate": 3.2852038857885256e-05, "loss": 0.6685, "step": 92920 }, { "epoch": 1.03, "learning_rate": 3.2851116130746744e-05, "loss": 0.6814, "step": 92925 }, { "epoch": 1.03, "learning_rate": 3.285019340360823e-05, "loss": 0.6678, "step": 92930 }, { "epoch": 1.03, "learning_rate": 3.2849270676469726e-05, "loss": 0.6702, "step": 92935 }, { "epoch": 1.03, "learning_rate": 3.284834794933121e-05, "loss": 0.7132, "step": 92940 }, { "epoch": 1.03, "learning_rate": 3.2847425222192695e-05, "loss": 0.6651, "step": 92945 }, { "epoch": 1.03, "learning_rate": 3.284650249505418e-05, "loss": 0.7104, "step": 92950 }, { "epoch": 1.03, "learning_rate": 3.284557976791567e-05, "loss": 0.6369, "step": 92955 }, { "epoch": 1.03, "learning_rate": 3.284465704077716e-05, "loss": 0.6149, "step": 92960 }, { "epoch": 1.03, "learning_rate": 3.2843734313638646e-05, "loss": 0.6289, "step": 92965 }, { "epoch": 1.03, "learning_rate": 3.2842811586500134e-05, "loss": 0.6798, "step": 92970 }, { "epoch": 1.03, "learning_rate": 3.284188885936162e-05, "loss": 0.7124, "step": 92975 }, { "epoch": 1.03, "learning_rate": 3.284096613222311e-05, "loss": 0.7661, "step": 92980 }, { "epoch": 1.03, "learning_rate": 3.28400434050846e-05, "loss": 0.6887, "step": 92985 }, { "epoch": 1.03, "learning_rate": 3.2839120677946086e-05, "loss": 0.7052, "step": 92990 }, { "epoch": 1.03, "learning_rate": 3.283819795080757e-05, "loss": 0.6419, "step": 92995 }, { "epoch": 1.03, "learning_rate": 3.283727522366906e-05, "loss": 0.7108, "step": 93000 }, { "epoch": 1.03, "eval_loss": 0.6619002223014832, "eval_runtime": 69.8061, "eval_samples_per_second": 28.651, "eval_steps_per_second": 14.325, "step": 93000 }, { "epoch": 1.03, "learning_rate": 3.283635249653055e-05, "loss": 0.7188, "step": 93005 }, { "epoch": 1.03, "learning_rate": 3.283542976939204e-05, "loss": 0.6361, "step": 93010 }, { "epoch": 1.03, "learning_rate": 3.283450704225352e-05, "loss": 0.6502, "step": 93015 }, { "epoch": 1.03, "learning_rate": 3.283358431511501e-05, "loss": 0.6563, "step": 93020 }, { "epoch": 1.03, "learning_rate": 3.28326615879765e-05, "loss": 0.668, "step": 93025 }, { "epoch": 1.03, "learning_rate": 3.283173886083798e-05, "loss": 0.6418, "step": 93030 }, { "epoch": 1.03, "learning_rate": 3.283081613369947e-05, "loss": 0.7048, "step": 93035 }, { "epoch": 1.03, "learning_rate": 3.2829893406560964e-05, "loss": 0.6828, "step": 93040 }, { "epoch": 1.03, "learning_rate": 3.282897067942245e-05, "loss": 0.6641, "step": 93045 }, { "epoch": 1.03, "learning_rate": 3.282804795228393e-05, "loss": 0.6772, "step": 93050 }, { "epoch": 1.03, "learning_rate": 3.282712522514542e-05, "loss": 0.6971, "step": 93055 }, { "epoch": 1.03, "learning_rate": 3.282620249800691e-05, "loss": 0.6898, "step": 93060 }, { "epoch": 1.03, "learning_rate": 3.2825279770868403e-05, "loss": 0.7235, "step": 93065 }, { "epoch": 1.03, "learning_rate": 3.2824357043729885e-05, "loss": 0.618, "step": 93070 }, { "epoch": 1.03, "learning_rate": 3.282343431659137e-05, "loss": 0.682, "step": 93075 }, { "epoch": 1.03, "learning_rate": 3.282251158945286e-05, "loss": 0.6685, "step": 93080 }, { "epoch": 1.03, "learning_rate": 3.282158886231435e-05, "loss": 0.72, "step": 93085 }, { "epoch": 1.03, "learning_rate": 3.2820666135175836e-05, "loss": 0.7023, "step": 93090 }, { "epoch": 1.03, "learning_rate": 3.2819743408037324e-05, "loss": 0.6683, "step": 93095 }, { "epoch": 1.03, "learning_rate": 3.281882068089881e-05, "loss": 0.6725, "step": 93100 }, { "epoch": 1.03, "learning_rate": 3.28178979537603e-05, "loss": 0.7675, "step": 93105 }, { "epoch": 1.03, "learning_rate": 3.281697522662179e-05, "loss": 0.7055, "step": 93110 }, { "epoch": 1.03, "learning_rate": 3.2816052499483275e-05, "loss": 0.6437, "step": 93115 }, { "epoch": 1.03, "learning_rate": 3.281512977234476e-05, "loss": 0.6297, "step": 93120 }, { "epoch": 1.03, "learning_rate": 3.281420704520625e-05, "loss": 0.6687, "step": 93125 }, { "epoch": 1.03, "learning_rate": 3.281328431806774e-05, "loss": 0.6662, "step": 93130 }, { "epoch": 1.03, "learning_rate": 3.2812361590929227e-05, "loss": 0.6626, "step": 93135 }, { "epoch": 1.03, "learning_rate": 3.2811438863790714e-05, "loss": 0.7237, "step": 93140 }, { "epoch": 1.03, "learning_rate": 3.2810516136652195e-05, "loss": 0.6828, "step": 93145 }, { "epoch": 1.03, "learning_rate": 3.280959340951369e-05, "loss": 0.7079, "step": 93150 }, { "epoch": 1.03, "learning_rate": 3.280867068237518e-05, "loss": 0.6767, "step": 93155 }, { "epoch": 1.03, "learning_rate": 3.280774795523666e-05, "loss": 0.6091, "step": 93160 }, { "epoch": 1.03, "learning_rate": 3.280682522809815e-05, "loss": 0.7139, "step": 93165 }, { "epoch": 1.03, "learning_rate": 3.280590250095964e-05, "loss": 0.6987, "step": 93170 }, { "epoch": 1.03, "learning_rate": 3.280497977382113e-05, "loss": 0.7302, "step": 93175 }, { "epoch": 1.03, "learning_rate": 3.280405704668261e-05, "loss": 0.7407, "step": 93180 }, { "epoch": 1.03, "learning_rate": 3.28031343195441e-05, "loss": 0.6882, "step": 93185 }, { "epoch": 1.03, "learning_rate": 3.280221159240559e-05, "loss": 0.6461, "step": 93190 }, { "epoch": 1.03, "learning_rate": 3.2801288865267074e-05, "loss": 0.6258, "step": 93195 }, { "epoch": 1.03, "learning_rate": 3.280036613812856e-05, "loss": 0.6386, "step": 93200 }, { "epoch": 1.03, "learning_rate": 3.279944341099005e-05, "loss": 0.6414, "step": 93205 }, { "epoch": 1.03, "learning_rate": 3.279852068385154e-05, "loss": 0.6577, "step": 93210 }, { "epoch": 1.03, "learning_rate": 3.2797597956713025e-05, "loss": 0.6814, "step": 93215 }, { "epoch": 1.03, "learning_rate": 3.279667522957451e-05, "loss": 0.719, "step": 93220 }, { "epoch": 1.03, "learning_rate": 3.2795752502436e-05, "loss": 0.7123, "step": 93225 }, { "epoch": 1.03, "learning_rate": 3.279482977529749e-05, "loss": 0.6279, "step": 93230 }, { "epoch": 1.03, "learning_rate": 3.279390704815898e-05, "loss": 0.6654, "step": 93235 }, { "epoch": 1.03, "learning_rate": 3.2792984321020465e-05, "loss": 0.6728, "step": 93240 }, { "epoch": 1.03, "learning_rate": 3.279206159388195e-05, "loss": 0.6287, "step": 93245 }, { "epoch": 1.03, "learning_rate": 3.279113886674344e-05, "loss": 0.6124, "step": 93250 }, { "epoch": 1.03, "learning_rate": 3.279021613960493e-05, "loss": 0.754, "step": 93255 }, { "epoch": 1.03, "learning_rate": 3.2789293412466416e-05, "loss": 0.6372, "step": 93260 }, { "epoch": 1.03, "learning_rate": 3.2788370685327904e-05, "loss": 0.7049, "step": 93265 }, { "epoch": 1.03, "learning_rate": 3.2787447958189385e-05, "loss": 0.6813, "step": 93270 }, { "epoch": 1.03, "learning_rate": 3.278652523105088e-05, "loss": 0.6862, "step": 93275 }, { "epoch": 1.03, "learning_rate": 3.278560250391237e-05, "loss": 0.6457, "step": 93280 }, { "epoch": 1.03, "learning_rate": 3.2784679776773855e-05, "loss": 0.6833, "step": 93285 }, { "epoch": 1.03, "learning_rate": 3.2783757049635336e-05, "loss": 0.6647, "step": 93290 }, { "epoch": 1.03, "learning_rate": 3.2782834322496824e-05, "loss": 0.7119, "step": 93295 }, { "epoch": 1.03, "learning_rate": 3.278191159535832e-05, "loss": 0.6453, "step": 93300 }, { "epoch": 1.03, "learning_rate": 3.27809888682198e-05, "loss": 0.6743, "step": 93305 }, { "epoch": 1.03, "learning_rate": 3.278006614108129e-05, "loss": 0.6719, "step": 93310 }, { "epoch": 1.03, "learning_rate": 3.2779143413942776e-05, "loss": 0.6968, "step": 93315 }, { "epoch": 1.03, "learning_rate": 3.277822068680427e-05, "loss": 0.6703, "step": 93320 }, { "epoch": 1.03, "learning_rate": 3.277729795966575e-05, "loss": 0.6981, "step": 93325 }, { "epoch": 1.03, "learning_rate": 3.277637523252724e-05, "loss": 0.6405, "step": 93330 }, { "epoch": 1.03, "learning_rate": 3.277545250538873e-05, "loss": 0.7313, "step": 93335 }, { "epoch": 1.03, "learning_rate": 3.2774529778250215e-05, "loss": 0.6578, "step": 93340 }, { "epoch": 1.03, "learning_rate": 3.27736070511117e-05, "loss": 0.6712, "step": 93345 }, { "epoch": 1.03, "learning_rate": 3.277268432397319e-05, "loss": 0.6105, "step": 93350 }, { "epoch": 1.03, "learning_rate": 3.277176159683468e-05, "loss": 0.7238, "step": 93355 }, { "epoch": 1.03, "learning_rate": 3.2770838869696166e-05, "loss": 0.6559, "step": 93360 }, { "epoch": 1.03, "learning_rate": 3.2769916142557654e-05, "loss": 0.7, "step": 93365 }, { "epoch": 1.03, "learning_rate": 3.276899341541914e-05, "loss": 0.6667, "step": 93370 }, { "epoch": 1.03, "learning_rate": 3.276807068828063e-05, "loss": 0.6994, "step": 93375 }, { "epoch": 1.03, "learning_rate": 3.276714796114211e-05, "loss": 0.6939, "step": 93380 }, { "epoch": 1.03, "learning_rate": 3.2766225234003605e-05, "loss": 0.6783, "step": 93385 }, { "epoch": 1.03, "learning_rate": 3.276530250686509e-05, "loss": 0.6668, "step": 93390 }, { "epoch": 1.03, "learning_rate": 3.276437977972658e-05, "loss": 0.662, "step": 93395 }, { "epoch": 1.03, "learning_rate": 3.276345705258806e-05, "loss": 0.6664, "step": 93400 }, { "epoch": 1.03, "learning_rate": 3.276253432544956e-05, "loss": 0.73, "step": 93405 }, { "epoch": 1.03, "learning_rate": 3.2761611598311045e-05, "loss": 0.6419, "step": 93410 }, { "epoch": 1.03, "learning_rate": 3.2760688871172526e-05, "loss": 0.6568, "step": 93415 }, { "epoch": 1.03, "learning_rate": 3.2759766144034014e-05, "loss": 0.6783, "step": 93420 }, { "epoch": 1.03, "learning_rate": 3.275884341689551e-05, "loss": 0.7156, "step": 93425 }, { "epoch": 1.03, "learning_rate": 3.2757920689756996e-05, "loss": 0.735, "step": 93430 }, { "epoch": 1.03, "learning_rate": 3.275699796261848e-05, "loss": 0.6973, "step": 93435 }, { "epoch": 1.03, "learning_rate": 3.2756075235479965e-05, "loss": 0.661, "step": 93440 }, { "epoch": 1.03, "learning_rate": 3.275515250834145e-05, "loss": 0.7451, "step": 93445 }, { "epoch": 1.03, "learning_rate": 3.275422978120295e-05, "loss": 0.6863, "step": 93450 }, { "epoch": 1.03, "learning_rate": 3.275330705406443e-05, "loss": 0.6917, "step": 93455 }, { "epoch": 1.03, "learning_rate": 3.2752384326925916e-05, "loss": 0.7197, "step": 93460 }, { "epoch": 1.03, "learning_rate": 3.2751461599787404e-05, "loss": 0.6444, "step": 93465 }, { "epoch": 1.03, "learning_rate": 3.275053887264889e-05, "loss": 0.6414, "step": 93470 }, { "epoch": 1.04, "learning_rate": 3.274961614551038e-05, "loss": 0.6755, "step": 93475 }, { "epoch": 1.04, "learning_rate": 3.274869341837187e-05, "loss": 0.6881, "step": 93480 }, { "epoch": 1.04, "learning_rate": 3.2747770691233356e-05, "loss": 0.7291, "step": 93485 }, { "epoch": 1.04, "learning_rate": 3.2746847964094843e-05, "loss": 0.668, "step": 93490 }, { "epoch": 1.04, "learning_rate": 3.274592523695633e-05, "loss": 0.6907, "step": 93495 }, { "epoch": 1.04, "learning_rate": 3.274500250981782e-05, "loss": 0.7008, "step": 93500 }, { "epoch": 1.04, "learning_rate": 3.274407978267931e-05, "loss": 0.7021, "step": 93505 }, { "epoch": 1.04, "learning_rate": 3.274315705554079e-05, "loss": 0.6769, "step": 93510 }, { "epoch": 1.04, "learning_rate": 3.274223432840228e-05, "loss": 0.6803, "step": 93515 }, { "epoch": 1.04, "learning_rate": 3.274131160126377e-05, "loss": 0.7203, "step": 93520 }, { "epoch": 1.04, "learning_rate": 3.274038887412526e-05, "loss": 0.6699, "step": 93525 }, { "epoch": 1.04, "learning_rate": 3.273946614698674e-05, "loss": 0.7027, "step": 93530 }, { "epoch": 1.04, "learning_rate": 3.2738543419848234e-05, "loss": 0.7067, "step": 93535 }, { "epoch": 1.04, "learning_rate": 3.273762069270972e-05, "loss": 0.6683, "step": 93540 }, { "epoch": 1.04, "learning_rate": 3.27366979655712e-05, "loss": 0.6553, "step": 93545 }, { "epoch": 1.04, "learning_rate": 3.273577523843269e-05, "loss": 0.6655, "step": 93550 }, { "epoch": 1.04, "learning_rate": 3.2734852511294186e-05, "loss": 0.6512, "step": 93555 }, { "epoch": 1.04, "learning_rate": 3.273392978415567e-05, "loss": 0.7131, "step": 93560 }, { "epoch": 1.04, "learning_rate": 3.2733007057017154e-05, "loss": 0.6302, "step": 93565 }, { "epoch": 1.04, "learning_rate": 3.273208432987864e-05, "loss": 0.672, "step": 93570 }, { "epoch": 1.04, "learning_rate": 3.273116160274014e-05, "loss": 0.6806, "step": 93575 }, { "epoch": 1.04, "learning_rate": 3.273023887560162e-05, "loss": 0.5949, "step": 93580 }, { "epoch": 1.04, "learning_rate": 3.2729316148463106e-05, "loss": 0.6431, "step": 93585 }, { "epoch": 1.04, "learning_rate": 3.2728393421324594e-05, "loss": 0.7512, "step": 93590 }, { "epoch": 1.04, "learning_rate": 3.272747069418608e-05, "loss": 0.6794, "step": 93595 }, { "epoch": 1.04, "learning_rate": 3.272654796704757e-05, "loss": 0.7028, "step": 93600 }, { "epoch": 1.04, "learning_rate": 3.272562523990906e-05, "loss": 0.6451, "step": 93605 }, { "epoch": 1.04, "learning_rate": 3.2724702512770545e-05, "loss": 0.7533, "step": 93610 }, { "epoch": 1.04, "learning_rate": 3.272377978563203e-05, "loss": 0.6797, "step": 93615 }, { "epoch": 1.04, "learning_rate": 3.272285705849352e-05, "loss": 0.7349, "step": 93620 }, { "epoch": 1.04, "learning_rate": 3.272193433135501e-05, "loss": 0.6648, "step": 93625 }, { "epoch": 1.04, "learning_rate": 3.2721011604216496e-05, "loss": 0.7373, "step": 93630 }, { "epoch": 1.04, "learning_rate": 3.2720088877077984e-05, "loss": 0.6658, "step": 93635 }, { "epoch": 1.04, "learning_rate": 3.271916614993947e-05, "loss": 0.6582, "step": 93640 }, { "epoch": 1.04, "learning_rate": 3.271824342280096e-05, "loss": 0.6204, "step": 93645 }, { "epoch": 1.04, "learning_rate": 3.271732069566245e-05, "loss": 0.7255, "step": 93650 }, { "epoch": 1.04, "learning_rate": 3.271639796852393e-05, "loss": 0.646, "step": 93655 }, { "epoch": 1.04, "learning_rate": 3.2715475241385424e-05, "loss": 0.6667, "step": 93660 }, { "epoch": 1.04, "learning_rate": 3.271455251424691e-05, "loss": 0.6933, "step": 93665 }, { "epoch": 1.04, "learning_rate": 3.27136297871084e-05, "loss": 0.6674, "step": 93670 }, { "epoch": 1.04, "learning_rate": 3.271270705996988e-05, "loss": 0.7127, "step": 93675 }, { "epoch": 1.04, "learning_rate": 3.271178433283137e-05, "loss": 0.7146, "step": 93680 }, { "epoch": 1.04, "learning_rate": 3.271086160569286e-05, "loss": 0.6368, "step": 93685 }, { "epoch": 1.04, "learning_rate": 3.2709938878554344e-05, "loss": 0.6878, "step": 93690 }, { "epoch": 1.04, "learning_rate": 3.270901615141583e-05, "loss": 0.7287, "step": 93695 }, { "epoch": 1.04, "learning_rate": 3.270809342427732e-05, "loss": 0.7124, "step": 93700 }, { "epoch": 1.04, "learning_rate": 3.2707170697138814e-05, "loss": 0.6211, "step": 93705 }, { "epoch": 1.04, "learning_rate": 3.2706247970000295e-05, "loss": 0.6453, "step": 93710 }, { "epoch": 1.04, "learning_rate": 3.270532524286178e-05, "loss": 0.6734, "step": 93715 }, { "epoch": 1.04, "learning_rate": 3.270440251572327e-05, "loss": 0.6867, "step": 93720 }, { "epoch": 1.04, "learning_rate": 3.270347978858476e-05, "loss": 0.6814, "step": 93725 }, { "epoch": 1.04, "learning_rate": 3.270255706144625e-05, "loss": 0.6536, "step": 93730 }, { "epoch": 1.04, "learning_rate": 3.2701634334307735e-05, "loss": 0.6953, "step": 93735 }, { "epoch": 1.04, "learning_rate": 3.270071160716922e-05, "loss": 0.7203, "step": 93740 }, { "epoch": 1.04, "learning_rate": 3.269978888003071e-05, "loss": 0.6983, "step": 93745 }, { "epoch": 1.04, "learning_rate": 3.26988661528922e-05, "loss": 0.6999, "step": 93750 }, { "epoch": 1.04, "learning_rate": 3.2697943425753686e-05, "loss": 0.6397, "step": 93755 }, { "epoch": 1.04, "learning_rate": 3.2697020698615174e-05, "loss": 0.6609, "step": 93760 }, { "epoch": 1.04, "learning_rate": 3.2696097971476655e-05, "loss": 0.6181, "step": 93765 }, { "epoch": 1.04, "learning_rate": 3.269517524433815e-05, "loss": 0.6857, "step": 93770 }, { "epoch": 1.04, "learning_rate": 3.269425251719964e-05, "loss": 0.7072, "step": 93775 }, { "epoch": 1.04, "learning_rate": 3.2693329790061125e-05, "loss": 0.6961, "step": 93780 }, { "epoch": 1.04, "learning_rate": 3.2692407062922606e-05, "loss": 0.6795, "step": 93785 }, { "epoch": 1.04, "learning_rate": 3.26914843357841e-05, "loss": 0.6808, "step": 93790 }, { "epoch": 1.04, "learning_rate": 3.269056160864559e-05, "loss": 0.6749, "step": 93795 }, { "epoch": 1.04, "learning_rate": 3.268963888150707e-05, "loss": 0.6367, "step": 93800 }, { "epoch": 1.04, "learning_rate": 3.268871615436856e-05, "loss": 0.6869, "step": 93805 }, { "epoch": 1.04, "learning_rate": 3.268779342723005e-05, "loss": 0.5897, "step": 93810 }, { "epoch": 1.04, "learning_rate": 3.268687070009154e-05, "loss": 0.7005, "step": 93815 }, { "epoch": 1.04, "learning_rate": 3.268594797295302e-05, "loss": 0.7053, "step": 93820 }, { "epoch": 1.04, "learning_rate": 3.268502524581451e-05, "loss": 0.7382, "step": 93825 }, { "epoch": 1.04, "learning_rate": 3.2684102518676e-05, "loss": 0.6689, "step": 93830 }, { "epoch": 1.04, "learning_rate": 3.268317979153749e-05, "loss": 0.6975, "step": 93835 }, { "epoch": 1.04, "learning_rate": 3.268225706439897e-05, "loss": 0.6646, "step": 93840 }, { "epoch": 1.04, "learning_rate": 3.268133433726046e-05, "loss": 0.6657, "step": 93845 }, { "epoch": 1.04, "learning_rate": 3.268041161012195e-05, "loss": 0.699, "step": 93850 }, { "epoch": 1.04, "learning_rate": 3.2679488882983436e-05, "loss": 0.6561, "step": 93855 }, { "epoch": 1.04, "learning_rate": 3.2678566155844924e-05, "loss": 0.6908, "step": 93860 }, { "epoch": 1.04, "learning_rate": 3.267764342870641e-05, "loss": 0.7154, "step": 93865 }, { "epoch": 1.04, "learning_rate": 3.26767207015679e-05, "loss": 0.6402, "step": 93870 }, { "epoch": 1.04, "learning_rate": 3.267579797442939e-05, "loss": 0.7222, "step": 93875 }, { "epoch": 1.04, "learning_rate": 3.2674875247290875e-05, "loss": 0.6864, "step": 93880 }, { "epoch": 1.04, "learning_rate": 3.267395252015236e-05, "loss": 0.7142, "step": 93885 }, { "epoch": 1.04, "learning_rate": 3.267302979301385e-05, "loss": 0.6777, "step": 93890 }, { "epoch": 1.04, "learning_rate": 3.267210706587533e-05, "loss": 0.7253, "step": 93895 }, { "epoch": 1.04, "learning_rate": 3.267118433873683e-05, "loss": 0.6966, "step": 93900 }, { "epoch": 1.04, "learning_rate": 3.2670261611598315e-05, "loss": 0.7032, "step": 93905 }, { "epoch": 1.04, "learning_rate": 3.26693388844598e-05, "loss": 0.6624, "step": 93910 }, { "epoch": 1.04, "learning_rate": 3.2668416157321284e-05, "loss": 0.7181, "step": 93915 }, { "epoch": 1.04, "learning_rate": 3.266749343018278e-05, "loss": 0.7047, "step": 93920 }, { "epoch": 1.04, "learning_rate": 3.2666570703044266e-05, "loss": 0.6164, "step": 93925 }, { "epoch": 1.04, "learning_rate": 3.266564797590575e-05, "loss": 0.749, "step": 93930 }, { "epoch": 1.04, "learning_rate": 3.2664725248767235e-05, "loss": 0.6774, "step": 93935 }, { "epoch": 1.04, "learning_rate": 3.266380252162873e-05, "loss": 0.6959, "step": 93940 }, { "epoch": 1.04, "learning_rate": 3.266287979449022e-05, "loss": 0.6606, "step": 93945 }, { "epoch": 1.04, "learning_rate": 3.26619570673517e-05, "loss": 0.6446, "step": 93950 }, { "epoch": 1.04, "learning_rate": 3.2661034340213186e-05, "loss": 0.6888, "step": 93955 }, { "epoch": 1.04, "learning_rate": 3.266011161307468e-05, "loss": 0.6412, "step": 93960 }, { "epoch": 1.04, "learning_rate": 3.265918888593616e-05, "loss": 0.6855, "step": 93965 }, { "epoch": 1.04, "learning_rate": 3.265826615879765e-05, "loss": 0.6979, "step": 93970 }, { "epoch": 1.04, "learning_rate": 3.265734343165914e-05, "loss": 0.6133, "step": 93975 }, { "epoch": 1.04, "learning_rate": 3.2656420704520626e-05, "loss": 0.6879, "step": 93980 }, { "epoch": 1.04, "learning_rate": 3.2655497977382113e-05, "loss": 0.6619, "step": 93985 }, { "epoch": 1.04, "learning_rate": 3.26545752502436e-05, "loss": 0.673, "step": 93990 }, { "epoch": 1.04, "learning_rate": 3.265365252310509e-05, "loss": 0.7317, "step": 93995 }, { "epoch": 1.04, "learning_rate": 3.265272979596658e-05, "loss": 0.6942, "step": 94000 }, { "epoch": 1.04, "eval_loss": 0.6305755376815796, "eval_runtime": 69.9223, "eval_samples_per_second": 28.603, "eval_steps_per_second": 14.302, "step": 94000 }, { "epoch": 1.04, "learning_rate": 3.2651807068828065e-05, "loss": 0.7436, "step": 94005 }, { "epoch": 1.04, "learning_rate": 3.265088434168955e-05, "loss": 0.6543, "step": 94010 }, { "epoch": 1.04, "learning_rate": 3.264996161455104e-05, "loss": 0.6976, "step": 94015 }, { "epoch": 1.04, "learning_rate": 3.264903888741253e-05, "loss": 0.6713, "step": 94020 }, { "epoch": 1.04, "learning_rate": 3.2648116160274016e-05, "loss": 0.6996, "step": 94025 }, { "epoch": 1.04, "learning_rate": 3.2647193433135504e-05, "loss": 0.7051, "step": 94030 }, { "epoch": 1.04, "learning_rate": 3.264627070599699e-05, "loss": 0.6828, "step": 94035 }, { "epoch": 1.04, "learning_rate": 3.264534797885847e-05, "loss": 0.6208, "step": 94040 }, { "epoch": 1.04, "learning_rate": 3.264442525171996e-05, "loss": 0.7331, "step": 94045 }, { "epoch": 1.04, "learning_rate": 3.2643502524581455e-05, "loss": 0.7321, "step": 94050 }, { "epoch": 1.04, "learning_rate": 3.264257979744294e-05, "loss": 0.6468, "step": 94055 }, { "epoch": 1.04, "learning_rate": 3.2641657070304424e-05, "loss": 0.6612, "step": 94060 }, { "epoch": 1.04, "learning_rate": 3.264073434316591e-05, "loss": 0.6914, "step": 94065 }, { "epoch": 1.04, "learning_rate": 3.263981161602741e-05, "loss": 0.6918, "step": 94070 }, { "epoch": 1.04, "learning_rate": 3.263888888888889e-05, "loss": 0.6564, "step": 94075 }, { "epoch": 1.04, "learning_rate": 3.2637966161750376e-05, "loss": 0.682, "step": 94080 }, { "epoch": 1.04, "learning_rate": 3.2637043434611864e-05, "loss": 0.6466, "step": 94085 }, { "epoch": 1.04, "learning_rate": 3.263612070747336e-05, "loss": 0.7011, "step": 94090 }, { "epoch": 1.04, "learning_rate": 3.263519798033484e-05, "loss": 0.6041, "step": 94095 }, { "epoch": 1.04, "learning_rate": 3.263427525319633e-05, "loss": 0.6745, "step": 94100 }, { "epoch": 1.04, "learning_rate": 3.2633352526057815e-05, "loss": 0.6879, "step": 94105 }, { "epoch": 1.04, "learning_rate": 3.26324297989193e-05, "loss": 0.6841, "step": 94110 }, { "epoch": 1.04, "learning_rate": 3.263150707178079e-05, "loss": 0.6702, "step": 94115 }, { "epoch": 1.04, "learning_rate": 3.263058434464228e-05, "loss": 0.6861, "step": 94120 }, { "epoch": 1.04, "learning_rate": 3.2629661617503766e-05, "loss": 0.6513, "step": 94125 }, { "epoch": 1.04, "learning_rate": 3.2628738890365254e-05, "loss": 0.6824, "step": 94130 }, { "epoch": 1.04, "learning_rate": 3.262781616322674e-05, "loss": 0.6396, "step": 94135 }, { "epoch": 1.04, "learning_rate": 3.262689343608823e-05, "loss": 0.6543, "step": 94140 }, { "epoch": 1.04, "learning_rate": 3.262597070894972e-05, "loss": 0.7597, "step": 94145 }, { "epoch": 1.04, "learning_rate": 3.26250479818112e-05, "loss": 0.7048, "step": 94150 }, { "epoch": 1.04, "learning_rate": 3.2624125254672693e-05, "loss": 0.6582, "step": 94155 }, { "epoch": 1.04, "learning_rate": 3.262320252753418e-05, "loss": 0.6973, "step": 94160 }, { "epoch": 1.04, "learning_rate": 3.262227980039567e-05, "loss": 0.6965, "step": 94165 }, { "epoch": 1.04, "learning_rate": 3.262135707325715e-05, "loss": 0.6881, "step": 94170 }, { "epoch": 1.04, "learning_rate": 3.2620434346118645e-05, "loss": 0.6916, "step": 94175 }, { "epoch": 1.04, "learning_rate": 3.261951161898013e-05, "loss": 0.6672, "step": 94180 }, { "epoch": 1.04, "learning_rate": 3.2618588891841614e-05, "loss": 0.7546, "step": 94185 }, { "epoch": 1.04, "learning_rate": 3.26176661647031e-05, "loss": 0.673, "step": 94190 }, { "epoch": 1.04, "learning_rate": 3.261674343756459e-05, "loss": 0.6196, "step": 94195 }, { "epoch": 1.04, "learning_rate": 3.2615820710426084e-05, "loss": 0.7284, "step": 94200 }, { "epoch": 1.04, "learning_rate": 3.2614897983287565e-05, "loss": 0.7468, "step": 94205 }, { "epoch": 1.04, "learning_rate": 3.261397525614905e-05, "loss": 0.7219, "step": 94210 }, { "epoch": 1.04, "learning_rate": 3.261305252901054e-05, "loss": 0.7176, "step": 94215 }, { "epoch": 1.04, "learning_rate": 3.2612129801872036e-05, "loss": 0.7014, "step": 94220 }, { "epoch": 1.04, "learning_rate": 3.2611207074733517e-05, "loss": 0.7327, "step": 94225 }, { "epoch": 1.04, "learning_rate": 3.2610284347595004e-05, "loss": 0.6966, "step": 94230 }, { "epoch": 1.04, "learning_rate": 3.260936162045649e-05, "loss": 0.7199, "step": 94235 }, { "epoch": 1.04, "learning_rate": 3.260843889331798e-05, "loss": 0.6967, "step": 94240 }, { "epoch": 1.04, "learning_rate": 3.260751616617947e-05, "loss": 0.6614, "step": 94245 }, { "epoch": 1.04, "learning_rate": 3.2606593439040956e-05, "loss": 0.6754, "step": 94250 }, { "epoch": 1.04, "learning_rate": 3.2605670711902444e-05, "loss": 0.7138, "step": 94255 }, { "epoch": 1.04, "learning_rate": 3.260474798476393e-05, "loss": 0.6314, "step": 94260 }, { "epoch": 1.04, "learning_rate": 3.260382525762542e-05, "loss": 0.6913, "step": 94265 }, { "epoch": 1.04, "learning_rate": 3.260290253048691e-05, "loss": 0.6166, "step": 94270 }, { "epoch": 1.04, "learning_rate": 3.2601979803348395e-05, "loss": 0.6139, "step": 94275 }, { "epoch": 1.04, "learning_rate": 3.2601057076209876e-05, "loss": 0.6556, "step": 94280 }, { "epoch": 1.04, "learning_rate": 3.260013434907137e-05, "loss": 0.6398, "step": 94285 }, { "epoch": 1.04, "learning_rate": 3.259921162193286e-05, "loss": 0.7081, "step": 94290 }, { "epoch": 1.04, "learning_rate": 3.2598288894794346e-05, "loss": 0.6683, "step": 94295 }, { "epoch": 1.04, "learning_rate": 3.259736616765583e-05, "loss": 0.6968, "step": 94300 }, { "epoch": 1.04, "learning_rate": 3.259644344051732e-05, "loss": 0.7121, "step": 94305 }, { "epoch": 1.04, "learning_rate": 3.259552071337881e-05, "loss": 0.6984, "step": 94310 }, { "epoch": 1.04, "learning_rate": 3.259459798624029e-05, "loss": 0.6627, "step": 94315 }, { "epoch": 1.04, "learning_rate": 3.259367525910178e-05, "loss": 0.6645, "step": 94320 }, { "epoch": 1.04, "learning_rate": 3.2592752531963274e-05, "loss": 0.773, "step": 94325 }, { "epoch": 1.04, "learning_rate": 3.259182980482476e-05, "loss": 0.6392, "step": 94330 }, { "epoch": 1.04, "learning_rate": 3.259090707768624e-05, "loss": 0.6511, "step": 94335 }, { "epoch": 1.04, "learning_rate": 3.258998435054773e-05, "loss": 0.6809, "step": 94340 }, { "epoch": 1.04, "learning_rate": 3.258906162340922e-05, "loss": 0.7203, "step": 94345 }, { "epoch": 1.04, "learning_rate": 3.2588138896270706e-05, "loss": 0.6241, "step": 94350 }, { "epoch": 1.04, "learning_rate": 3.2587216169132194e-05, "loss": 0.6115, "step": 94355 }, { "epoch": 1.04, "learning_rate": 3.258629344199368e-05, "loss": 0.7019, "step": 94360 }, { "epoch": 1.04, "learning_rate": 3.258537071485517e-05, "loss": 0.6715, "step": 94365 }, { "epoch": 1.04, "learning_rate": 3.258444798771666e-05, "loss": 0.7267, "step": 94370 }, { "epoch": 1.04, "learning_rate": 3.2583525260578145e-05, "loss": 0.6901, "step": 94375 }, { "epoch": 1.05, "learning_rate": 3.258260253343963e-05, "loss": 0.636, "step": 94380 }, { "epoch": 1.05, "learning_rate": 3.258167980630112e-05, "loss": 0.711, "step": 94385 }, { "epoch": 1.05, "learning_rate": 3.258075707916261e-05, "loss": 0.6746, "step": 94390 }, { "epoch": 1.05, "learning_rate": 3.25798343520241e-05, "loss": 0.72, "step": 94395 }, { "epoch": 1.05, "learning_rate": 3.2578911624885585e-05, "loss": 0.6276, "step": 94400 }, { "epoch": 1.05, "learning_rate": 3.257798889774707e-05, "loss": 0.6725, "step": 94405 }, { "epoch": 1.05, "learning_rate": 3.257706617060856e-05, "loss": 0.6805, "step": 94410 }, { "epoch": 1.05, "learning_rate": 3.257614344347005e-05, "loss": 0.6753, "step": 94415 }, { "epoch": 1.05, "learning_rate": 3.2575220716331536e-05, "loss": 0.7456, "step": 94420 }, { "epoch": 1.05, "learning_rate": 3.257429798919302e-05, "loss": 0.7366, "step": 94425 }, { "epoch": 1.05, "learning_rate": 3.2573375262054505e-05, "loss": 0.6433, "step": 94430 }, { "epoch": 1.05, "learning_rate": 3.2572452534916e-05, "loss": 0.6697, "step": 94435 }, { "epoch": 1.05, "learning_rate": 3.257152980777749e-05, "loss": 0.653, "step": 94440 }, { "epoch": 1.05, "learning_rate": 3.257060708063897e-05, "loss": 0.6754, "step": 94445 }, { "epoch": 1.05, "learning_rate": 3.2569684353500456e-05, "loss": 0.6691, "step": 94450 }, { "epoch": 1.05, "learning_rate": 3.256876162636195e-05, "loss": 0.6939, "step": 94455 }, { "epoch": 1.05, "learning_rate": 3.256783889922343e-05, "loss": 0.6656, "step": 94460 }, { "epoch": 1.05, "learning_rate": 3.256691617208492e-05, "loss": 0.7759, "step": 94465 }, { "epoch": 1.05, "learning_rate": 3.256599344494641e-05, "loss": 0.7071, "step": 94470 }, { "epoch": 1.05, "learning_rate": 3.25650707178079e-05, "loss": 0.7727, "step": 94475 }, { "epoch": 1.05, "learning_rate": 3.256414799066938e-05, "loss": 0.6638, "step": 94480 }, { "epoch": 1.05, "learning_rate": 3.256322526353087e-05, "loss": 0.6448, "step": 94485 }, { "epoch": 1.05, "learning_rate": 3.256230253639236e-05, "loss": 0.6724, "step": 94490 }, { "epoch": 1.05, "learning_rate": 3.256137980925385e-05, "loss": 0.6114, "step": 94495 }, { "epoch": 1.05, "learning_rate": 3.2560457082115335e-05, "loss": 0.654, "step": 94500 }, { "epoch": 1.05, "learning_rate": 3.255953435497682e-05, "loss": 0.6523, "step": 94505 }, { "epoch": 1.05, "learning_rate": 3.255861162783831e-05, "loss": 0.6568, "step": 94510 }, { "epoch": 1.05, "learning_rate": 3.25576889006998e-05, "loss": 0.6527, "step": 94515 }, { "epoch": 1.05, "learning_rate": 3.2556766173561286e-05, "loss": 0.6144, "step": 94520 }, { "epoch": 1.05, "learning_rate": 3.2555843446422774e-05, "loss": 0.6731, "step": 94525 }, { "epoch": 1.05, "learning_rate": 3.255492071928426e-05, "loss": 0.6819, "step": 94530 }, { "epoch": 1.05, "learning_rate": 3.255399799214574e-05, "loss": 0.6734, "step": 94535 }, { "epoch": 1.05, "learning_rate": 3.255307526500724e-05, "loss": 0.675, "step": 94540 }, { "epoch": 1.05, "learning_rate": 3.2552152537868725e-05, "loss": 0.6529, "step": 94545 }, { "epoch": 1.05, "learning_rate": 3.255122981073021e-05, "loss": 0.6666, "step": 94550 }, { "epoch": 1.05, "learning_rate": 3.2550307083591694e-05, "loss": 0.654, "step": 94555 }, { "epoch": 1.05, "learning_rate": 3.254938435645319e-05, "loss": 0.6694, "step": 94560 }, { "epoch": 1.05, "learning_rate": 3.254846162931468e-05, "loss": 0.6638, "step": 94565 }, { "epoch": 1.05, "learning_rate": 3.254753890217616e-05, "loss": 0.7111, "step": 94570 }, { "epoch": 1.05, "learning_rate": 3.2546616175037646e-05, "loss": 0.685, "step": 94575 }, { "epoch": 1.05, "learning_rate": 3.2545693447899134e-05, "loss": 0.7003, "step": 94580 }, { "epoch": 1.05, "learning_rate": 3.254477072076063e-05, "loss": 0.6541, "step": 94585 }, { "epoch": 1.05, "learning_rate": 3.254384799362211e-05, "loss": 0.6273, "step": 94590 }, { "epoch": 1.05, "learning_rate": 3.25429252664836e-05, "loss": 0.6987, "step": 94595 }, { "epoch": 1.05, "learning_rate": 3.2542002539345085e-05, "loss": 0.6644, "step": 94600 }, { "epoch": 1.05, "learning_rate": 3.254107981220658e-05, "loss": 0.6747, "step": 94605 }, { "epoch": 1.05, "learning_rate": 3.254015708506806e-05, "loss": 0.6518, "step": 94610 }, { "epoch": 1.05, "learning_rate": 3.253923435792955e-05, "loss": 0.6721, "step": 94615 }, { "epoch": 1.05, "learning_rate": 3.2538311630791036e-05, "loss": 0.6583, "step": 94620 }, { "epoch": 1.05, "learning_rate": 3.2537388903652524e-05, "loss": 0.6547, "step": 94625 }, { "epoch": 1.05, "learning_rate": 3.253646617651401e-05, "loss": 0.6129, "step": 94630 }, { "epoch": 1.05, "learning_rate": 3.25355434493755e-05, "loss": 0.64, "step": 94635 }, { "epoch": 1.05, "learning_rate": 3.253462072223699e-05, "loss": 0.6819, "step": 94640 }, { "epoch": 1.05, "learning_rate": 3.2533697995098476e-05, "loss": 0.7287, "step": 94645 }, { "epoch": 1.05, "learning_rate": 3.2532775267959963e-05, "loss": 0.7019, "step": 94650 }, { "epoch": 1.05, "learning_rate": 3.253185254082145e-05, "loss": 0.7265, "step": 94655 }, { "epoch": 1.05, "learning_rate": 3.253092981368294e-05, "loss": 0.6637, "step": 94660 }, { "epoch": 1.05, "learning_rate": 3.253000708654442e-05, "loss": 0.7286, "step": 94665 }, { "epoch": 1.05, "learning_rate": 3.2529084359405915e-05, "loss": 0.6555, "step": 94670 }, { "epoch": 1.05, "learning_rate": 3.25281616322674e-05, "loss": 0.7093, "step": 94675 }, { "epoch": 1.05, "learning_rate": 3.252723890512889e-05, "loss": 0.682, "step": 94680 }, { "epoch": 1.05, "learning_rate": 3.252631617799037e-05, "loss": 0.7026, "step": 94685 }, { "epoch": 1.05, "learning_rate": 3.2525393450851866e-05, "loss": 0.7061, "step": 94690 }, { "epoch": 1.05, "learning_rate": 3.2524470723713354e-05, "loss": 0.6232, "step": 94695 }, { "epoch": 1.05, "learning_rate": 3.2523547996574835e-05, "loss": 0.7295, "step": 94700 }, { "epoch": 1.05, "learning_rate": 3.252262526943632e-05, "loss": 0.6804, "step": 94705 }, { "epoch": 1.05, "learning_rate": 3.252170254229782e-05, "loss": 0.7131, "step": 94710 }, { "epoch": 1.05, "learning_rate": 3.2520779815159305e-05, "loss": 0.6308, "step": 94715 }, { "epoch": 1.05, "learning_rate": 3.2519857088020787e-05, "loss": 0.6903, "step": 94720 }, { "epoch": 1.05, "learning_rate": 3.2518934360882274e-05, "loss": 0.6788, "step": 94725 }, { "epoch": 1.05, "learning_rate": 3.251801163374376e-05, "loss": 0.6824, "step": 94730 }, { "epoch": 1.05, "learning_rate": 3.251708890660525e-05, "loss": 0.6402, "step": 94735 }, { "epoch": 1.05, "learning_rate": 3.251616617946674e-05, "loss": 0.7101, "step": 94740 }, { "epoch": 1.05, "learning_rate": 3.2515243452328226e-05, "loss": 0.7, "step": 94745 }, { "epoch": 1.05, "learning_rate": 3.2514320725189714e-05, "loss": 0.6997, "step": 94750 }, { "epoch": 1.05, "learning_rate": 3.25133979980512e-05, "loss": 0.7237, "step": 94755 }, { "epoch": 1.05, "learning_rate": 3.251247527091269e-05, "loss": 0.6522, "step": 94760 }, { "epoch": 1.05, "learning_rate": 3.251155254377418e-05, "loss": 0.7116, "step": 94765 }, { "epoch": 1.05, "learning_rate": 3.2510629816635665e-05, "loss": 0.671, "step": 94770 }, { "epoch": 1.05, "learning_rate": 3.250970708949715e-05, "loss": 0.6569, "step": 94775 }, { "epoch": 1.05, "learning_rate": 3.250878436235864e-05, "loss": 0.6894, "step": 94780 }, { "epoch": 1.05, "learning_rate": 3.250786163522013e-05, "loss": 0.6889, "step": 94785 }, { "epoch": 1.05, "learning_rate": 3.2506938908081616e-05, "loss": 0.652, "step": 94790 }, { "epoch": 1.05, "learning_rate": 3.2506016180943104e-05, "loss": 0.7202, "step": 94795 }, { "epoch": 1.05, "learning_rate": 3.250509345380459e-05, "loss": 0.736, "step": 94800 }, { "epoch": 1.05, "learning_rate": 3.250417072666608e-05, "loss": 0.7256, "step": 94805 }, { "epoch": 1.05, "learning_rate": 3.250324799952756e-05, "loss": 0.6954, "step": 94810 }, { "epoch": 1.05, "learning_rate": 3.250232527238905e-05, "loss": 0.6674, "step": 94815 }, { "epoch": 1.05, "learning_rate": 3.2501402545250543e-05, "loss": 0.6587, "step": 94820 }, { "epoch": 1.05, "learning_rate": 3.250047981811203e-05, "loss": 0.696, "step": 94825 }, { "epoch": 1.05, "learning_rate": 3.249955709097351e-05, "loss": 0.7504, "step": 94830 }, { "epoch": 1.05, "learning_rate": 3.2498634363835e-05, "loss": 0.6684, "step": 94835 }, { "epoch": 1.05, "learning_rate": 3.2497711636696495e-05, "loss": 0.6817, "step": 94840 }, { "epoch": 1.05, "learning_rate": 3.2496788909557976e-05, "loss": 0.7019, "step": 94845 }, { "epoch": 1.05, "learning_rate": 3.2495866182419464e-05, "loss": 0.6779, "step": 94850 }, { "epoch": 1.05, "learning_rate": 3.249494345528095e-05, "loss": 0.6837, "step": 94855 }, { "epoch": 1.05, "learning_rate": 3.2494020728142446e-05, "loss": 0.7256, "step": 94860 }, { "epoch": 1.05, "learning_rate": 3.249309800100393e-05, "loss": 0.6695, "step": 94865 }, { "epoch": 1.05, "learning_rate": 3.2492175273865415e-05, "loss": 0.6616, "step": 94870 }, { "epoch": 1.05, "learning_rate": 3.24912525467269e-05, "loss": 0.7448, "step": 94875 }, { "epoch": 1.05, "learning_rate": 3.249032981958839e-05, "loss": 0.6266, "step": 94880 }, { "epoch": 1.05, "learning_rate": 3.248940709244988e-05, "loss": 0.6597, "step": 94885 }, { "epoch": 1.05, "learning_rate": 3.2488484365311367e-05, "loss": 0.6259, "step": 94890 }, { "epoch": 1.05, "learning_rate": 3.2487561638172854e-05, "loss": 0.6627, "step": 94895 }, { "epoch": 1.05, "learning_rate": 3.248663891103434e-05, "loss": 0.6311, "step": 94900 }, { "epoch": 1.05, "learning_rate": 3.248571618389583e-05, "loss": 0.6809, "step": 94905 }, { "epoch": 1.05, "learning_rate": 3.248479345675732e-05, "loss": 0.6675, "step": 94910 }, { "epoch": 1.05, "learning_rate": 3.2483870729618806e-05, "loss": 0.6785, "step": 94915 }, { "epoch": 1.05, "learning_rate": 3.248294800248029e-05, "loss": 0.6359, "step": 94920 }, { "epoch": 1.05, "learning_rate": 3.248202527534178e-05, "loss": 0.689, "step": 94925 }, { "epoch": 1.05, "learning_rate": 3.248110254820327e-05, "loss": 0.6569, "step": 94930 }, { "epoch": 1.05, "learning_rate": 3.248017982106476e-05, "loss": 0.7136, "step": 94935 }, { "epoch": 1.05, "learning_rate": 3.247925709392624e-05, "loss": 0.6787, "step": 94940 }, { "epoch": 1.05, "learning_rate": 3.247833436678773e-05, "loss": 0.6817, "step": 94945 }, { "epoch": 1.05, "learning_rate": 3.247741163964922e-05, "loss": 0.6763, "step": 94950 }, { "epoch": 1.05, "learning_rate": 3.247648891251071e-05, "loss": 0.6225, "step": 94955 }, { "epoch": 1.05, "learning_rate": 3.247556618537219e-05, "loss": 0.6333, "step": 94960 }, { "epoch": 1.05, "learning_rate": 3.247464345823368e-05, "loss": 0.6643, "step": 94965 }, { "epoch": 1.05, "learning_rate": 3.247372073109517e-05, "loss": 0.6716, "step": 94970 }, { "epoch": 1.05, "learning_rate": 3.247279800395665e-05, "loss": 0.7184, "step": 94975 }, { "epoch": 1.05, "learning_rate": 3.247187527681814e-05, "loss": 0.6685, "step": 94980 }, { "epoch": 1.05, "learning_rate": 3.247095254967963e-05, "loss": 0.6419, "step": 94985 }, { "epoch": 1.05, "learning_rate": 3.2470029822541124e-05, "loss": 0.6902, "step": 94990 }, { "epoch": 1.05, "learning_rate": 3.2469107095402605e-05, "loss": 0.6921, "step": 94995 }, { "epoch": 1.05, "learning_rate": 3.246818436826409e-05, "loss": 0.6998, "step": 95000 }, { "epoch": 1.05, "eval_loss": 0.6408737897872925, "eval_runtime": 70.1239, "eval_samples_per_second": 28.521, "eval_steps_per_second": 14.26, "step": 95000 }, { "epoch": 1.05, "learning_rate": 3.246726164112558e-05, "loss": 0.6995, "step": 95005 }, { "epoch": 1.05, "learning_rate": 3.246633891398707e-05, "loss": 0.7116, "step": 95010 }, { "epoch": 1.05, "learning_rate": 3.2465416186848556e-05, "loss": 0.7418, "step": 95015 }, { "epoch": 1.05, "learning_rate": 3.2464493459710044e-05, "loss": 0.7023, "step": 95020 }, { "epoch": 1.05, "learning_rate": 3.246357073257153e-05, "loss": 0.6979, "step": 95025 }, { "epoch": 1.05, "learning_rate": 3.246264800543302e-05, "loss": 0.6877, "step": 95030 }, { "epoch": 1.05, "learning_rate": 3.246172527829451e-05, "loss": 0.6852, "step": 95035 }, { "epoch": 1.05, "learning_rate": 3.2460802551155995e-05, "loss": 0.6855, "step": 95040 }, { "epoch": 1.05, "learning_rate": 3.245987982401748e-05, "loss": 0.6233, "step": 95045 }, { "epoch": 1.05, "learning_rate": 3.2458957096878964e-05, "loss": 0.6253, "step": 95050 }, { "epoch": 1.05, "learning_rate": 3.245803436974046e-05, "loss": 0.6502, "step": 95055 }, { "epoch": 1.05, "learning_rate": 3.245711164260195e-05, "loss": 0.7011, "step": 95060 }, { "epoch": 1.05, "learning_rate": 3.2456188915463435e-05, "loss": 0.7235, "step": 95065 }, { "epoch": 1.05, "learning_rate": 3.2455266188324916e-05, "loss": 0.6452, "step": 95070 }, { "epoch": 1.05, "learning_rate": 3.245434346118641e-05, "loss": 0.6642, "step": 95075 }, { "epoch": 1.05, "learning_rate": 3.24534207340479e-05, "loss": 0.7427, "step": 95080 }, { "epoch": 1.05, "learning_rate": 3.245249800690938e-05, "loss": 0.6896, "step": 95085 }, { "epoch": 1.05, "learning_rate": 3.245157527977087e-05, "loss": 0.6978, "step": 95090 }, { "epoch": 1.05, "learning_rate": 3.245065255263236e-05, "loss": 0.6478, "step": 95095 }, { "epoch": 1.05, "learning_rate": 3.244972982549385e-05, "loss": 0.6563, "step": 95100 }, { "epoch": 1.05, "learning_rate": 3.244880709835533e-05, "loss": 0.6784, "step": 95105 }, { "epoch": 1.05, "learning_rate": 3.244788437121682e-05, "loss": 0.654, "step": 95110 }, { "epoch": 1.05, "learning_rate": 3.2446961644078306e-05, "loss": 0.7268, "step": 95115 }, { "epoch": 1.05, "learning_rate": 3.2446038916939794e-05, "loss": 0.6525, "step": 95120 }, { "epoch": 1.05, "learning_rate": 3.244511618980128e-05, "loss": 0.7134, "step": 95125 }, { "epoch": 1.05, "learning_rate": 3.244419346266277e-05, "loss": 0.6764, "step": 95130 }, { "epoch": 1.05, "learning_rate": 3.244327073552426e-05, "loss": 0.7334, "step": 95135 }, { "epoch": 1.05, "learning_rate": 3.2442348008385745e-05, "loss": 0.679, "step": 95140 }, { "epoch": 1.05, "learning_rate": 3.244142528124723e-05, "loss": 0.6036, "step": 95145 }, { "epoch": 1.05, "learning_rate": 3.244050255410872e-05, "loss": 0.7351, "step": 95150 }, { "epoch": 1.05, "learning_rate": 3.243957982697021e-05, "loss": 0.6699, "step": 95155 }, { "epoch": 1.05, "learning_rate": 3.24386570998317e-05, "loss": 0.706, "step": 95160 }, { "epoch": 1.05, "learning_rate": 3.2437734372693185e-05, "loss": 0.7032, "step": 95165 }, { "epoch": 1.05, "learning_rate": 3.243681164555467e-05, "loss": 0.7177, "step": 95170 }, { "epoch": 1.05, "learning_rate": 3.243588891841616e-05, "loss": 0.6904, "step": 95175 }, { "epoch": 1.05, "learning_rate": 3.243496619127764e-05, "loss": 0.6702, "step": 95180 }, { "epoch": 1.05, "learning_rate": 3.2434043464139136e-05, "loss": 0.7051, "step": 95185 }, { "epoch": 1.05, "learning_rate": 3.2433120737000624e-05, "loss": 0.6374, "step": 95190 }, { "epoch": 1.05, "learning_rate": 3.2432198009862105e-05, "loss": 0.6556, "step": 95195 }, { "epoch": 1.05, "learning_rate": 3.243127528272359e-05, "loss": 0.6866, "step": 95200 }, { "epoch": 1.05, "learning_rate": 3.243035255558509e-05, "loss": 0.6955, "step": 95205 }, { "epoch": 1.05, "learning_rate": 3.2429429828446575e-05, "loss": 0.7669, "step": 95210 }, { "epoch": 1.05, "learning_rate": 3.2428507101308056e-05, "loss": 0.6913, "step": 95215 }, { "epoch": 1.05, "learning_rate": 3.2427584374169544e-05, "loss": 0.6821, "step": 95220 }, { "epoch": 1.05, "learning_rate": 3.242666164703104e-05, "loss": 0.6674, "step": 95225 }, { "epoch": 1.05, "learning_rate": 3.242573891989252e-05, "loss": 0.648, "step": 95230 }, { "epoch": 1.05, "learning_rate": 3.242481619275401e-05, "loss": 0.6924, "step": 95235 }, { "epoch": 1.05, "learning_rate": 3.2423893465615496e-05, "loss": 0.739, "step": 95240 }, { "epoch": 1.05, "learning_rate": 3.242297073847699e-05, "loss": 0.6044, "step": 95245 }, { "epoch": 1.05, "learning_rate": 3.242204801133847e-05, "loss": 0.7361, "step": 95250 }, { "epoch": 1.05, "learning_rate": 3.242112528419996e-05, "loss": 0.7321, "step": 95255 }, { "epoch": 1.05, "learning_rate": 3.242020255706145e-05, "loss": 0.7014, "step": 95260 }, { "epoch": 1.05, "learning_rate": 3.2419279829922935e-05, "loss": 0.7114, "step": 95265 }, { "epoch": 1.05, "learning_rate": 3.241835710278442e-05, "loss": 0.7074, "step": 95270 }, { "epoch": 1.05, "learning_rate": 3.241743437564591e-05, "loss": 0.6604, "step": 95275 }, { "epoch": 1.06, "learning_rate": 3.24165116485074e-05, "loss": 0.6963, "step": 95280 }, { "epoch": 1.06, "learning_rate": 3.2415588921368886e-05, "loss": 0.6116, "step": 95285 }, { "epoch": 1.06, "learning_rate": 3.2414666194230374e-05, "loss": 0.6825, "step": 95290 }, { "epoch": 1.06, "learning_rate": 3.241374346709186e-05, "loss": 0.6365, "step": 95295 }, { "epoch": 1.06, "learning_rate": 3.241282073995335e-05, "loss": 0.7012, "step": 95300 }, { "epoch": 1.06, "learning_rate": 3.241189801281483e-05, "loss": 0.7179, "step": 95305 }, { "epoch": 1.06, "learning_rate": 3.2410975285676326e-05, "loss": 0.6852, "step": 95310 }, { "epoch": 1.06, "learning_rate": 3.2410052558537813e-05, "loss": 0.7198, "step": 95315 }, { "epoch": 1.06, "learning_rate": 3.24091298313993e-05, "loss": 0.6681, "step": 95320 }, { "epoch": 1.06, "learning_rate": 3.240820710426078e-05, "loss": 0.6684, "step": 95325 }, { "epoch": 1.06, "learning_rate": 3.240728437712228e-05, "loss": 0.6573, "step": 95330 }, { "epoch": 1.06, "learning_rate": 3.2406361649983765e-05, "loss": 0.6823, "step": 95335 }, { "epoch": 1.06, "learning_rate": 3.240543892284525e-05, "loss": 0.6481, "step": 95340 }, { "epoch": 1.06, "learning_rate": 3.2404516195706734e-05, "loss": 0.7191, "step": 95345 }, { "epoch": 1.06, "learning_rate": 3.240359346856822e-05, "loss": 0.6781, "step": 95350 }, { "epoch": 1.06, "learning_rate": 3.2402670741429716e-05, "loss": 0.626, "step": 95355 }, { "epoch": 1.06, "learning_rate": 3.24017480142912e-05, "loss": 0.7032, "step": 95360 }, { "epoch": 1.06, "learning_rate": 3.2400825287152685e-05, "loss": 0.7169, "step": 95365 }, { "epoch": 1.06, "learning_rate": 3.239990256001417e-05, "loss": 0.7023, "step": 95370 }, { "epoch": 1.06, "learning_rate": 3.239897983287567e-05, "loss": 0.6486, "step": 95375 }, { "epoch": 1.06, "learning_rate": 3.239805710573715e-05, "loss": 0.7286, "step": 95380 }, { "epoch": 1.06, "learning_rate": 3.2397134378598637e-05, "loss": 0.7182, "step": 95385 }, { "epoch": 1.06, "learning_rate": 3.2396211651460124e-05, "loss": 0.6638, "step": 95390 }, { "epoch": 1.06, "learning_rate": 3.239528892432161e-05, "loss": 0.7286, "step": 95395 }, { "epoch": 1.06, "learning_rate": 3.23943661971831e-05, "loss": 0.6059, "step": 95400 }, { "epoch": 1.06, "learning_rate": 3.239344347004459e-05, "loss": 0.6862, "step": 95405 }, { "epoch": 1.06, "learning_rate": 3.2392520742906076e-05, "loss": 0.6665, "step": 95410 }, { "epoch": 1.06, "learning_rate": 3.2391598015767564e-05, "loss": 0.7216, "step": 95415 }, { "epoch": 1.06, "learning_rate": 3.239067528862905e-05, "loss": 0.6798, "step": 95420 }, { "epoch": 1.06, "learning_rate": 3.238975256149054e-05, "loss": 0.6284, "step": 95425 }, { "epoch": 1.06, "learning_rate": 3.238882983435203e-05, "loss": 0.6564, "step": 95430 }, { "epoch": 1.06, "learning_rate": 3.238790710721351e-05, "loss": 0.6398, "step": 95435 }, { "epoch": 1.06, "learning_rate": 3.2386984380075e-05, "loss": 0.6969, "step": 95440 }, { "epoch": 1.06, "learning_rate": 3.238606165293649e-05, "loss": 0.6662, "step": 95445 }, { "epoch": 1.06, "learning_rate": 3.238513892579798e-05, "loss": 0.6981, "step": 95450 }, { "epoch": 1.06, "learning_rate": 3.238421619865946e-05, "loss": 0.6827, "step": 95455 }, { "epoch": 1.06, "learning_rate": 3.2383293471520954e-05, "loss": 0.6587, "step": 95460 }, { "epoch": 1.06, "learning_rate": 3.238237074438244e-05, "loss": 0.6838, "step": 95465 }, { "epoch": 1.06, "learning_rate": 3.238144801724392e-05, "loss": 0.6779, "step": 95470 }, { "epoch": 1.06, "learning_rate": 3.238052529010541e-05, "loss": 0.7042, "step": 95475 }, { "epoch": 1.06, "learning_rate": 3.2379602562966906e-05, "loss": 0.6607, "step": 95480 }, { "epoch": 1.06, "learning_rate": 3.2378679835828393e-05, "loss": 0.6649, "step": 95485 }, { "epoch": 1.06, "learning_rate": 3.2377757108689875e-05, "loss": 0.7059, "step": 95490 }, { "epoch": 1.06, "learning_rate": 3.237683438155136e-05, "loss": 0.6671, "step": 95495 }, { "epoch": 1.06, "learning_rate": 3.237591165441285e-05, "loss": 0.7395, "step": 95500 }, { "epoch": 1.06, "learning_rate": 3.237498892727434e-05, "loss": 0.6902, "step": 95505 }, { "epoch": 1.06, "learning_rate": 3.2374066200135826e-05, "loss": 0.6653, "step": 95510 }, { "epoch": 1.06, "learning_rate": 3.2373143472997314e-05, "loss": 0.6572, "step": 95515 }, { "epoch": 1.06, "learning_rate": 3.23722207458588e-05, "loss": 0.6536, "step": 95520 }, { "epoch": 1.06, "learning_rate": 3.237129801872029e-05, "loss": 0.6673, "step": 95525 }, { "epoch": 1.06, "learning_rate": 3.237037529158178e-05, "loss": 0.645, "step": 95530 }, { "epoch": 1.06, "learning_rate": 3.2369452564443265e-05, "loss": 0.6248, "step": 95535 }, { "epoch": 1.06, "learning_rate": 3.236852983730475e-05, "loss": 0.6914, "step": 95540 }, { "epoch": 1.06, "learning_rate": 3.236760711016624e-05, "loss": 0.673, "step": 95545 }, { "epoch": 1.06, "learning_rate": 3.236668438302773e-05, "loss": 0.6553, "step": 95550 }, { "epoch": 1.06, "learning_rate": 3.2365761655889217e-05, "loss": 0.6216, "step": 95555 }, { "epoch": 1.06, "learning_rate": 3.2364838928750704e-05, "loss": 0.6533, "step": 95560 }, { "epoch": 1.06, "learning_rate": 3.2363916201612186e-05, "loss": 0.7174, "step": 95565 }, { "epoch": 1.06, "learning_rate": 3.236299347447368e-05, "loss": 0.7099, "step": 95570 }, { "epoch": 1.06, "learning_rate": 3.236207074733517e-05, "loss": 0.639, "step": 95575 }, { "epoch": 1.06, "learning_rate": 3.236114802019665e-05, "loss": 0.6673, "step": 95580 }, { "epoch": 1.06, "learning_rate": 3.236022529305814e-05, "loss": 0.7029, "step": 95585 }, { "epoch": 1.06, "learning_rate": 3.235930256591963e-05, "loss": 0.6561, "step": 95590 }, { "epoch": 1.06, "learning_rate": 3.235837983878112e-05, "loss": 0.6802, "step": 95595 }, { "epoch": 1.06, "learning_rate": 3.23574571116426e-05, "loss": 0.7052, "step": 95600 }, { "epoch": 1.06, "learning_rate": 3.235653438450409e-05, "loss": 0.701, "step": 95605 }, { "epoch": 1.06, "learning_rate": 3.235561165736558e-05, "loss": 0.6954, "step": 95610 }, { "epoch": 1.06, "learning_rate": 3.2354688930227064e-05, "loss": 0.6603, "step": 95615 }, { "epoch": 1.06, "learning_rate": 3.235376620308855e-05, "loss": 0.665, "step": 95620 }, { "epoch": 1.06, "learning_rate": 3.235284347595004e-05, "loss": 0.6635, "step": 95625 }, { "epoch": 1.06, "learning_rate": 3.2351920748811534e-05, "loss": 0.7419, "step": 95630 }, { "epoch": 1.06, "learning_rate": 3.2350998021673015e-05, "loss": 0.6568, "step": 95635 }, { "epoch": 1.06, "learning_rate": 3.23500752945345e-05, "loss": 0.6827, "step": 95640 }, { "epoch": 1.06, "learning_rate": 3.234915256739599e-05, "loss": 0.6964, "step": 95645 }, { "epoch": 1.06, "learning_rate": 3.234822984025748e-05, "loss": 0.6727, "step": 95650 }, { "epoch": 1.06, "learning_rate": 3.234730711311897e-05, "loss": 0.6745, "step": 95655 }, { "epoch": 1.06, "learning_rate": 3.2346384385980455e-05, "loss": 0.672, "step": 95660 }, { "epoch": 1.06, "learning_rate": 3.234546165884194e-05, "loss": 0.6243, "step": 95665 }, { "epoch": 1.06, "learning_rate": 3.234453893170343e-05, "loss": 0.6746, "step": 95670 }, { "epoch": 1.06, "learning_rate": 3.234361620456492e-05, "loss": 0.6581, "step": 95675 }, { "epoch": 1.06, "learning_rate": 3.2342693477426406e-05, "loss": 0.6998, "step": 95680 }, { "epoch": 1.06, "learning_rate": 3.2341770750287894e-05, "loss": 0.6434, "step": 95685 }, { "epoch": 1.06, "learning_rate": 3.2340848023149375e-05, "loss": 0.6273, "step": 95690 }, { "epoch": 1.06, "learning_rate": 3.233992529601087e-05, "loss": 0.6528, "step": 95695 }, { "epoch": 1.06, "learning_rate": 3.233900256887236e-05, "loss": 0.6879, "step": 95700 }, { "epoch": 1.06, "learning_rate": 3.2338079841733845e-05, "loss": 0.6334, "step": 95705 }, { "epoch": 1.06, "learning_rate": 3.2337157114595326e-05, "loss": 0.6655, "step": 95710 }, { "epoch": 1.06, "learning_rate": 3.2336234387456814e-05, "loss": 0.6428, "step": 95715 }, { "epoch": 1.06, "learning_rate": 3.233531166031831e-05, "loss": 0.7185, "step": 95720 }, { "epoch": 1.06, "learning_rate": 3.23343889331798e-05, "loss": 0.6793, "step": 95725 }, { "epoch": 1.06, "learning_rate": 3.233346620604128e-05, "loss": 0.7253, "step": 95730 }, { "epoch": 1.06, "learning_rate": 3.2332543478902766e-05, "loss": 0.6939, "step": 95735 }, { "epoch": 1.06, "learning_rate": 3.233162075176426e-05, "loss": 0.6767, "step": 95740 }, { "epoch": 1.06, "learning_rate": 3.233069802462574e-05, "loss": 0.6671, "step": 95745 }, { "epoch": 1.06, "learning_rate": 3.232977529748723e-05, "loss": 0.7109, "step": 95750 }, { "epoch": 1.06, "learning_rate": 3.232885257034872e-05, "loss": 0.6886, "step": 95755 }, { "epoch": 1.06, "learning_rate": 3.232792984321021e-05, "loss": 0.6465, "step": 95760 }, { "epoch": 1.06, "learning_rate": 3.232700711607169e-05, "loss": 0.6691, "step": 95765 }, { "epoch": 1.06, "learning_rate": 3.232608438893318e-05, "loss": 0.7187, "step": 95770 }, { "epoch": 1.06, "learning_rate": 3.232516166179467e-05, "loss": 0.6911, "step": 95775 }, { "epoch": 1.06, "learning_rate": 3.2324238934656156e-05, "loss": 0.7051, "step": 95780 }, { "epoch": 1.06, "learning_rate": 3.2323316207517644e-05, "loss": 0.6827, "step": 95785 }, { "epoch": 1.06, "learning_rate": 3.232239348037913e-05, "loss": 0.6354, "step": 95790 }, { "epoch": 1.06, "learning_rate": 3.232147075324062e-05, "loss": 0.6465, "step": 95795 }, { "epoch": 1.06, "learning_rate": 3.232054802610211e-05, "loss": 0.6538, "step": 95800 }, { "epoch": 1.06, "learning_rate": 3.2319625298963595e-05, "loss": 0.6346, "step": 95805 }, { "epoch": 1.06, "learning_rate": 3.231870257182508e-05, "loss": 0.6627, "step": 95810 }, { "epoch": 1.06, "learning_rate": 3.231777984468657e-05, "loss": 0.6642, "step": 95815 }, { "epoch": 1.06, "learning_rate": 3.231685711754805e-05, "loss": 0.6177, "step": 95820 }, { "epoch": 1.06, "learning_rate": 3.231593439040955e-05, "loss": 0.6097, "step": 95825 }, { "epoch": 1.06, "learning_rate": 3.2315011663271035e-05, "loss": 0.6729, "step": 95830 }, { "epoch": 1.06, "learning_rate": 3.231408893613252e-05, "loss": 0.6249, "step": 95835 }, { "epoch": 1.06, "learning_rate": 3.2313166208994004e-05, "loss": 0.6628, "step": 95840 }, { "epoch": 1.06, "learning_rate": 3.23122434818555e-05, "loss": 0.6724, "step": 95845 }, { "epoch": 1.06, "learning_rate": 3.2311320754716986e-05, "loss": 0.6741, "step": 95850 }, { "epoch": 1.06, "learning_rate": 3.231039802757847e-05, "loss": 0.6714, "step": 95855 }, { "epoch": 1.06, "learning_rate": 3.2309475300439955e-05, "loss": 0.625, "step": 95860 }, { "epoch": 1.06, "learning_rate": 3.230855257330144e-05, "loss": 0.6347, "step": 95865 }, { "epoch": 1.06, "learning_rate": 3.230762984616294e-05, "loss": 0.6328, "step": 95870 }, { "epoch": 1.06, "learning_rate": 3.230670711902442e-05, "loss": 0.68, "step": 95875 }, { "epoch": 1.06, "learning_rate": 3.2305784391885906e-05, "loss": 0.6158, "step": 95880 }, { "epoch": 1.06, "learning_rate": 3.2304861664747394e-05, "loss": 0.6342, "step": 95885 }, { "epoch": 1.06, "learning_rate": 3.230393893760888e-05, "loss": 0.6671, "step": 95890 }, { "epoch": 1.06, "learning_rate": 3.230301621047037e-05, "loss": 0.6697, "step": 95895 }, { "epoch": 1.06, "learning_rate": 3.230209348333186e-05, "loss": 0.6589, "step": 95900 }, { "epoch": 1.06, "learning_rate": 3.2301170756193346e-05, "loss": 0.6977, "step": 95905 }, { "epoch": 1.06, "learning_rate": 3.2300248029054834e-05, "loss": 0.6718, "step": 95910 }, { "epoch": 1.06, "learning_rate": 3.229932530191632e-05, "loss": 0.6995, "step": 95915 }, { "epoch": 1.06, "learning_rate": 3.229840257477781e-05, "loss": 0.7253, "step": 95920 }, { "epoch": 1.06, "learning_rate": 3.22974798476393e-05, "loss": 0.6714, "step": 95925 }, { "epoch": 1.06, "learning_rate": 3.2296557120500785e-05, "loss": 0.6721, "step": 95930 }, { "epoch": 1.06, "learning_rate": 3.229563439336227e-05, "loss": 0.6362, "step": 95935 }, { "epoch": 1.06, "learning_rate": 3.229471166622376e-05, "loss": 0.6866, "step": 95940 }, { "epoch": 1.06, "learning_rate": 3.229378893908525e-05, "loss": 0.6918, "step": 95945 }, { "epoch": 1.06, "learning_rate": 3.229286621194673e-05, "loss": 0.7475, "step": 95950 }, { "epoch": 1.06, "learning_rate": 3.2291943484808224e-05, "loss": 0.6303, "step": 95955 }, { "epoch": 1.06, "learning_rate": 3.229102075766971e-05, "loss": 0.6481, "step": 95960 }, { "epoch": 1.06, "learning_rate": 3.229009803053119e-05, "loss": 0.6571, "step": 95965 }, { "epoch": 1.06, "learning_rate": 3.228917530339268e-05, "loss": 0.669, "step": 95970 }, { "epoch": 1.06, "learning_rate": 3.2288252576254176e-05, "loss": 0.6628, "step": 95975 }, { "epoch": 1.06, "learning_rate": 3.2287329849115663e-05, "loss": 0.6659, "step": 95980 }, { "epoch": 1.06, "learning_rate": 3.2286407121977144e-05, "loss": 0.682, "step": 95985 }, { "epoch": 1.06, "learning_rate": 3.228548439483863e-05, "loss": 0.7211, "step": 95990 }, { "epoch": 1.06, "learning_rate": 3.228456166770013e-05, "loss": 0.6751, "step": 95995 }, { "epoch": 1.06, "learning_rate": 3.228363894056161e-05, "loss": 0.6481, "step": 96000 }, { "epoch": 1.06, "eval_loss": 0.6476021409034729, "eval_runtime": 70.0887, "eval_samples_per_second": 28.535, "eval_steps_per_second": 14.268, "step": 96000 }, { "epoch": 1.06, "learning_rate": 3.2282716213423096e-05, "loss": 0.637, "step": 96005 }, { "epoch": 1.06, "learning_rate": 3.2281793486284584e-05, "loss": 0.6671, "step": 96010 }, { "epoch": 1.06, "learning_rate": 3.228087075914607e-05, "loss": 0.6811, "step": 96015 }, { "epoch": 1.06, "learning_rate": 3.227994803200756e-05, "loss": 0.6406, "step": 96020 }, { "epoch": 1.06, "learning_rate": 3.227902530486905e-05, "loss": 0.6449, "step": 96025 }, { "epoch": 1.06, "learning_rate": 3.2278102577730535e-05, "loss": 0.6474, "step": 96030 }, { "epoch": 1.06, "learning_rate": 3.227717985059202e-05, "loss": 0.7135, "step": 96035 }, { "epoch": 1.06, "learning_rate": 3.227625712345351e-05, "loss": 0.6416, "step": 96040 }, { "epoch": 1.06, "learning_rate": 3.2275334396315e-05, "loss": 0.6357, "step": 96045 }, { "epoch": 1.06, "learning_rate": 3.2274411669176486e-05, "loss": 0.7004, "step": 96050 }, { "epoch": 1.06, "learning_rate": 3.2273488942037974e-05, "loss": 0.7069, "step": 96055 }, { "epoch": 1.06, "learning_rate": 3.227256621489946e-05, "loss": 0.6479, "step": 96060 }, { "epoch": 1.06, "learning_rate": 3.227164348776095e-05, "loss": 0.6529, "step": 96065 }, { "epoch": 1.06, "learning_rate": 3.227072076062244e-05, "loss": 0.5798, "step": 96070 }, { "epoch": 1.06, "learning_rate": 3.226979803348392e-05, "loss": 0.616, "step": 96075 }, { "epoch": 1.06, "learning_rate": 3.2268875306345414e-05, "loss": 0.6712, "step": 96080 }, { "epoch": 1.06, "learning_rate": 3.22679525792069e-05, "loss": 0.6601, "step": 96085 }, { "epoch": 1.06, "learning_rate": 3.226702985206839e-05, "loss": 0.6816, "step": 96090 }, { "epoch": 1.06, "learning_rate": 3.226610712492987e-05, "loss": 0.696, "step": 96095 }, { "epoch": 1.06, "learning_rate": 3.226518439779136e-05, "loss": 0.6773, "step": 96100 }, { "epoch": 1.06, "learning_rate": 3.226426167065285e-05, "loss": 0.6761, "step": 96105 }, { "epoch": 1.06, "learning_rate": 3.226333894351434e-05, "loss": 0.697, "step": 96110 }, { "epoch": 1.06, "learning_rate": 3.226241621637582e-05, "loss": 0.7111, "step": 96115 }, { "epoch": 1.06, "learning_rate": 3.226149348923731e-05, "loss": 0.6576, "step": 96120 }, { "epoch": 1.06, "learning_rate": 3.2260570762098804e-05, "loss": 0.667, "step": 96125 }, { "epoch": 1.06, "learning_rate": 3.2259648034960285e-05, "loss": 0.7057, "step": 96130 }, { "epoch": 1.06, "learning_rate": 3.225872530782177e-05, "loss": 0.6152, "step": 96135 }, { "epoch": 1.06, "learning_rate": 3.225780258068326e-05, "loss": 0.7115, "step": 96140 }, { "epoch": 1.06, "learning_rate": 3.2256879853544756e-05, "loss": 0.6857, "step": 96145 }, { "epoch": 1.06, "learning_rate": 3.225595712640624e-05, "loss": 0.6803, "step": 96150 }, { "epoch": 1.06, "learning_rate": 3.2255034399267725e-05, "loss": 0.6455, "step": 96155 }, { "epoch": 1.06, "learning_rate": 3.225411167212921e-05, "loss": 0.7409, "step": 96160 }, { "epoch": 1.06, "learning_rate": 3.22531889449907e-05, "loss": 0.648, "step": 96165 }, { "epoch": 1.06, "learning_rate": 3.225226621785219e-05, "loss": 0.6605, "step": 96170 }, { "epoch": 1.06, "learning_rate": 3.2251343490713676e-05, "loss": 0.7377, "step": 96175 }, { "epoch": 1.06, "learning_rate": 3.2250420763575164e-05, "loss": 0.6939, "step": 96180 }, { "epoch": 1.07, "learning_rate": 3.224949803643665e-05, "loss": 0.6831, "step": 96185 }, { "epoch": 1.07, "learning_rate": 3.224857530929814e-05, "loss": 0.6416, "step": 96190 }, { "epoch": 1.07, "learning_rate": 3.224765258215963e-05, "loss": 0.7127, "step": 96195 }, { "epoch": 1.07, "learning_rate": 3.2246729855021115e-05, "loss": 0.6456, "step": 96200 }, { "epoch": 1.07, "learning_rate": 3.2245807127882596e-05, "loss": 0.6289, "step": 96205 }, { "epoch": 1.07, "learning_rate": 3.224488440074409e-05, "loss": 0.6808, "step": 96210 }, { "epoch": 1.07, "learning_rate": 3.224396167360558e-05, "loss": 0.6571, "step": 96215 }, { "epoch": 1.07, "learning_rate": 3.2243038946467067e-05, "loss": 0.6011, "step": 96220 }, { "epoch": 1.07, "learning_rate": 3.224211621932855e-05, "loss": 0.594, "step": 96225 }, { "epoch": 1.07, "learning_rate": 3.224119349219004e-05, "loss": 0.6893, "step": 96230 }, { "epoch": 1.07, "learning_rate": 3.224027076505153e-05, "loss": 0.6698, "step": 96235 }, { "epoch": 1.07, "learning_rate": 3.223934803791301e-05, "loss": 0.6091, "step": 96240 }, { "epoch": 1.07, "learning_rate": 3.22384253107745e-05, "loss": 0.679, "step": 96245 }, { "epoch": 1.07, "learning_rate": 3.223750258363599e-05, "loss": 0.7253, "step": 96250 }, { "epoch": 1.07, "learning_rate": 3.223657985649748e-05, "loss": 0.6417, "step": 96255 }, { "epoch": 1.07, "learning_rate": 3.223565712935896e-05, "loss": 0.7137, "step": 96260 }, { "epoch": 1.07, "learning_rate": 3.223473440222045e-05, "loss": 0.6617, "step": 96265 }, { "epoch": 1.07, "learning_rate": 3.223381167508194e-05, "loss": 0.6406, "step": 96270 }, { "epoch": 1.07, "learning_rate": 3.2232888947943426e-05, "loss": 0.6873, "step": 96275 }, { "epoch": 1.07, "learning_rate": 3.2231966220804914e-05, "loss": 0.6945, "step": 96280 }, { "epoch": 1.07, "learning_rate": 3.22310434936664e-05, "loss": 0.6802, "step": 96285 }, { "epoch": 1.07, "learning_rate": 3.223012076652789e-05, "loss": 0.686, "step": 96290 }, { "epoch": 1.07, "learning_rate": 3.222919803938938e-05, "loss": 0.6527, "step": 96295 }, { "epoch": 1.07, "learning_rate": 3.2228275312250865e-05, "loss": 0.6938, "step": 96300 }, { "epoch": 1.07, "learning_rate": 3.222735258511235e-05, "loss": 0.7218, "step": 96305 }, { "epoch": 1.07, "learning_rate": 3.222642985797384e-05, "loss": 0.6274, "step": 96310 }, { "epoch": 1.07, "learning_rate": 3.222550713083533e-05, "loss": 0.6834, "step": 96315 }, { "epoch": 1.07, "learning_rate": 3.222458440369682e-05, "loss": 0.6801, "step": 96320 }, { "epoch": 1.07, "learning_rate": 3.2223661676558305e-05, "loss": 0.6473, "step": 96325 }, { "epoch": 1.07, "learning_rate": 3.222273894941979e-05, "loss": 0.6436, "step": 96330 }, { "epoch": 1.07, "learning_rate": 3.2221816222281274e-05, "loss": 0.6989, "step": 96335 }, { "epoch": 1.07, "learning_rate": 3.222089349514277e-05, "loss": 0.7002, "step": 96340 }, { "epoch": 1.07, "learning_rate": 3.2219970768004256e-05, "loss": 0.6915, "step": 96345 }, { "epoch": 1.07, "learning_rate": 3.221904804086574e-05, "loss": 0.6806, "step": 96350 }, { "epoch": 1.07, "learning_rate": 3.2218125313727225e-05, "loss": 0.7132, "step": 96355 }, { "epoch": 1.07, "learning_rate": 3.221720258658872e-05, "loss": 0.6618, "step": 96360 }, { "epoch": 1.07, "learning_rate": 3.221627985945021e-05, "loss": 0.6169, "step": 96365 }, { "epoch": 1.07, "learning_rate": 3.221535713231169e-05, "loss": 0.6982, "step": 96370 }, { "epoch": 1.07, "learning_rate": 3.2214434405173176e-05, "loss": 0.7064, "step": 96375 }, { "epoch": 1.07, "learning_rate": 3.221351167803467e-05, "loss": 0.7222, "step": 96380 }, { "epoch": 1.07, "learning_rate": 3.221258895089615e-05, "loss": 0.6876, "step": 96385 }, { "epoch": 1.07, "learning_rate": 3.221166622375764e-05, "loss": 0.6792, "step": 96390 }, { "epoch": 1.07, "learning_rate": 3.221074349661913e-05, "loss": 0.6566, "step": 96395 }, { "epoch": 1.07, "learning_rate": 3.2209820769480616e-05, "loss": 0.6556, "step": 96400 }, { "epoch": 1.07, "learning_rate": 3.2208898042342103e-05, "loss": 0.6575, "step": 96405 }, { "epoch": 1.07, "learning_rate": 3.220797531520359e-05, "loss": 0.688, "step": 96410 }, { "epoch": 1.07, "learning_rate": 3.220705258806508e-05, "loss": 0.6853, "step": 96415 }, { "epoch": 1.07, "learning_rate": 3.220612986092657e-05, "loss": 0.6251, "step": 96420 }, { "epoch": 1.07, "learning_rate": 3.2205207133788055e-05, "loss": 0.6363, "step": 96425 }, { "epoch": 1.07, "learning_rate": 3.220428440664954e-05, "loss": 0.6492, "step": 96430 }, { "epoch": 1.07, "learning_rate": 3.220336167951103e-05, "loss": 0.6857, "step": 96435 }, { "epoch": 1.07, "learning_rate": 3.220243895237252e-05, "loss": 0.6988, "step": 96440 }, { "epoch": 1.07, "learning_rate": 3.2201516225234006e-05, "loss": 0.7191, "step": 96445 }, { "epoch": 1.07, "learning_rate": 3.2200593498095494e-05, "loss": 0.7001, "step": 96450 }, { "epoch": 1.07, "learning_rate": 3.219967077095698e-05, "loss": 0.7401, "step": 96455 }, { "epoch": 1.07, "learning_rate": 3.219874804381846e-05, "loss": 0.6288, "step": 96460 }, { "epoch": 1.07, "learning_rate": 3.219782531667996e-05, "loss": 0.6915, "step": 96465 }, { "epoch": 1.07, "learning_rate": 3.2196902589541445e-05, "loss": 0.6471, "step": 96470 }, { "epoch": 1.07, "learning_rate": 3.219597986240293e-05, "loss": 0.6598, "step": 96475 }, { "epoch": 1.07, "learning_rate": 3.2195057135264414e-05, "loss": 0.7255, "step": 96480 }, { "epoch": 1.07, "learning_rate": 3.21941344081259e-05, "loss": 0.6551, "step": 96485 }, { "epoch": 1.07, "learning_rate": 3.21932116809874e-05, "loss": 0.6818, "step": 96490 }, { "epoch": 1.07, "learning_rate": 3.2192288953848885e-05, "loss": 0.6541, "step": 96495 }, { "epoch": 1.07, "learning_rate": 3.2191366226710366e-05, "loss": 0.7226, "step": 96500 }, { "epoch": 1.07, "learning_rate": 3.2190443499571854e-05, "loss": 0.6547, "step": 96505 }, { "epoch": 1.07, "learning_rate": 3.218952077243335e-05, "loss": 0.6625, "step": 96510 }, { "epoch": 1.07, "learning_rate": 3.218859804529483e-05, "loss": 0.7127, "step": 96515 }, { "epoch": 1.07, "learning_rate": 3.218767531815632e-05, "loss": 0.6811, "step": 96520 }, { "epoch": 1.07, "learning_rate": 3.2186752591017805e-05, "loss": 0.7392, "step": 96525 }, { "epoch": 1.07, "learning_rate": 3.21858298638793e-05, "loss": 0.6691, "step": 96530 }, { "epoch": 1.07, "learning_rate": 3.218490713674078e-05, "loss": 0.7295, "step": 96535 }, { "epoch": 1.07, "learning_rate": 3.218398440960227e-05, "loss": 0.7075, "step": 96540 }, { "epoch": 1.07, "learning_rate": 3.2183061682463756e-05, "loss": 0.6943, "step": 96545 }, { "epoch": 1.07, "learning_rate": 3.2182138955325244e-05, "loss": 0.6967, "step": 96550 }, { "epoch": 1.07, "learning_rate": 3.218121622818673e-05, "loss": 0.6649, "step": 96555 }, { "epoch": 1.07, "learning_rate": 3.218029350104822e-05, "loss": 0.6274, "step": 96560 }, { "epoch": 1.07, "learning_rate": 3.217937077390971e-05, "loss": 0.7087, "step": 96565 }, { "epoch": 1.07, "learning_rate": 3.2178448046771196e-05, "loss": 0.6334, "step": 96570 }, { "epoch": 1.07, "learning_rate": 3.2177525319632684e-05, "loss": 0.7417, "step": 96575 }, { "epoch": 1.07, "learning_rate": 3.217660259249417e-05, "loss": 0.6568, "step": 96580 }, { "epoch": 1.07, "learning_rate": 3.217567986535566e-05, "loss": 0.7299, "step": 96585 }, { "epoch": 1.07, "learning_rate": 3.217475713821714e-05, "loss": 0.6365, "step": 96590 }, { "epoch": 1.07, "learning_rate": 3.2173834411078635e-05, "loss": 0.6943, "step": 96595 }, { "epoch": 1.07, "learning_rate": 3.217291168394012e-05, "loss": 0.6425, "step": 96600 }, { "epoch": 1.07, "learning_rate": 3.217198895680161e-05, "loss": 0.763, "step": 96605 }, { "epoch": 1.07, "learning_rate": 3.217106622966309e-05, "loss": 0.7153, "step": 96610 }, { "epoch": 1.07, "learning_rate": 3.2170143502524586e-05, "loss": 0.726, "step": 96615 }, { "epoch": 1.07, "learning_rate": 3.2169220775386074e-05, "loss": 0.6511, "step": 96620 }, { "epoch": 1.07, "learning_rate": 3.2168298048247555e-05, "loss": 0.7034, "step": 96625 }, { "epoch": 1.07, "learning_rate": 3.216737532110904e-05, "loss": 0.6198, "step": 96630 }, { "epoch": 1.07, "learning_rate": 3.216645259397053e-05, "loss": 0.7294, "step": 96635 }, { "epoch": 1.07, "learning_rate": 3.2165529866832026e-05, "loss": 0.7052, "step": 96640 }, { "epoch": 1.07, "learning_rate": 3.2164607139693507e-05, "loss": 0.7483, "step": 96645 }, { "epoch": 1.07, "learning_rate": 3.2163684412554994e-05, "loss": 0.6549, "step": 96650 }, { "epoch": 1.07, "learning_rate": 3.216276168541648e-05, "loss": 0.6907, "step": 96655 }, { "epoch": 1.07, "learning_rate": 3.216183895827797e-05, "loss": 0.6138, "step": 96660 }, { "epoch": 1.07, "learning_rate": 3.216091623113946e-05, "loss": 0.7133, "step": 96665 }, { "epoch": 1.07, "learning_rate": 3.2159993504000946e-05, "loss": 0.67, "step": 96670 }, { "epoch": 1.07, "learning_rate": 3.2159070776862434e-05, "loss": 0.6571, "step": 96675 }, { "epoch": 1.07, "learning_rate": 3.215814804972392e-05, "loss": 0.6768, "step": 96680 }, { "epoch": 1.07, "learning_rate": 3.215722532258541e-05, "loss": 0.7183, "step": 96685 }, { "epoch": 1.07, "learning_rate": 3.21563025954469e-05, "loss": 0.6408, "step": 96690 }, { "epoch": 1.07, "learning_rate": 3.2155379868308385e-05, "loss": 0.6418, "step": 96695 }, { "epoch": 1.07, "learning_rate": 3.2154457141169866e-05, "loss": 0.6409, "step": 96700 }, { "epoch": 1.07, "learning_rate": 3.215353441403136e-05, "loss": 0.6363, "step": 96705 }, { "epoch": 1.07, "learning_rate": 3.215261168689285e-05, "loss": 0.7127, "step": 96710 }, { "epoch": 1.07, "learning_rate": 3.2151688959754336e-05, "loss": 0.6543, "step": 96715 }, { "epoch": 1.07, "learning_rate": 3.215076623261582e-05, "loss": 0.6548, "step": 96720 }, { "epoch": 1.07, "learning_rate": 3.214984350547731e-05, "loss": 0.6661, "step": 96725 }, { "epoch": 1.07, "learning_rate": 3.21489207783388e-05, "loss": 0.6337, "step": 96730 }, { "epoch": 1.07, "learning_rate": 3.214799805120028e-05, "loss": 0.6668, "step": 96735 }, { "epoch": 1.07, "learning_rate": 3.214707532406177e-05, "loss": 0.6687, "step": 96740 }, { "epoch": 1.07, "learning_rate": 3.2146152596923264e-05, "loss": 0.6536, "step": 96745 }, { "epoch": 1.07, "learning_rate": 3.214522986978475e-05, "loss": 0.6504, "step": 96750 }, { "epoch": 1.07, "learning_rate": 3.214430714264623e-05, "loss": 0.6662, "step": 96755 }, { "epoch": 1.07, "learning_rate": 3.214338441550772e-05, "loss": 0.6874, "step": 96760 }, { "epoch": 1.07, "learning_rate": 3.2142461688369215e-05, "loss": 0.6464, "step": 96765 }, { "epoch": 1.07, "learning_rate": 3.2141538961230696e-05, "loss": 0.6068, "step": 96770 }, { "epoch": 1.07, "learning_rate": 3.2140616234092184e-05, "loss": 0.6895, "step": 96775 }, { "epoch": 1.07, "learning_rate": 3.213969350695367e-05, "loss": 0.6666, "step": 96780 }, { "epoch": 1.07, "learning_rate": 3.213877077981516e-05, "loss": 0.6956, "step": 96785 }, { "epoch": 1.07, "learning_rate": 3.213784805267665e-05, "loss": 0.6732, "step": 96790 }, { "epoch": 1.07, "learning_rate": 3.2136925325538135e-05, "loss": 0.7329, "step": 96795 }, { "epoch": 1.07, "learning_rate": 3.213600259839962e-05, "loss": 0.6495, "step": 96800 }, { "epoch": 1.07, "learning_rate": 3.213507987126111e-05, "loss": 0.6788, "step": 96805 }, { "epoch": 1.07, "learning_rate": 3.21341571441226e-05, "loss": 0.683, "step": 96810 }, { "epoch": 1.07, "learning_rate": 3.213323441698409e-05, "loss": 0.6353, "step": 96815 }, { "epoch": 1.07, "learning_rate": 3.2132311689845575e-05, "loss": 0.6788, "step": 96820 }, { "epoch": 1.07, "learning_rate": 3.213138896270706e-05, "loss": 0.7031, "step": 96825 }, { "epoch": 1.07, "learning_rate": 3.213046623556855e-05, "loss": 0.6785, "step": 96830 }, { "epoch": 1.07, "learning_rate": 3.212954350843004e-05, "loss": 0.7007, "step": 96835 }, { "epoch": 1.07, "learning_rate": 3.2128620781291526e-05, "loss": 0.679, "step": 96840 }, { "epoch": 1.07, "learning_rate": 3.212769805415301e-05, "loss": 0.7219, "step": 96845 }, { "epoch": 1.07, "learning_rate": 3.2126775327014495e-05, "loss": 0.6632, "step": 96850 }, { "epoch": 1.07, "learning_rate": 3.212585259987599e-05, "loss": 0.6919, "step": 96855 }, { "epoch": 1.07, "learning_rate": 3.212492987273748e-05, "loss": 0.6847, "step": 96860 }, { "epoch": 1.07, "learning_rate": 3.212400714559896e-05, "loss": 0.6527, "step": 96865 }, { "epoch": 1.07, "learning_rate": 3.2123084418460446e-05, "loss": 0.6528, "step": 96870 }, { "epoch": 1.07, "learning_rate": 3.212216169132194e-05, "loss": 0.6861, "step": 96875 }, { "epoch": 1.07, "learning_rate": 3.212123896418343e-05, "loss": 0.6578, "step": 96880 }, { "epoch": 1.07, "learning_rate": 3.212031623704491e-05, "loss": 0.6509, "step": 96885 }, { "epoch": 1.07, "learning_rate": 3.21193935099064e-05, "loss": 0.6687, "step": 96890 }, { "epoch": 1.07, "learning_rate": 3.211847078276789e-05, "loss": 0.6645, "step": 96895 }, { "epoch": 1.07, "learning_rate": 3.211754805562937e-05, "loss": 0.6166, "step": 96900 }, { "epoch": 1.07, "learning_rate": 3.211662532849086e-05, "loss": 0.6684, "step": 96905 }, { "epoch": 1.07, "learning_rate": 3.211570260135235e-05, "loss": 0.6822, "step": 96910 }, { "epoch": 1.07, "learning_rate": 3.2114779874213844e-05, "loss": 0.7482, "step": 96915 }, { "epoch": 1.07, "learning_rate": 3.2113857147075325e-05, "loss": 0.6163, "step": 96920 }, { "epoch": 1.07, "learning_rate": 3.211293441993681e-05, "loss": 0.6085, "step": 96925 }, { "epoch": 1.07, "learning_rate": 3.21120116927983e-05, "loss": 0.6471, "step": 96930 }, { "epoch": 1.07, "learning_rate": 3.211108896565979e-05, "loss": 0.6564, "step": 96935 }, { "epoch": 1.07, "learning_rate": 3.2110166238521276e-05, "loss": 0.7107, "step": 96940 }, { "epoch": 1.07, "learning_rate": 3.2109243511382764e-05, "loss": 0.6815, "step": 96945 }, { "epoch": 1.07, "learning_rate": 3.210832078424425e-05, "loss": 0.7143, "step": 96950 }, { "epoch": 1.07, "learning_rate": 3.210739805710574e-05, "loss": 0.6532, "step": 96955 }, { "epoch": 1.07, "learning_rate": 3.210647532996723e-05, "loss": 0.7202, "step": 96960 }, { "epoch": 1.07, "learning_rate": 3.2105552602828715e-05, "loss": 0.6827, "step": 96965 }, { "epoch": 1.07, "learning_rate": 3.21046298756902e-05, "loss": 0.6506, "step": 96970 }, { "epoch": 1.07, "learning_rate": 3.2103707148551684e-05, "loss": 0.6001, "step": 96975 }, { "epoch": 1.07, "learning_rate": 3.210278442141318e-05, "loss": 0.6581, "step": 96980 }, { "epoch": 1.07, "learning_rate": 3.210186169427467e-05, "loss": 0.6501, "step": 96985 }, { "epoch": 1.07, "learning_rate": 3.2100938967136155e-05, "loss": 0.7136, "step": 96990 }, { "epoch": 1.07, "learning_rate": 3.2100016239997636e-05, "loss": 0.709, "step": 96995 }, { "epoch": 1.07, "learning_rate": 3.209909351285913e-05, "loss": 0.727, "step": 97000 }, { "epoch": 1.07, "eval_loss": 0.6354342103004456, "eval_runtime": 70.0847, "eval_samples_per_second": 28.537, "eval_steps_per_second": 14.268, "step": 97000 }, { "epoch": 1.07, "learning_rate": 3.209817078572062e-05, "loss": 0.6652, "step": 97005 }, { "epoch": 1.07, "learning_rate": 3.20972480585821e-05, "loss": 0.6998, "step": 97010 }, { "epoch": 1.07, "learning_rate": 3.209632533144359e-05, "loss": 0.6584, "step": 97015 }, { "epoch": 1.07, "learning_rate": 3.2095402604305075e-05, "loss": 0.6573, "step": 97020 }, { "epoch": 1.07, "learning_rate": 3.209447987716657e-05, "loss": 0.7831, "step": 97025 }, { "epoch": 1.07, "learning_rate": 3.209355715002805e-05, "loss": 0.6515, "step": 97030 }, { "epoch": 1.07, "learning_rate": 3.209263442288954e-05, "loss": 0.7404, "step": 97035 }, { "epoch": 1.07, "learning_rate": 3.2091711695751026e-05, "loss": 0.7345, "step": 97040 }, { "epoch": 1.07, "learning_rate": 3.2090788968612514e-05, "loss": 0.7005, "step": 97045 }, { "epoch": 1.07, "learning_rate": 3.2089866241474e-05, "loss": 0.6678, "step": 97050 }, { "epoch": 1.07, "learning_rate": 3.208894351433549e-05, "loss": 0.6711, "step": 97055 }, { "epoch": 1.07, "learning_rate": 3.208802078719698e-05, "loss": 0.7059, "step": 97060 }, { "epoch": 1.07, "learning_rate": 3.2087098060058466e-05, "loss": 0.69, "step": 97065 }, { "epoch": 1.07, "learning_rate": 3.2086175332919953e-05, "loss": 0.6666, "step": 97070 }, { "epoch": 1.07, "learning_rate": 3.208525260578144e-05, "loss": 0.6772, "step": 97075 }, { "epoch": 1.07, "learning_rate": 3.208432987864293e-05, "loss": 0.6452, "step": 97080 }, { "epoch": 1.07, "learning_rate": 3.208340715150441e-05, "loss": 0.7043, "step": 97085 }, { "epoch": 1.08, "learning_rate": 3.2082484424365905e-05, "loss": 0.6774, "step": 97090 }, { "epoch": 1.08, "learning_rate": 3.208156169722739e-05, "loss": 0.6875, "step": 97095 }, { "epoch": 1.08, "learning_rate": 3.208063897008888e-05, "loss": 0.6185, "step": 97100 }, { "epoch": 1.08, "learning_rate": 3.207971624295036e-05, "loss": 0.6762, "step": 97105 }, { "epoch": 1.08, "learning_rate": 3.2078793515811856e-05, "loss": 0.6775, "step": 97110 }, { "epoch": 1.08, "learning_rate": 3.2077870788673344e-05, "loss": 0.6957, "step": 97115 }, { "epoch": 1.08, "learning_rate": 3.2076948061534825e-05, "loss": 0.6822, "step": 97120 }, { "epoch": 1.08, "learning_rate": 3.207602533439631e-05, "loss": 0.6637, "step": 97125 }, { "epoch": 1.08, "learning_rate": 3.207510260725781e-05, "loss": 0.6783, "step": 97130 }, { "epoch": 1.08, "learning_rate": 3.2074179880119295e-05, "loss": 0.6494, "step": 97135 }, { "epoch": 1.08, "learning_rate": 3.2073257152980777e-05, "loss": 0.6645, "step": 97140 }, { "epoch": 1.08, "learning_rate": 3.2072334425842264e-05, "loss": 0.7033, "step": 97145 }, { "epoch": 1.08, "learning_rate": 3.207141169870376e-05, "loss": 0.7061, "step": 97150 }, { "epoch": 1.08, "learning_rate": 3.207048897156525e-05, "loss": 0.7266, "step": 97155 }, { "epoch": 1.08, "learning_rate": 3.206956624442673e-05, "loss": 0.6931, "step": 97160 }, { "epoch": 1.08, "learning_rate": 3.2068643517288216e-05, "loss": 0.6507, "step": 97165 }, { "epoch": 1.08, "learning_rate": 3.2067720790149704e-05, "loss": 0.6921, "step": 97170 }, { "epoch": 1.08, "learning_rate": 3.206679806301119e-05, "loss": 0.6279, "step": 97175 }, { "epoch": 1.08, "learning_rate": 3.206587533587268e-05, "loss": 0.7508, "step": 97180 }, { "epoch": 1.08, "learning_rate": 3.206495260873417e-05, "loss": 0.741, "step": 97185 }, { "epoch": 1.08, "learning_rate": 3.2064029881595655e-05, "loss": 0.6699, "step": 97190 }, { "epoch": 1.08, "learning_rate": 3.206310715445714e-05, "loss": 0.6704, "step": 97195 }, { "epoch": 1.08, "learning_rate": 3.206218442731863e-05, "loss": 0.6611, "step": 97200 }, { "epoch": 1.08, "learning_rate": 3.206126170018012e-05, "loss": 0.6625, "step": 97205 }, { "epoch": 1.08, "learning_rate": 3.2060338973041606e-05, "loss": 0.7248, "step": 97210 }, { "epoch": 1.08, "learning_rate": 3.2059416245903094e-05, "loss": 0.6748, "step": 97215 }, { "epoch": 1.08, "learning_rate": 3.205849351876458e-05, "loss": 0.6915, "step": 97220 }, { "epoch": 1.08, "learning_rate": 3.205757079162607e-05, "loss": 0.7402, "step": 97225 }, { "epoch": 1.08, "learning_rate": 3.205664806448756e-05, "loss": 0.67, "step": 97230 }, { "epoch": 1.08, "learning_rate": 3.205572533734904e-05, "loss": 0.6793, "step": 97235 }, { "epoch": 1.08, "learning_rate": 3.2054802610210533e-05, "loss": 0.6605, "step": 97240 }, { "epoch": 1.08, "learning_rate": 3.205387988307202e-05, "loss": 0.6783, "step": 97245 }, { "epoch": 1.08, "learning_rate": 3.20529571559335e-05, "loss": 0.7048, "step": 97250 }, { "epoch": 1.08, "learning_rate": 3.205203442879499e-05, "loss": 0.6566, "step": 97255 }, { "epoch": 1.08, "learning_rate": 3.2051111701656485e-05, "loss": 0.6309, "step": 97260 }, { "epoch": 1.08, "learning_rate": 3.205018897451797e-05, "loss": 0.6775, "step": 97265 }, { "epoch": 1.08, "learning_rate": 3.2049266247379454e-05, "loss": 0.6276, "step": 97270 }, { "epoch": 1.08, "learning_rate": 3.204834352024094e-05, "loss": 0.7276, "step": 97275 }, { "epoch": 1.08, "learning_rate": 3.2047420793102436e-05, "loss": 0.6138, "step": 97280 }, { "epoch": 1.08, "learning_rate": 3.204649806596392e-05, "loss": 0.6943, "step": 97285 }, { "epoch": 1.08, "learning_rate": 3.2045575338825405e-05, "loss": 0.6278, "step": 97290 }, { "epoch": 1.08, "learning_rate": 3.204465261168689e-05, "loss": 0.6525, "step": 97295 }, { "epoch": 1.08, "learning_rate": 3.204372988454839e-05, "loss": 0.6474, "step": 97300 }, { "epoch": 1.08, "learning_rate": 3.204280715740987e-05, "loss": 0.7262, "step": 97305 }, { "epoch": 1.08, "learning_rate": 3.2041884430271357e-05, "loss": 0.6941, "step": 97310 }, { "epoch": 1.08, "learning_rate": 3.2040961703132844e-05, "loss": 0.667, "step": 97315 }, { "epoch": 1.08, "learning_rate": 3.204003897599433e-05, "loss": 0.6609, "step": 97320 }, { "epoch": 1.08, "learning_rate": 3.203911624885582e-05, "loss": 0.7198, "step": 97325 }, { "epoch": 1.08, "learning_rate": 3.203819352171731e-05, "loss": 0.6528, "step": 97330 }, { "epoch": 1.08, "learning_rate": 3.2037270794578796e-05, "loss": 0.6536, "step": 97335 }, { "epoch": 1.08, "learning_rate": 3.2036348067440284e-05, "loss": 0.7165, "step": 97340 }, { "epoch": 1.08, "learning_rate": 3.203542534030177e-05, "loss": 0.6325, "step": 97345 }, { "epoch": 1.08, "learning_rate": 3.203450261316326e-05, "loss": 0.6684, "step": 97350 }, { "epoch": 1.08, "learning_rate": 3.203357988602475e-05, "loss": 0.6518, "step": 97355 }, { "epoch": 1.08, "learning_rate": 3.203265715888623e-05, "loss": 0.6473, "step": 97360 }, { "epoch": 1.08, "learning_rate": 3.203173443174772e-05, "loss": 0.6401, "step": 97365 }, { "epoch": 1.08, "learning_rate": 3.203081170460921e-05, "loss": 0.6504, "step": 97370 }, { "epoch": 1.08, "learning_rate": 3.20298889774707e-05, "loss": 0.7027, "step": 97375 }, { "epoch": 1.08, "learning_rate": 3.202896625033218e-05, "loss": 0.699, "step": 97380 }, { "epoch": 1.08, "learning_rate": 3.202804352319367e-05, "loss": 0.7425, "step": 97385 }, { "epoch": 1.08, "learning_rate": 3.202712079605516e-05, "loss": 0.6778, "step": 97390 }, { "epoch": 1.08, "learning_rate": 3.202619806891664e-05, "loss": 0.6948, "step": 97395 }, { "epoch": 1.08, "learning_rate": 3.202527534177813e-05, "loss": 0.6627, "step": 97400 }, { "epoch": 1.08, "learning_rate": 3.202435261463962e-05, "loss": 0.7445, "step": 97405 }, { "epoch": 1.08, "learning_rate": 3.2023429887501114e-05, "loss": 0.7029, "step": 97410 }, { "epoch": 1.08, "learning_rate": 3.2022507160362595e-05, "loss": 0.6551, "step": 97415 }, { "epoch": 1.08, "learning_rate": 3.202158443322408e-05, "loss": 0.6734, "step": 97420 }, { "epoch": 1.08, "learning_rate": 3.202066170608557e-05, "loss": 0.6282, "step": 97425 }, { "epoch": 1.08, "learning_rate": 3.201973897894706e-05, "loss": 0.735, "step": 97430 }, { "epoch": 1.08, "learning_rate": 3.2018816251808546e-05, "loss": 0.7013, "step": 97435 }, { "epoch": 1.08, "learning_rate": 3.2017893524670034e-05, "loss": 0.7778, "step": 97440 }, { "epoch": 1.08, "learning_rate": 3.201697079753152e-05, "loss": 0.6769, "step": 97445 }, { "epoch": 1.08, "learning_rate": 3.201604807039301e-05, "loss": 0.6834, "step": 97450 }, { "epoch": 1.08, "learning_rate": 3.20151253432545e-05, "loss": 0.6399, "step": 97455 }, { "epoch": 1.08, "learning_rate": 3.2014202616115985e-05, "loss": 0.7094, "step": 97460 }, { "epoch": 1.08, "learning_rate": 3.201327988897747e-05, "loss": 0.6722, "step": 97465 }, { "epoch": 1.08, "learning_rate": 3.2012357161838954e-05, "loss": 0.7053, "step": 97470 }, { "epoch": 1.08, "learning_rate": 3.201143443470045e-05, "loss": 0.6742, "step": 97475 }, { "epoch": 1.08, "learning_rate": 3.201051170756194e-05, "loss": 0.7108, "step": 97480 }, { "epoch": 1.08, "learning_rate": 3.2009588980423425e-05, "loss": 0.692, "step": 97485 }, { "epoch": 1.08, "learning_rate": 3.2008666253284906e-05, "loss": 0.6962, "step": 97490 }, { "epoch": 1.08, "learning_rate": 3.20077435261464e-05, "loss": 0.6908, "step": 97495 }, { "epoch": 1.08, "learning_rate": 3.200682079900789e-05, "loss": 0.6756, "step": 97500 }, { "epoch": 1.08, "learning_rate": 3.200589807186937e-05, "loss": 0.6041, "step": 97505 }, { "epoch": 1.08, "learning_rate": 3.200497534473086e-05, "loss": 0.6477, "step": 97510 }, { "epoch": 1.08, "learning_rate": 3.200405261759235e-05, "loss": 0.7079, "step": 97515 }, { "epoch": 1.08, "learning_rate": 3.200312989045384e-05, "loss": 0.6113, "step": 97520 }, { "epoch": 1.08, "learning_rate": 3.200220716331532e-05, "loss": 0.6759, "step": 97525 }, { "epoch": 1.08, "learning_rate": 3.200128443617681e-05, "loss": 0.6583, "step": 97530 }, { "epoch": 1.08, "learning_rate": 3.2000361709038296e-05, "loss": 0.6883, "step": 97535 }, { "epoch": 1.08, "learning_rate": 3.199943898189979e-05, "loss": 0.7146, "step": 97540 }, { "epoch": 1.08, "learning_rate": 3.199851625476127e-05, "loss": 0.6961, "step": 97545 }, { "epoch": 1.08, "learning_rate": 3.199759352762276e-05, "loss": 0.6956, "step": 97550 }, { "epoch": 1.08, "learning_rate": 3.199667080048425e-05, "loss": 0.6844, "step": 97555 }, { "epoch": 1.08, "learning_rate": 3.1995748073345735e-05, "loss": 0.6884, "step": 97560 }, { "epoch": 1.08, "learning_rate": 3.199482534620722e-05, "loss": 0.6528, "step": 97565 }, { "epoch": 1.08, "learning_rate": 3.199390261906871e-05, "loss": 0.652, "step": 97570 }, { "epoch": 1.08, "learning_rate": 3.19929798919302e-05, "loss": 0.6943, "step": 97575 }, { "epoch": 1.08, "learning_rate": 3.199205716479169e-05, "loss": 0.6495, "step": 97580 }, { "epoch": 1.08, "learning_rate": 3.1991134437653175e-05, "loss": 0.6722, "step": 97585 }, { "epoch": 1.08, "learning_rate": 3.199021171051466e-05, "loss": 0.6496, "step": 97590 }, { "epoch": 1.08, "learning_rate": 3.198928898337615e-05, "loss": 0.6301, "step": 97595 }, { "epoch": 1.08, "learning_rate": 3.198836625623764e-05, "loss": 0.6461, "step": 97600 }, { "epoch": 1.08, "learning_rate": 3.1987443529099126e-05, "loss": 0.6895, "step": 97605 }, { "epoch": 1.08, "learning_rate": 3.1986520801960614e-05, "loss": 0.6663, "step": 97610 }, { "epoch": 1.08, "learning_rate": 3.19855980748221e-05, "loss": 0.6528, "step": 97615 }, { "epoch": 1.08, "learning_rate": 3.198467534768358e-05, "loss": 0.7034, "step": 97620 }, { "epoch": 1.08, "learning_rate": 3.198375262054508e-05, "loss": 0.6976, "step": 97625 }, { "epoch": 1.08, "learning_rate": 3.1982829893406565e-05, "loss": 0.6382, "step": 97630 }, { "epoch": 1.08, "learning_rate": 3.1981907166268046e-05, "loss": 0.6267, "step": 97635 }, { "epoch": 1.08, "learning_rate": 3.1980984439129534e-05, "loss": 0.6446, "step": 97640 }, { "epoch": 1.08, "learning_rate": 3.198006171199103e-05, "loss": 0.6644, "step": 97645 }, { "epoch": 1.08, "learning_rate": 3.197913898485252e-05, "loss": 0.6892, "step": 97650 }, { "epoch": 1.08, "learning_rate": 3.1978216257714e-05, "loss": 0.6874, "step": 97655 }, { "epoch": 1.08, "learning_rate": 3.1977293530575486e-05, "loss": 0.7051, "step": 97660 }, { "epoch": 1.08, "learning_rate": 3.197637080343698e-05, "loss": 0.6667, "step": 97665 }, { "epoch": 1.08, "learning_rate": 3.197544807629846e-05, "loss": 0.7092, "step": 97670 }, { "epoch": 1.08, "learning_rate": 3.197452534915995e-05, "loss": 0.7559, "step": 97675 }, { "epoch": 1.08, "learning_rate": 3.197360262202144e-05, "loss": 0.6416, "step": 97680 }, { "epoch": 1.08, "learning_rate": 3.1972679894882925e-05, "loss": 0.7132, "step": 97685 }, { "epoch": 1.08, "learning_rate": 3.197175716774441e-05, "loss": 0.6249, "step": 97690 }, { "epoch": 1.08, "learning_rate": 3.19708344406059e-05, "loss": 0.7278, "step": 97695 }, { "epoch": 1.08, "learning_rate": 3.196991171346739e-05, "loss": 0.6511, "step": 97700 }, { "epoch": 1.08, "learning_rate": 3.1968988986328876e-05, "loss": 0.7271, "step": 97705 }, { "epoch": 1.08, "learning_rate": 3.1968066259190364e-05, "loss": 0.6902, "step": 97710 }, { "epoch": 1.08, "learning_rate": 3.196714353205185e-05, "loss": 0.712, "step": 97715 }, { "epoch": 1.08, "learning_rate": 3.196622080491334e-05, "loss": 0.6948, "step": 97720 }, { "epoch": 1.08, "learning_rate": 3.196529807777483e-05, "loss": 0.6645, "step": 97725 }, { "epoch": 1.08, "learning_rate": 3.1964375350636316e-05, "loss": 0.6818, "step": 97730 }, { "epoch": 1.08, "learning_rate": 3.1963452623497803e-05, "loss": 0.6888, "step": 97735 }, { "epoch": 1.08, "learning_rate": 3.196252989635929e-05, "loss": 0.6551, "step": 97740 }, { "epoch": 1.08, "learning_rate": 3.196160716922077e-05, "loss": 0.6512, "step": 97745 }, { "epoch": 1.08, "learning_rate": 3.196068444208227e-05, "loss": 0.6876, "step": 97750 }, { "epoch": 1.08, "learning_rate": 3.1959761714943755e-05, "loss": 0.7177, "step": 97755 }, { "epoch": 1.08, "learning_rate": 3.195883898780524e-05, "loss": 0.6634, "step": 97760 }, { "epoch": 1.08, "learning_rate": 3.1957916260666724e-05, "loss": 0.7335, "step": 97765 }, { "epoch": 1.08, "learning_rate": 3.195699353352821e-05, "loss": 0.6899, "step": 97770 }, { "epoch": 1.08, "learning_rate": 3.1956070806389706e-05, "loss": 0.6876, "step": 97775 }, { "epoch": 1.08, "learning_rate": 3.195514807925119e-05, "loss": 0.6564, "step": 97780 }, { "epoch": 1.08, "learning_rate": 3.1954225352112675e-05, "loss": 0.6325, "step": 97785 }, { "epoch": 1.08, "learning_rate": 3.195330262497416e-05, "loss": 0.6757, "step": 97790 }, { "epoch": 1.08, "learning_rate": 3.195237989783566e-05, "loss": 0.7348, "step": 97795 }, { "epoch": 1.08, "learning_rate": 3.195145717069714e-05, "loss": 0.6459, "step": 97800 }, { "epoch": 1.08, "learning_rate": 3.1950534443558627e-05, "loss": 0.6515, "step": 97805 }, { "epoch": 1.08, "learning_rate": 3.1949611716420114e-05, "loss": 0.7063, "step": 97810 }, { "epoch": 1.08, "learning_rate": 3.19486889892816e-05, "loss": 0.7055, "step": 97815 }, { "epoch": 1.08, "learning_rate": 3.194776626214309e-05, "loss": 0.6646, "step": 97820 }, { "epoch": 1.08, "learning_rate": 3.194684353500458e-05, "loss": 0.6469, "step": 97825 }, { "epoch": 1.08, "learning_rate": 3.1945920807866066e-05, "loss": 0.6933, "step": 97830 }, { "epoch": 1.08, "learning_rate": 3.1944998080727554e-05, "loss": 0.6709, "step": 97835 }, { "epoch": 1.08, "learning_rate": 3.194407535358904e-05, "loss": 0.6944, "step": 97840 }, { "epoch": 1.08, "learning_rate": 3.194315262645053e-05, "loss": 0.6427, "step": 97845 }, { "epoch": 1.08, "learning_rate": 3.194222989931202e-05, "loss": 0.6021, "step": 97850 }, { "epoch": 1.08, "learning_rate": 3.19413071721735e-05, "loss": 0.7488, "step": 97855 }, { "epoch": 1.08, "learning_rate": 3.194038444503499e-05, "loss": 0.6996, "step": 97860 }, { "epoch": 1.08, "learning_rate": 3.193946171789648e-05, "loss": 0.6627, "step": 97865 }, { "epoch": 1.08, "learning_rate": 3.193853899075797e-05, "loss": 0.7214, "step": 97870 }, { "epoch": 1.08, "learning_rate": 3.193761626361945e-05, "loss": 0.6528, "step": 97875 }, { "epoch": 1.08, "learning_rate": 3.1936693536480944e-05, "loss": 0.6822, "step": 97880 }, { "epoch": 1.08, "learning_rate": 3.193577080934243e-05, "loss": 0.6759, "step": 97885 }, { "epoch": 1.08, "learning_rate": 3.193484808220391e-05, "loss": 0.6243, "step": 97890 }, { "epoch": 1.08, "learning_rate": 3.19339253550654e-05, "loss": 0.6565, "step": 97895 }, { "epoch": 1.08, "learning_rate": 3.1933002627926896e-05, "loss": 0.6694, "step": 97900 }, { "epoch": 1.08, "learning_rate": 3.1932079900788383e-05, "loss": 0.6689, "step": 97905 }, { "epoch": 1.08, "learning_rate": 3.1931157173649865e-05, "loss": 0.6942, "step": 97910 }, { "epoch": 1.08, "learning_rate": 3.193023444651135e-05, "loss": 0.689, "step": 97915 }, { "epoch": 1.08, "learning_rate": 3.192931171937284e-05, "loss": 0.7077, "step": 97920 }, { "epoch": 1.08, "learning_rate": 3.1928388992234335e-05, "loss": 0.7042, "step": 97925 }, { "epoch": 1.08, "learning_rate": 3.1927466265095816e-05, "loss": 0.6796, "step": 97930 }, { "epoch": 1.08, "learning_rate": 3.1926543537957304e-05, "loss": 0.7079, "step": 97935 }, { "epoch": 1.08, "learning_rate": 3.192562081081879e-05, "loss": 0.6913, "step": 97940 }, { "epoch": 1.08, "learning_rate": 3.192469808368028e-05, "loss": 0.6534, "step": 97945 }, { "epoch": 1.08, "learning_rate": 3.192377535654177e-05, "loss": 0.6776, "step": 97950 }, { "epoch": 1.08, "learning_rate": 3.1922852629403255e-05, "loss": 0.6593, "step": 97955 }, { "epoch": 1.08, "learning_rate": 3.192192990226474e-05, "loss": 0.6885, "step": 97960 }, { "epoch": 1.08, "learning_rate": 3.192100717512623e-05, "loss": 0.7038, "step": 97965 }, { "epoch": 1.08, "learning_rate": 3.192008444798772e-05, "loss": 0.6395, "step": 97970 }, { "epoch": 1.08, "learning_rate": 3.1919161720849207e-05, "loss": 0.6935, "step": 97975 }, { "epoch": 1.08, "learning_rate": 3.1918238993710694e-05, "loss": 0.6127, "step": 97980 }, { "epoch": 1.08, "learning_rate": 3.191731626657218e-05, "loss": 0.6713, "step": 97985 }, { "epoch": 1.09, "learning_rate": 3.191639353943367e-05, "loss": 0.7235, "step": 97990 }, { "epoch": 1.09, "learning_rate": 3.191547081229516e-05, "loss": 0.6951, "step": 97995 }, { "epoch": 1.09, "learning_rate": 3.1914548085156646e-05, "loss": 0.647, "step": 98000 }, { "epoch": 1.09, "eval_loss": 0.6221500039100647, "eval_runtime": 70.5115, "eval_samples_per_second": 28.364, "eval_steps_per_second": 14.182, "step": 98000 }, { "epoch": 1.09, "learning_rate": 3.191362535801813e-05, "loss": 0.645, "step": 98005 }, { "epoch": 1.09, "learning_rate": 3.191270263087962e-05, "loss": 0.6552, "step": 98010 }, { "epoch": 1.09, "learning_rate": 3.191177990374111e-05, "loss": 0.6731, "step": 98015 }, { "epoch": 1.09, "learning_rate": 3.191085717660259e-05, "loss": 0.6891, "step": 98020 }, { "epoch": 1.09, "learning_rate": 3.190993444946408e-05, "loss": 0.693, "step": 98025 }, { "epoch": 1.09, "learning_rate": 3.190901172232557e-05, "loss": 0.6289, "step": 98030 }, { "epoch": 1.09, "learning_rate": 3.190808899518706e-05, "loss": 0.6861, "step": 98035 }, { "epoch": 1.09, "learning_rate": 3.190716626804854e-05, "loss": 0.7165, "step": 98040 }, { "epoch": 1.09, "learning_rate": 3.190624354091003e-05, "loss": 0.6982, "step": 98045 }, { "epoch": 1.09, "learning_rate": 3.1905320813771524e-05, "loss": 0.652, "step": 98050 }, { "epoch": 1.09, "learning_rate": 3.1904398086633005e-05, "loss": 0.7198, "step": 98055 }, { "epoch": 1.09, "learning_rate": 3.190347535949449e-05, "loss": 0.6671, "step": 98060 }, { "epoch": 1.09, "learning_rate": 3.190255263235598e-05, "loss": 0.7399, "step": 98065 }, { "epoch": 1.09, "learning_rate": 3.190162990521747e-05, "loss": 0.6476, "step": 98070 }, { "epoch": 1.09, "learning_rate": 3.190070717807896e-05, "loss": 0.6833, "step": 98075 }, { "epoch": 1.09, "learning_rate": 3.1899784450940445e-05, "loss": 0.6847, "step": 98080 }, { "epoch": 1.09, "learning_rate": 3.189886172380193e-05, "loss": 0.6157, "step": 98085 }, { "epoch": 1.09, "learning_rate": 3.189793899666342e-05, "loss": 0.6427, "step": 98090 }, { "epoch": 1.09, "learning_rate": 3.189701626952491e-05, "loss": 0.6882, "step": 98095 }, { "epoch": 1.09, "learning_rate": 3.1896093542386396e-05, "loss": 0.7081, "step": 98100 }, { "epoch": 1.09, "learning_rate": 3.1895170815247884e-05, "loss": 0.6964, "step": 98105 }, { "epoch": 1.09, "learning_rate": 3.189424808810937e-05, "loss": 0.6919, "step": 98110 }, { "epoch": 1.09, "learning_rate": 3.189332536097086e-05, "loss": 0.7223, "step": 98115 }, { "epoch": 1.09, "learning_rate": 3.189240263383235e-05, "loss": 0.6077, "step": 98120 }, { "epoch": 1.09, "learning_rate": 3.1891479906693835e-05, "loss": 0.664, "step": 98125 }, { "epoch": 1.09, "learning_rate": 3.1890557179555316e-05, "loss": 0.7268, "step": 98130 }, { "epoch": 1.09, "learning_rate": 3.188963445241681e-05, "loss": 0.6423, "step": 98135 }, { "epoch": 1.09, "learning_rate": 3.18887117252783e-05, "loss": 0.7183, "step": 98140 }, { "epoch": 1.09, "learning_rate": 3.188778899813979e-05, "loss": 0.7333, "step": 98145 }, { "epoch": 1.09, "learning_rate": 3.188686627100127e-05, "loss": 0.667, "step": 98150 }, { "epoch": 1.09, "learning_rate": 3.1885943543862756e-05, "loss": 0.611, "step": 98155 }, { "epoch": 1.09, "learning_rate": 3.188502081672425e-05, "loss": 0.6877, "step": 98160 }, { "epoch": 1.09, "learning_rate": 3.188409808958573e-05, "loss": 0.6794, "step": 98165 }, { "epoch": 1.09, "learning_rate": 3.188317536244722e-05, "loss": 0.6388, "step": 98170 }, { "epoch": 1.09, "learning_rate": 3.188225263530871e-05, "loss": 0.6621, "step": 98175 }, { "epoch": 1.09, "learning_rate": 3.18813299081702e-05, "loss": 0.6386, "step": 98180 }, { "epoch": 1.09, "learning_rate": 3.188040718103168e-05, "loss": 0.691, "step": 98185 }, { "epoch": 1.09, "learning_rate": 3.187948445389317e-05, "loss": 0.7042, "step": 98190 }, { "epoch": 1.09, "learning_rate": 3.187856172675466e-05, "loss": 0.6756, "step": 98195 }, { "epoch": 1.09, "learning_rate": 3.1877638999616146e-05, "loss": 0.6535, "step": 98200 }, { "epoch": 1.09, "learning_rate": 3.1876716272477634e-05, "loss": 0.6595, "step": 98205 }, { "epoch": 1.09, "learning_rate": 3.187579354533912e-05, "loss": 0.6466, "step": 98210 }, { "epoch": 1.09, "learning_rate": 3.187487081820061e-05, "loss": 0.6686, "step": 98215 }, { "epoch": 1.09, "learning_rate": 3.18739480910621e-05, "loss": 0.7301, "step": 98220 }, { "epoch": 1.09, "learning_rate": 3.1873025363923585e-05, "loss": 0.6814, "step": 98225 }, { "epoch": 1.09, "learning_rate": 3.187210263678507e-05, "loss": 0.6516, "step": 98230 }, { "epoch": 1.09, "learning_rate": 3.187117990964656e-05, "loss": 0.7017, "step": 98235 }, { "epoch": 1.09, "learning_rate": 3.187025718250804e-05, "loss": 0.7211, "step": 98240 }, { "epoch": 1.09, "learning_rate": 3.186933445536954e-05, "loss": 0.7347, "step": 98245 }, { "epoch": 1.09, "learning_rate": 3.1868411728231025e-05, "loss": 0.6938, "step": 98250 }, { "epoch": 1.09, "learning_rate": 3.186748900109251e-05, "loss": 0.7068, "step": 98255 }, { "epoch": 1.09, "learning_rate": 3.1866566273953994e-05, "loss": 0.644, "step": 98260 }, { "epoch": 1.09, "learning_rate": 3.186564354681549e-05, "loss": 0.6817, "step": 98265 }, { "epoch": 1.09, "learning_rate": 3.1864720819676976e-05, "loss": 0.7048, "step": 98270 }, { "epoch": 1.09, "learning_rate": 3.186379809253846e-05, "loss": 0.7092, "step": 98275 }, { "epoch": 1.09, "learning_rate": 3.1862875365399945e-05, "loss": 0.7268, "step": 98280 }, { "epoch": 1.09, "learning_rate": 3.186195263826144e-05, "loss": 0.7219, "step": 98285 }, { "epoch": 1.09, "learning_rate": 3.186102991112293e-05, "loss": 0.6805, "step": 98290 }, { "epoch": 1.09, "learning_rate": 3.186010718398441e-05, "loss": 0.5921, "step": 98295 }, { "epoch": 1.09, "learning_rate": 3.1859184456845896e-05, "loss": 0.6856, "step": 98300 }, { "epoch": 1.09, "learning_rate": 3.1858261729707384e-05, "loss": 0.6706, "step": 98305 }, { "epoch": 1.09, "learning_rate": 3.185733900256888e-05, "loss": 0.7188, "step": 98310 }, { "epoch": 1.09, "learning_rate": 3.185641627543036e-05, "loss": 0.6855, "step": 98315 }, { "epoch": 1.09, "learning_rate": 3.185549354829185e-05, "loss": 0.6938, "step": 98320 }, { "epoch": 1.09, "learning_rate": 3.1854570821153336e-05, "loss": 0.6705, "step": 98325 }, { "epoch": 1.09, "learning_rate": 3.1853648094014824e-05, "loss": 0.654, "step": 98330 }, { "epoch": 1.09, "learning_rate": 3.185272536687631e-05, "loss": 0.7063, "step": 98335 }, { "epoch": 1.09, "learning_rate": 3.18518026397378e-05, "loss": 0.677, "step": 98340 }, { "epoch": 1.09, "learning_rate": 3.185087991259929e-05, "loss": 0.6947, "step": 98345 }, { "epoch": 1.09, "learning_rate": 3.1849957185460775e-05, "loss": 0.6542, "step": 98350 }, { "epoch": 1.09, "learning_rate": 3.184903445832226e-05, "loss": 0.7099, "step": 98355 }, { "epoch": 1.09, "learning_rate": 3.184811173118375e-05, "loss": 0.6759, "step": 98360 }, { "epoch": 1.09, "learning_rate": 3.184718900404524e-05, "loss": 0.6554, "step": 98365 }, { "epoch": 1.09, "learning_rate": 3.184626627690672e-05, "loss": 0.6175, "step": 98370 }, { "epoch": 1.09, "learning_rate": 3.1845343549768214e-05, "loss": 0.7126, "step": 98375 }, { "epoch": 1.09, "learning_rate": 3.18444208226297e-05, "loss": 0.6967, "step": 98380 }, { "epoch": 1.09, "learning_rate": 3.184349809549119e-05, "loss": 0.6432, "step": 98385 }, { "epoch": 1.09, "learning_rate": 3.184257536835267e-05, "loss": 0.7137, "step": 98390 }, { "epoch": 1.09, "learning_rate": 3.1841652641214166e-05, "loss": 0.627, "step": 98395 }, { "epoch": 1.09, "learning_rate": 3.1840729914075653e-05, "loss": 0.6139, "step": 98400 }, { "epoch": 1.09, "learning_rate": 3.1839807186937134e-05, "loss": 0.6542, "step": 98405 }, { "epoch": 1.09, "learning_rate": 3.183888445979862e-05, "loss": 0.6473, "step": 98410 }, { "epoch": 1.09, "learning_rate": 3.183796173266012e-05, "loss": 0.707, "step": 98415 }, { "epoch": 1.09, "learning_rate": 3.1837039005521605e-05, "loss": 0.6385, "step": 98420 }, { "epoch": 1.09, "learning_rate": 3.1836116278383086e-05, "loss": 0.654, "step": 98425 }, { "epoch": 1.09, "learning_rate": 3.1835193551244574e-05, "loss": 0.6451, "step": 98430 }, { "epoch": 1.09, "learning_rate": 3.183427082410607e-05, "loss": 0.6378, "step": 98435 }, { "epoch": 1.09, "learning_rate": 3.183334809696755e-05, "loss": 0.6374, "step": 98440 }, { "epoch": 1.09, "learning_rate": 3.183242536982904e-05, "loss": 0.6928, "step": 98445 }, { "epoch": 1.09, "learning_rate": 3.1831502642690525e-05, "loss": 0.6119, "step": 98450 }, { "epoch": 1.09, "learning_rate": 3.183057991555201e-05, "loss": 0.6573, "step": 98455 }, { "epoch": 1.09, "learning_rate": 3.18296571884135e-05, "loss": 0.6485, "step": 98460 }, { "epoch": 1.09, "learning_rate": 3.182873446127499e-05, "loss": 0.7274, "step": 98465 }, { "epoch": 1.09, "learning_rate": 3.1827811734136477e-05, "loss": 0.7497, "step": 98470 }, { "epoch": 1.09, "learning_rate": 3.1826889006997964e-05, "loss": 0.6264, "step": 98475 }, { "epoch": 1.09, "learning_rate": 3.182596627985945e-05, "loss": 0.7281, "step": 98480 }, { "epoch": 1.09, "learning_rate": 3.182504355272094e-05, "loss": 0.6677, "step": 98485 }, { "epoch": 1.09, "learning_rate": 3.182412082558243e-05, "loss": 0.7083, "step": 98490 }, { "epoch": 1.09, "learning_rate": 3.1823198098443916e-05, "loss": 0.7139, "step": 98495 }, { "epoch": 1.09, "learning_rate": 3.1822275371305404e-05, "loss": 0.6695, "step": 98500 }, { "epoch": 1.09, "learning_rate": 3.182135264416689e-05, "loss": 0.7088, "step": 98505 }, { "epoch": 1.09, "learning_rate": 3.182042991702838e-05, "loss": 0.6935, "step": 98510 }, { "epoch": 1.09, "learning_rate": 3.181950718988986e-05, "loss": 0.6365, "step": 98515 }, { "epoch": 1.09, "learning_rate": 3.181858446275135e-05, "loss": 0.7292, "step": 98520 }, { "epoch": 1.09, "learning_rate": 3.181766173561284e-05, "loss": 0.6742, "step": 98525 }, { "epoch": 1.09, "learning_rate": 3.181673900847433e-05, "loss": 0.6992, "step": 98530 }, { "epoch": 1.09, "learning_rate": 3.181581628133581e-05, "loss": 0.6637, "step": 98535 }, { "epoch": 1.09, "learning_rate": 3.18148935541973e-05, "loss": 0.6039, "step": 98540 }, { "epoch": 1.09, "learning_rate": 3.1813970827058794e-05, "loss": 0.6483, "step": 98545 }, { "epoch": 1.09, "learning_rate": 3.1813048099920275e-05, "loss": 0.7133, "step": 98550 }, { "epoch": 1.09, "learning_rate": 3.181212537278176e-05, "loss": 0.6754, "step": 98555 }, { "epoch": 1.09, "learning_rate": 3.181120264564325e-05, "loss": 0.7179, "step": 98560 }, { "epoch": 1.09, "learning_rate": 3.1810279918504746e-05, "loss": 0.6287, "step": 98565 }, { "epoch": 1.09, "learning_rate": 3.180935719136623e-05, "loss": 0.662, "step": 98570 }, { "epoch": 1.09, "learning_rate": 3.1808434464227715e-05, "loss": 0.6783, "step": 98575 }, { "epoch": 1.09, "learning_rate": 3.18075117370892e-05, "loss": 0.6823, "step": 98580 }, { "epoch": 1.09, "learning_rate": 3.180658900995069e-05, "loss": 0.6273, "step": 98585 }, { "epoch": 1.09, "learning_rate": 3.180566628281218e-05, "loss": 0.6823, "step": 98590 }, { "epoch": 1.09, "learning_rate": 3.1804743555673666e-05, "loss": 0.6488, "step": 98595 }, { "epoch": 1.09, "learning_rate": 3.1803820828535154e-05, "loss": 0.7419, "step": 98600 }, { "epoch": 1.09, "learning_rate": 3.180289810139664e-05, "loss": 0.6436, "step": 98605 }, { "epoch": 1.09, "learning_rate": 3.180197537425813e-05, "loss": 0.7069, "step": 98610 }, { "epoch": 1.09, "learning_rate": 3.180105264711962e-05, "loss": 0.6825, "step": 98615 }, { "epoch": 1.09, "learning_rate": 3.1800129919981105e-05, "loss": 0.7148, "step": 98620 }, { "epoch": 1.09, "learning_rate": 3.1799207192842586e-05, "loss": 0.7326, "step": 98625 }, { "epoch": 1.09, "learning_rate": 3.179828446570408e-05, "loss": 0.6972, "step": 98630 }, { "epoch": 1.09, "learning_rate": 3.179736173856557e-05, "loss": 0.7112, "step": 98635 }, { "epoch": 1.09, "learning_rate": 3.1796439011427057e-05, "loss": 0.6518, "step": 98640 }, { "epoch": 1.09, "learning_rate": 3.179551628428854e-05, "loss": 0.6257, "step": 98645 }, { "epoch": 1.09, "learning_rate": 3.179459355715003e-05, "loss": 0.6445, "step": 98650 }, { "epoch": 1.09, "learning_rate": 3.179367083001152e-05, "loss": 0.6961, "step": 98655 }, { "epoch": 1.09, "learning_rate": 3.1792748102873e-05, "loss": 0.7053, "step": 98660 }, { "epoch": 1.09, "learning_rate": 3.179182537573449e-05, "loss": 0.6617, "step": 98665 }, { "epoch": 1.09, "learning_rate": 3.1790902648595984e-05, "loss": 0.6953, "step": 98670 }, { "epoch": 1.09, "learning_rate": 3.178997992145747e-05, "loss": 0.7019, "step": 98675 }, { "epoch": 1.09, "learning_rate": 3.178905719431895e-05, "loss": 0.704, "step": 98680 }, { "epoch": 1.09, "learning_rate": 3.178813446718044e-05, "loss": 0.6516, "step": 98685 }, { "epoch": 1.09, "learning_rate": 3.178721174004193e-05, "loss": 0.7053, "step": 98690 }, { "epoch": 1.09, "learning_rate": 3.178628901290342e-05, "loss": 0.6806, "step": 98695 }, { "epoch": 1.09, "learning_rate": 3.1785366285764904e-05, "loss": 0.6925, "step": 98700 }, { "epoch": 1.09, "learning_rate": 3.178444355862639e-05, "loss": 0.6832, "step": 98705 }, { "epoch": 1.09, "learning_rate": 3.178352083148788e-05, "loss": 0.6213, "step": 98710 }, { "epoch": 1.09, "learning_rate": 3.178259810434937e-05, "loss": 0.6825, "step": 98715 }, { "epoch": 1.09, "learning_rate": 3.1781675377210855e-05, "loss": 0.728, "step": 98720 }, { "epoch": 1.09, "learning_rate": 3.178075265007234e-05, "loss": 0.6514, "step": 98725 }, { "epoch": 1.09, "learning_rate": 3.177982992293383e-05, "loss": 0.618, "step": 98730 }, { "epoch": 1.09, "learning_rate": 3.177890719579532e-05, "loss": 0.682, "step": 98735 }, { "epoch": 1.09, "learning_rate": 3.177798446865681e-05, "loss": 0.6945, "step": 98740 }, { "epoch": 1.09, "learning_rate": 3.1777061741518295e-05, "loss": 0.7025, "step": 98745 }, { "epoch": 1.09, "learning_rate": 3.177613901437978e-05, "loss": 0.7456, "step": 98750 }, { "epoch": 1.09, "learning_rate": 3.1775216287241264e-05, "loss": 0.6416, "step": 98755 }, { "epoch": 1.09, "learning_rate": 3.177429356010276e-05, "loss": 0.6495, "step": 98760 }, { "epoch": 1.09, "learning_rate": 3.1773370832964246e-05, "loss": 0.6376, "step": 98765 }, { "epoch": 1.09, "learning_rate": 3.1772448105825734e-05, "loss": 0.6817, "step": 98770 }, { "epoch": 1.09, "learning_rate": 3.1771525378687215e-05, "loss": 0.6275, "step": 98775 }, { "epoch": 1.09, "learning_rate": 3.177060265154871e-05, "loss": 0.6309, "step": 98780 }, { "epoch": 1.09, "learning_rate": 3.17696799244102e-05, "loss": 0.6767, "step": 98785 }, { "epoch": 1.09, "learning_rate": 3.176875719727168e-05, "loss": 0.713, "step": 98790 }, { "epoch": 1.09, "learning_rate": 3.1767834470133166e-05, "loss": 0.6572, "step": 98795 }, { "epoch": 1.09, "learning_rate": 3.176691174299466e-05, "loss": 0.683, "step": 98800 }, { "epoch": 1.09, "learning_rate": 3.176598901585615e-05, "loss": 0.6749, "step": 98805 }, { "epoch": 1.09, "learning_rate": 3.176506628871763e-05, "loss": 0.7281, "step": 98810 }, { "epoch": 1.09, "learning_rate": 3.176414356157912e-05, "loss": 0.6805, "step": 98815 }, { "epoch": 1.09, "learning_rate": 3.176322083444061e-05, "loss": 0.6869, "step": 98820 }, { "epoch": 1.09, "learning_rate": 3.1762298107302093e-05, "loss": 0.691, "step": 98825 }, { "epoch": 1.09, "learning_rate": 3.176137538016358e-05, "loss": 0.663, "step": 98830 }, { "epoch": 1.09, "learning_rate": 3.176045265302507e-05, "loss": 0.6984, "step": 98835 }, { "epoch": 1.09, "learning_rate": 3.175952992588656e-05, "loss": 0.6972, "step": 98840 }, { "epoch": 1.09, "learning_rate": 3.1758607198748045e-05, "loss": 0.6616, "step": 98845 }, { "epoch": 1.09, "learning_rate": 3.175768447160953e-05, "loss": 0.675, "step": 98850 }, { "epoch": 1.09, "learning_rate": 3.175676174447102e-05, "loss": 0.6944, "step": 98855 }, { "epoch": 1.09, "learning_rate": 3.175583901733251e-05, "loss": 0.7613, "step": 98860 }, { "epoch": 1.09, "learning_rate": 3.1754916290193996e-05, "loss": 0.6701, "step": 98865 }, { "epoch": 1.09, "learning_rate": 3.1753993563055484e-05, "loss": 0.6525, "step": 98870 }, { "epoch": 1.09, "learning_rate": 3.175307083591697e-05, "loss": 0.6765, "step": 98875 }, { "epoch": 1.09, "learning_rate": 3.175214810877846e-05, "loss": 0.6947, "step": 98880 }, { "epoch": 1.09, "learning_rate": 3.175122538163995e-05, "loss": 0.6542, "step": 98885 }, { "epoch": 1.09, "learning_rate": 3.1750302654501435e-05, "loss": 0.7289, "step": 98890 }, { "epoch": 1.1, "learning_rate": 3.174937992736292e-05, "loss": 0.6984, "step": 98895 }, { "epoch": 1.1, "learning_rate": 3.1748457200224404e-05, "loss": 0.6826, "step": 98900 }, { "epoch": 1.1, "learning_rate": 3.174753447308589e-05, "loss": 0.6334, "step": 98905 }, { "epoch": 1.1, "learning_rate": 3.174661174594739e-05, "loss": 0.6395, "step": 98910 }, { "epoch": 1.1, "learning_rate": 3.1745689018808875e-05, "loss": 0.652, "step": 98915 }, { "epoch": 1.1, "learning_rate": 3.1744766291670356e-05, "loss": 0.6555, "step": 98920 }, { "epoch": 1.1, "learning_rate": 3.1743843564531844e-05, "loss": 0.6854, "step": 98925 }, { "epoch": 1.1, "learning_rate": 3.174292083739334e-05, "loss": 0.6841, "step": 98930 }, { "epoch": 1.1, "learning_rate": 3.174199811025482e-05, "loss": 0.68, "step": 98935 }, { "epoch": 1.1, "learning_rate": 3.174107538311631e-05, "loss": 0.7243, "step": 98940 }, { "epoch": 1.1, "learning_rate": 3.1740152655977795e-05, "loss": 0.6393, "step": 98945 }, { "epoch": 1.1, "learning_rate": 3.173922992883929e-05, "loss": 0.6614, "step": 98950 }, { "epoch": 1.1, "learning_rate": 3.173830720170077e-05, "loss": 0.6761, "step": 98955 }, { "epoch": 1.1, "learning_rate": 3.173738447456226e-05, "loss": 0.6409, "step": 98960 }, { "epoch": 1.1, "learning_rate": 3.1736461747423746e-05, "loss": 0.6915, "step": 98965 }, { "epoch": 1.1, "learning_rate": 3.1735539020285234e-05, "loss": 0.7072, "step": 98970 }, { "epoch": 1.1, "learning_rate": 3.173461629314672e-05, "loss": 0.6777, "step": 98975 }, { "epoch": 1.1, "learning_rate": 3.173369356600821e-05, "loss": 0.7121, "step": 98980 }, { "epoch": 1.1, "learning_rate": 3.17327708388697e-05, "loss": 0.6737, "step": 98985 }, { "epoch": 1.1, "learning_rate": 3.1731848111731186e-05, "loss": 0.645, "step": 98990 }, { "epoch": 1.1, "learning_rate": 3.1730925384592674e-05, "loss": 0.6311, "step": 98995 }, { "epoch": 1.1, "learning_rate": 3.173000265745416e-05, "loss": 0.6622, "step": 99000 }, { "epoch": 1.1, "eval_loss": 0.6119351387023926, "eval_runtime": 70.3899, "eval_samples_per_second": 28.413, "eval_steps_per_second": 14.207, "step": 99000 }, { "epoch": 1.1, "learning_rate": 3.172907993031565e-05, "loss": 0.6319, "step": 99005 }, { "epoch": 1.1, "learning_rate": 3.172815720317713e-05, "loss": 0.6998, "step": 99010 }, { "epoch": 1.1, "learning_rate": 3.1727234476038625e-05, "loss": 0.6929, "step": 99015 }, { "epoch": 1.1, "learning_rate": 3.172631174890011e-05, "loss": 0.7266, "step": 99020 }, { "epoch": 1.1, "learning_rate": 3.17253890217616e-05, "loss": 0.6302, "step": 99025 }, { "epoch": 1.1, "learning_rate": 3.172446629462308e-05, "loss": 0.6955, "step": 99030 }, { "epoch": 1.1, "learning_rate": 3.1723543567484576e-05, "loss": 0.6582, "step": 99035 }, { "epoch": 1.1, "learning_rate": 3.1722620840346064e-05, "loss": 0.7003, "step": 99040 }, { "epoch": 1.1, "learning_rate": 3.1721698113207545e-05, "loss": 0.6697, "step": 99045 }, { "epoch": 1.1, "learning_rate": 3.172077538606903e-05, "loss": 0.6353, "step": 99050 }, { "epoch": 1.1, "learning_rate": 3.171985265893052e-05, "loss": 0.7147, "step": 99055 }, { "epoch": 1.1, "learning_rate": 3.1718929931792016e-05, "loss": 0.7057, "step": 99060 }, { "epoch": 1.1, "learning_rate": 3.17180072046535e-05, "loss": 0.7055, "step": 99065 }, { "epoch": 1.1, "learning_rate": 3.1717084477514984e-05, "loss": 0.6822, "step": 99070 }, { "epoch": 1.1, "learning_rate": 3.171616175037647e-05, "loss": 0.6482, "step": 99075 }, { "epoch": 1.1, "learning_rate": 3.171523902323797e-05, "loss": 0.696, "step": 99080 }, { "epoch": 1.1, "learning_rate": 3.171431629609945e-05, "loss": 0.6242, "step": 99085 }, { "epoch": 1.1, "learning_rate": 3.1713393568960936e-05, "loss": 0.668, "step": 99090 }, { "epoch": 1.1, "learning_rate": 3.1712470841822424e-05, "loss": 0.6176, "step": 99095 }, { "epoch": 1.1, "learning_rate": 3.171154811468391e-05, "loss": 0.7138, "step": 99100 }, { "epoch": 1.1, "learning_rate": 3.17106253875454e-05, "loss": 0.6288, "step": 99105 }, { "epoch": 1.1, "learning_rate": 3.170970266040689e-05, "loss": 0.6422, "step": 99110 }, { "epoch": 1.1, "learning_rate": 3.1708779933268375e-05, "loss": 0.6348, "step": 99115 }, { "epoch": 1.1, "learning_rate": 3.170785720612986e-05, "loss": 0.6634, "step": 99120 }, { "epoch": 1.1, "learning_rate": 3.170693447899135e-05, "loss": 0.683, "step": 99125 }, { "epoch": 1.1, "learning_rate": 3.170601175185284e-05, "loss": 0.6597, "step": 99130 }, { "epoch": 1.1, "learning_rate": 3.1705089024714326e-05, "loss": 0.6616, "step": 99135 }, { "epoch": 1.1, "learning_rate": 3.170416629757581e-05, "loss": 0.7156, "step": 99140 }, { "epoch": 1.1, "learning_rate": 3.17032435704373e-05, "loss": 0.6777, "step": 99145 }, { "epoch": 1.1, "learning_rate": 3.170232084329879e-05, "loss": 0.6943, "step": 99150 }, { "epoch": 1.1, "learning_rate": 3.170139811616028e-05, "loss": 0.6705, "step": 99155 }, { "epoch": 1.1, "learning_rate": 3.170047538902176e-05, "loss": 0.7104, "step": 99160 }, { "epoch": 1.1, "learning_rate": 3.1699552661883254e-05, "loss": 0.6713, "step": 99165 }, { "epoch": 1.1, "learning_rate": 3.169862993474474e-05, "loss": 0.6823, "step": 99170 }, { "epoch": 1.1, "learning_rate": 3.169770720760622e-05, "loss": 0.6531, "step": 99175 }, { "epoch": 1.1, "learning_rate": 3.169678448046771e-05, "loss": 0.6477, "step": 99180 }, { "epoch": 1.1, "learning_rate": 3.1695861753329205e-05, "loss": 0.6766, "step": 99185 }, { "epoch": 1.1, "learning_rate": 3.169493902619069e-05, "loss": 0.6658, "step": 99190 }, { "epoch": 1.1, "learning_rate": 3.1694016299052174e-05, "loss": 0.6284, "step": 99195 }, { "epoch": 1.1, "learning_rate": 3.169309357191366e-05, "loss": 0.6725, "step": 99200 }, { "epoch": 1.1, "learning_rate": 3.169217084477515e-05, "loss": 0.6633, "step": 99205 }, { "epoch": 1.1, "learning_rate": 3.169124811763664e-05, "loss": 0.691, "step": 99210 }, { "epoch": 1.1, "learning_rate": 3.1690325390498125e-05, "loss": 0.6527, "step": 99215 }, { "epoch": 1.1, "learning_rate": 3.168940266335961e-05, "loss": 0.6624, "step": 99220 }, { "epoch": 1.1, "learning_rate": 3.16884799362211e-05, "loss": 0.6568, "step": 99225 }, { "epoch": 1.1, "learning_rate": 3.168755720908259e-05, "loss": 0.662, "step": 99230 }, { "epoch": 1.1, "learning_rate": 3.168663448194408e-05, "loss": 0.6918, "step": 99235 }, { "epoch": 1.1, "learning_rate": 3.1685711754805565e-05, "loss": 0.6885, "step": 99240 }, { "epoch": 1.1, "learning_rate": 3.168478902766705e-05, "loss": 0.7053, "step": 99245 }, { "epoch": 1.1, "learning_rate": 3.168386630052854e-05, "loss": 0.6712, "step": 99250 }, { "epoch": 1.1, "learning_rate": 3.168294357339003e-05, "loss": 0.6497, "step": 99255 }, { "epoch": 1.1, "learning_rate": 3.1682020846251516e-05, "loss": 0.7075, "step": 99260 }, { "epoch": 1.1, "learning_rate": 3.1681098119113004e-05, "loss": 0.6436, "step": 99265 }, { "epoch": 1.1, "learning_rate": 3.168017539197449e-05, "loss": 0.6752, "step": 99270 }, { "epoch": 1.1, "learning_rate": 3.167925266483598e-05, "loss": 0.6641, "step": 99275 }, { "epoch": 1.1, "learning_rate": 3.167832993769747e-05, "loss": 0.7042, "step": 99280 }, { "epoch": 1.1, "learning_rate": 3.167740721055895e-05, "loss": 0.6034, "step": 99285 }, { "epoch": 1.1, "learning_rate": 3.1676484483420436e-05, "loss": 0.6962, "step": 99290 }, { "epoch": 1.1, "learning_rate": 3.167556175628193e-05, "loss": 0.6734, "step": 99295 }, { "epoch": 1.1, "learning_rate": 3.167463902914342e-05, "loss": 0.6725, "step": 99300 }, { "epoch": 1.1, "learning_rate": 3.16737163020049e-05, "loss": 0.6569, "step": 99305 }, { "epoch": 1.1, "learning_rate": 3.167279357486639e-05, "loss": 0.6729, "step": 99310 }, { "epoch": 1.1, "learning_rate": 3.167187084772788e-05, "loss": 0.6559, "step": 99315 }, { "epoch": 1.1, "learning_rate": 3.167094812058936e-05, "loss": 0.6912, "step": 99320 }, { "epoch": 1.1, "learning_rate": 3.167002539345085e-05, "loss": 0.6766, "step": 99325 }, { "epoch": 1.1, "learning_rate": 3.166910266631234e-05, "loss": 0.6146, "step": 99330 }, { "epoch": 1.1, "learning_rate": 3.1668179939173834e-05, "loss": 0.695, "step": 99335 }, { "epoch": 1.1, "learning_rate": 3.1667257212035315e-05, "loss": 0.7255, "step": 99340 }, { "epoch": 1.1, "learning_rate": 3.16663344848968e-05, "loss": 0.6604, "step": 99345 }, { "epoch": 1.1, "learning_rate": 3.166541175775829e-05, "loss": 0.7115, "step": 99350 }, { "epoch": 1.1, "learning_rate": 3.166448903061978e-05, "loss": 0.6644, "step": 99355 }, { "epoch": 1.1, "learning_rate": 3.1663566303481266e-05, "loss": 0.6378, "step": 99360 }, { "epoch": 1.1, "learning_rate": 3.1662643576342754e-05, "loss": 0.6389, "step": 99365 }, { "epoch": 1.1, "learning_rate": 3.166172084920424e-05, "loss": 0.6915, "step": 99370 }, { "epoch": 1.1, "learning_rate": 3.166079812206573e-05, "loss": 0.6927, "step": 99375 }, { "epoch": 1.1, "learning_rate": 3.165987539492722e-05, "loss": 0.6893, "step": 99380 }, { "epoch": 1.1, "learning_rate": 3.1658952667788705e-05, "loss": 0.7633, "step": 99385 }, { "epoch": 1.1, "learning_rate": 3.165802994065019e-05, "loss": 0.6637, "step": 99390 }, { "epoch": 1.1, "learning_rate": 3.1657107213511674e-05, "loss": 0.6955, "step": 99395 }, { "epoch": 1.1, "learning_rate": 3.165618448637317e-05, "loss": 0.6841, "step": 99400 }, { "epoch": 1.1, "learning_rate": 3.165526175923466e-05, "loss": 0.6736, "step": 99405 }, { "epoch": 1.1, "learning_rate": 3.1654339032096145e-05, "loss": 0.7385, "step": 99410 }, { "epoch": 1.1, "learning_rate": 3.1653416304957626e-05, "loss": 0.6831, "step": 99415 }, { "epoch": 1.1, "learning_rate": 3.165249357781912e-05, "loss": 0.6583, "step": 99420 }, { "epoch": 1.1, "learning_rate": 3.165157085068061e-05, "loss": 0.64, "step": 99425 }, { "epoch": 1.1, "learning_rate": 3.1650648123542096e-05, "loss": 0.6105, "step": 99430 }, { "epoch": 1.1, "learning_rate": 3.164972539640358e-05, "loss": 0.6907, "step": 99435 }, { "epoch": 1.1, "learning_rate": 3.1648802669265065e-05, "loss": 0.6567, "step": 99440 }, { "epoch": 1.1, "learning_rate": 3.164787994212656e-05, "loss": 0.683, "step": 99445 }, { "epoch": 1.1, "learning_rate": 3.164695721498804e-05, "loss": 0.6993, "step": 99450 }, { "epoch": 1.1, "learning_rate": 3.164603448784953e-05, "loss": 0.6735, "step": 99455 }, { "epoch": 1.1, "learning_rate": 3.1645111760711016e-05, "loss": 0.6407, "step": 99460 }, { "epoch": 1.1, "learning_rate": 3.164418903357251e-05, "loss": 0.6179, "step": 99465 }, { "epoch": 1.1, "learning_rate": 3.164326630643399e-05, "loss": 0.6574, "step": 99470 }, { "epoch": 1.1, "learning_rate": 3.164234357929548e-05, "loss": 0.6396, "step": 99475 }, { "epoch": 1.1, "learning_rate": 3.164142085215697e-05, "loss": 0.6721, "step": 99480 }, { "epoch": 1.1, "learning_rate": 3.1640498125018456e-05, "loss": 0.641, "step": 99485 }, { "epoch": 1.1, "learning_rate": 3.1639575397879943e-05, "loss": 0.692, "step": 99490 }, { "epoch": 1.1, "learning_rate": 3.163865267074143e-05, "loss": 0.6619, "step": 99495 }, { "epoch": 1.1, "learning_rate": 3.163772994360292e-05, "loss": 0.7185, "step": 99500 }, { "epoch": 1.1, "learning_rate": 3.163680721646441e-05, "loss": 0.6961, "step": 99505 }, { "epoch": 1.1, "learning_rate": 3.1635884489325895e-05, "loss": 0.7282, "step": 99510 }, { "epoch": 1.1, "learning_rate": 3.163496176218738e-05, "loss": 0.6518, "step": 99515 }, { "epoch": 1.1, "learning_rate": 3.163403903504887e-05, "loss": 0.6503, "step": 99520 }, { "epoch": 1.1, "learning_rate": 3.163311630791035e-05, "loss": 0.6465, "step": 99525 }, { "epoch": 1.1, "learning_rate": 3.1632193580771846e-05, "loss": 0.6917, "step": 99530 }, { "epoch": 1.1, "learning_rate": 3.1631270853633334e-05, "loss": 0.6237, "step": 99535 }, { "epoch": 1.1, "learning_rate": 3.163034812649482e-05, "loss": 0.665, "step": 99540 }, { "epoch": 1.1, "learning_rate": 3.16294253993563e-05, "loss": 0.7768, "step": 99545 }, { "epoch": 1.1, "learning_rate": 3.16285026722178e-05, "loss": 0.6634, "step": 99550 }, { "epoch": 1.1, "learning_rate": 3.1627579945079285e-05, "loss": 0.6338, "step": 99555 }, { "epoch": 1.1, "learning_rate": 3.1626657217940767e-05, "loss": 0.6535, "step": 99560 }, { "epoch": 1.1, "learning_rate": 3.1625734490802254e-05, "loss": 0.7016, "step": 99565 }, { "epoch": 1.1, "learning_rate": 3.162481176366375e-05, "loss": 0.7422, "step": 99570 }, { "epoch": 1.1, "learning_rate": 3.162388903652524e-05, "loss": 0.6863, "step": 99575 }, { "epoch": 1.1, "learning_rate": 3.162296630938672e-05, "loss": 0.634, "step": 99580 }, { "epoch": 1.1, "learning_rate": 3.1622043582248206e-05, "loss": 0.6802, "step": 99585 }, { "epoch": 1.1, "learning_rate": 3.1621120855109694e-05, "loss": 0.632, "step": 99590 }, { "epoch": 1.1, "learning_rate": 3.162019812797118e-05, "loss": 0.6796, "step": 99595 }, { "epoch": 1.1, "learning_rate": 3.161927540083267e-05, "loss": 0.6251, "step": 99600 }, { "epoch": 1.1, "learning_rate": 3.161835267369416e-05, "loss": 0.6582, "step": 99605 }, { "epoch": 1.1, "learning_rate": 3.1617429946555645e-05, "loss": 0.6471, "step": 99610 }, { "epoch": 1.1, "learning_rate": 3.161650721941713e-05, "loss": 0.6619, "step": 99615 }, { "epoch": 1.1, "learning_rate": 3.161558449227862e-05, "loss": 0.6187, "step": 99620 }, { "epoch": 1.1, "learning_rate": 3.161466176514011e-05, "loss": 0.663, "step": 99625 }, { "epoch": 1.1, "learning_rate": 3.1613739038001596e-05, "loss": 0.7281, "step": 99630 }, { "epoch": 1.1, "learning_rate": 3.1612816310863084e-05, "loss": 0.6364, "step": 99635 }, { "epoch": 1.1, "learning_rate": 3.161189358372457e-05, "loss": 0.6852, "step": 99640 }, { "epoch": 1.1, "learning_rate": 3.161097085658606e-05, "loss": 0.6468, "step": 99645 }, { "epoch": 1.1, "learning_rate": 3.161004812944755e-05, "loss": 0.6975, "step": 99650 }, { "epoch": 1.1, "learning_rate": 3.1609125402309036e-05, "loss": 0.6813, "step": 99655 }, { "epoch": 1.1, "learning_rate": 3.1608202675170524e-05, "loss": 0.6648, "step": 99660 }, { "epoch": 1.1, "learning_rate": 3.160727994803201e-05, "loss": 0.6685, "step": 99665 }, { "epoch": 1.1, "learning_rate": 3.160635722089349e-05, "loss": 0.6382, "step": 99670 }, { "epoch": 1.1, "learning_rate": 3.160543449375498e-05, "loss": 0.7201, "step": 99675 }, { "epoch": 1.1, "learning_rate": 3.1604511766616475e-05, "loss": 0.7065, "step": 99680 }, { "epoch": 1.1, "learning_rate": 3.160358903947796e-05, "loss": 0.64, "step": 99685 }, { "epoch": 1.1, "learning_rate": 3.1602666312339444e-05, "loss": 0.6353, "step": 99690 }, { "epoch": 1.1, "learning_rate": 3.160174358520093e-05, "loss": 0.6493, "step": 99695 }, { "epoch": 1.1, "learning_rate": 3.1600820858062426e-05, "loss": 0.6833, "step": 99700 }, { "epoch": 1.1, "learning_rate": 3.159989813092391e-05, "loss": 0.7256, "step": 99705 }, { "epoch": 1.1, "learning_rate": 3.1598975403785395e-05, "loss": 0.6848, "step": 99710 }, { "epoch": 1.1, "learning_rate": 3.159805267664688e-05, "loss": 0.6125, "step": 99715 }, { "epoch": 1.1, "learning_rate": 3.159712994950838e-05, "loss": 0.7117, "step": 99720 }, { "epoch": 1.1, "learning_rate": 3.159620722236986e-05, "loss": 0.6697, "step": 99725 }, { "epoch": 1.1, "learning_rate": 3.1595284495231347e-05, "loss": 0.7105, "step": 99730 }, { "epoch": 1.1, "learning_rate": 3.1594361768092834e-05, "loss": 0.6874, "step": 99735 }, { "epoch": 1.1, "learning_rate": 3.159343904095432e-05, "loss": 0.6073, "step": 99740 }, { "epoch": 1.1, "learning_rate": 3.159251631381581e-05, "loss": 0.6417, "step": 99745 }, { "epoch": 1.1, "learning_rate": 3.15915935866773e-05, "loss": 0.7812, "step": 99750 }, { "epoch": 1.1, "learning_rate": 3.1590670859538786e-05, "loss": 0.6699, "step": 99755 }, { "epoch": 1.1, "learning_rate": 3.1589748132400274e-05, "loss": 0.6389, "step": 99760 }, { "epoch": 1.1, "learning_rate": 3.158882540526176e-05, "loss": 0.6533, "step": 99765 }, { "epoch": 1.1, "learning_rate": 3.158790267812325e-05, "loss": 0.674, "step": 99770 }, { "epoch": 1.1, "learning_rate": 3.158697995098474e-05, "loss": 0.6629, "step": 99775 }, { "epoch": 1.1, "learning_rate": 3.158605722384622e-05, "loss": 0.5986, "step": 99780 }, { "epoch": 1.1, "learning_rate": 3.158513449670771e-05, "loss": 0.6683, "step": 99785 }, { "epoch": 1.1, "learning_rate": 3.15842117695692e-05, "loss": 0.6609, "step": 99790 }, { "epoch": 1.11, "learning_rate": 3.158328904243069e-05, "loss": 0.6129, "step": 99795 }, { "epoch": 1.11, "learning_rate": 3.158236631529217e-05, "loss": 0.6894, "step": 99800 }, { "epoch": 1.11, "learning_rate": 3.1581443588153664e-05, "loss": 0.6465, "step": 99805 }, { "epoch": 1.11, "learning_rate": 3.158052086101515e-05, "loss": 0.6673, "step": 99810 }, { "epoch": 1.11, "learning_rate": 3.157959813387664e-05, "loss": 0.6125, "step": 99815 }, { "epoch": 1.11, "learning_rate": 3.157867540673812e-05, "loss": 0.676, "step": 99820 }, { "epoch": 1.11, "learning_rate": 3.157775267959961e-05, "loss": 0.7009, "step": 99825 }, { "epoch": 1.11, "learning_rate": 3.1576829952461104e-05, "loss": 0.7173, "step": 99830 }, { "epoch": 1.11, "learning_rate": 3.1575907225322585e-05, "loss": 0.5978, "step": 99835 }, { "epoch": 1.11, "learning_rate": 3.157498449818407e-05, "loss": 0.6986, "step": 99840 }, { "epoch": 1.11, "learning_rate": 3.157406177104556e-05, "loss": 0.709, "step": 99845 }, { "epoch": 1.11, "learning_rate": 3.1573139043907055e-05, "loss": 0.6635, "step": 99850 }, { "epoch": 1.11, "learning_rate": 3.1572216316768536e-05, "loss": 0.7022, "step": 99855 }, { "epoch": 1.11, "learning_rate": 3.1571293589630024e-05, "loss": 0.6372, "step": 99860 }, { "epoch": 1.11, "learning_rate": 3.157037086249151e-05, "loss": 0.6355, "step": 99865 }, { "epoch": 1.11, "learning_rate": 3.1569448135353e-05, "loss": 0.6293, "step": 99870 }, { "epoch": 1.11, "learning_rate": 3.156852540821449e-05, "loss": 0.6845, "step": 99875 }, { "epoch": 1.11, "learning_rate": 3.1567602681075975e-05, "loss": 0.6418, "step": 99880 }, { "epoch": 1.11, "learning_rate": 3.156667995393746e-05, "loss": 0.6274, "step": 99885 }, { "epoch": 1.11, "learning_rate": 3.156575722679895e-05, "loss": 0.6875, "step": 99890 }, { "epoch": 1.11, "learning_rate": 3.156483449966044e-05, "loss": 0.6578, "step": 99895 }, { "epoch": 1.11, "learning_rate": 3.156391177252193e-05, "loss": 0.672, "step": 99900 }, { "epoch": 1.11, "learning_rate": 3.1562989045383415e-05, "loss": 0.6376, "step": 99905 }, { "epoch": 1.11, "learning_rate": 3.1562066318244896e-05, "loss": 0.6764, "step": 99910 }, { "epoch": 1.11, "learning_rate": 3.156114359110639e-05, "loss": 0.6575, "step": 99915 }, { "epoch": 1.11, "learning_rate": 3.156022086396788e-05, "loss": 0.6966, "step": 99920 }, { "epoch": 1.11, "learning_rate": 3.1559298136829366e-05, "loss": 0.6739, "step": 99925 }, { "epoch": 1.11, "learning_rate": 3.155837540969085e-05, "loss": 0.6741, "step": 99930 }, { "epoch": 1.11, "learning_rate": 3.155745268255234e-05, "loss": 0.7083, "step": 99935 }, { "epoch": 1.11, "learning_rate": 3.155652995541383e-05, "loss": 0.7005, "step": 99940 }, { "epoch": 1.11, "learning_rate": 3.155560722827531e-05, "loss": 0.6796, "step": 99945 }, { "epoch": 1.11, "learning_rate": 3.15546845011368e-05, "loss": 0.642, "step": 99950 }, { "epoch": 1.11, "learning_rate": 3.155376177399829e-05, "loss": 0.6697, "step": 99955 }, { "epoch": 1.11, "learning_rate": 3.155283904685978e-05, "loss": 0.6743, "step": 99960 }, { "epoch": 1.11, "learning_rate": 3.155191631972126e-05, "loss": 0.6643, "step": 99965 }, { "epoch": 1.11, "learning_rate": 3.155099359258275e-05, "loss": 0.6548, "step": 99970 }, { "epoch": 1.11, "learning_rate": 3.155007086544424e-05, "loss": 0.7061, "step": 99975 }, { "epoch": 1.11, "learning_rate": 3.1549148138305725e-05, "loss": 0.6199, "step": 99980 }, { "epoch": 1.11, "learning_rate": 3.154822541116721e-05, "loss": 0.6293, "step": 99985 }, { "epoch": 1.11, "learning_rate": 3.15473026840287e-05, "loss": 0.703, "step": 99990 }, { "epoch": 1.11, "learning_rate": 3.154637995689019e-05, "loss": 0.6918, "step": 99995 }, { "epoch": 1.11, "learning_rate": 3.154545722975168e-05, "loss": 0.6346, "step": 100000 }, { "epoch": 1.11, "eval_loss": 0.647095799446106, "eval_runtime": 70.4989, "eval_samples_per_second": 28.369, "eval_steps_per_second": 14.185, "step": 100000 }, { "epoch": 1.11, "learning_rate": 3.1544534502613165e-05, "loss": 0.6648, "step": 100005 }, { "epoch": 1.11, "learning_rate": 3.154361177547465e-05, "loss": 0.6758, "step": 100010 }, { "epoch": 1.11, "learning_rate": 3.154268904833614e-05, "loss": 0.6217, "step": 100015 }, { "epoch": 1.11, "learning_rate": 3.154176632119763e-05, "loss": 0.6449, "step": 100020 }, { "epoch": 1.11, "learning_rate": 3.1540843594059116e-05, "loss": 0.6711, "step": 100025 }, { "epoch": 1.11, "learning_rate": 3.1539920866920604e-05, "loss": 0.6347, "step": 100030 }, { "epoch": 1.11, "learning_rate": 3.153899813978209e-05, "loss": 0.6992, "step": 100035 }, { "epoch": 1.11, "learning_rate": 3.153807541264357e-05, "loss": 0.6241, "step": 100040 }, { "epoch": 1.11, "learning_rate": 3.153715268550507e-05, "loss": 0.6652, "step": 100045 }, { "epoch": 1.11, "learning_rate": 3.1536229958366555e-05, "loss": 0.6771, "step": 100050 }, { "epoch": 1.11, "learning_rate": 3.1535307231228036e-05, "loss": 0.6852, "step": 100055 }, { "epoch": 1.11, "learning_rate": 3.1534384504089524e-05, "loss": 0.7166, "step": 100060 }, { "epoch": 1.11, "learning_rate": 3.153346177695102e-05, "loss": 0.6725, "step": 100065 }, { "epoch": 1.11, "learning_rate": 3.153253904981251e-05, "loss": 0.7147, "step": 100070 }, { "epoch": 1.11, "learning_rate": 3.153161632267399e-05, "loss": 0.6295, "step": 100075 }, { "epoch": 1.11, "learning_rate": 3.1530693595535476e-05, "loss": 0.6532, "step": 100080 }, { "epoch": 1.11, "learning_rate": 3.152977086839697e-05, "loss": 0.6933, "step": 100085 }, { "epoch": 1.11, "learning_rate": 3.152884814125845e-05, "loss": 0.6444, "step": 100090 }, { "epoch": 1.11, "learning_rate": 3.152792541411994e-05, "loss": 0.6254, "step": 100095 }, { "epoch": 1.11, "learning_rate": 3.152700268698143e-05, "loss": 0.7434, "step": 100100 }, { "epoch": 1.11, "learning_rate": 3.152607995984292e-05, "loss": 0.7529, "step": 100105 }, { "epoch": 1.11, "learning_rate": 3.15251572327044e-05, "loss": 0.7255, "step": 100110 }, { "epoch": 1.11, "learning_rate": 3.152423450556589e-05, "loss": 0.6568, "step": 100115 }, { "epoch": 1.11, "learning_rate": 3.152331177842738e-05, "loss": 0.6413, "step": 100120 }, { "epoch": 1.11, "learning_rate": 3.1522389051288866e-05, "loss": 0.6801, "step": 100125 }, { "epoch": 1.11, "learning_rate": 3.1521466324150354e-05, "loss": 0.673, "step": 100130 }, { "epoch": 1.11, "learning_rate": 3.152054359701184e-05, "loss": 0.6881, "step": 100135 }, { "epoch": 1.11, "learning_rate": 3.151962086987333e-05, "loss": 0.7226, "step": 100140 }, { "epoch": 1.11, "learning_rate": 3.151869814273482e-05, "loss": 0.6552, "step": 100145 }, { "epoch": 1.11, "learning_rate": 3.1517775415596306e-05, "loss": 0.613, "step": 100150 }, { "epoch": 1.11, "learning_rate": 3.1516852688457793e-05, "loss": 0.6582, "step": 100155 }, { "epoch": 1.11, "learning_rate": 3.151592996131928e-05, "loss": 0.6786, "step": 100160 }, { "epoch": 1.11, "learning_rate": 3.151500723418076e-05, "loss": 0.7467, "step": 100165 }, { "epoch": 1.11, "learning_rate": 3.151408450704226e-05, "loss": 0.7021, "step": 100170 }, { "epoch": 1.11, "learning_rate": 3.1513161779903745e-05, "loss": 0.745, "step": 100175 }, { "epoch": 1.11, "learning_rate": 3.151223905276523e-05, "loss": 0.6248, "step": 100180 }, { "epoch": 1.11, "learning_rate": 3.1511316325626714e-05, "loss": 0.6723, "step": 100185 }, { "epoch": 1.11, "learning_rate": 3.15103935984882e-05, "loss": 0.6891, "step": 100190 }, { "epoch": 1.11, "learning_rate": 3.1509470871349696e-05, "loss": 0.6496, "step": 100195 }, { "epoch": 1.11, "learning_rate": 3.1508548144211184e-05, "loss": 0.7171, "step": 100200 }, { "epoch": 1.11, "learning_rate": 3.1507625417072665e-05, "loss": 0.6731, "step": 100205 }, { "epoch": 1.11, "learning_rate": 3.150670268993415e-05, "loss": 0.7013, "step": 100210 }, { "epoch": 1.11, "learning_rate": 3.150577996279565e-05, "loss": 0.6634, "step": 100215 }, { "epoch": 1.11, "learning_rate": 3.150485723565713e-05, "loss": 0.7104, "step": 100220 }, { "epoch": 1.11, "learning_rate": 3.1503934508518617e-05, "loss": 0.6556, "step": 100225 }, { "epoch": 1.11, "learning_rate": 3.1503011781380104e-05, "loss": 0.6405, "step": 100230 }, { "epoch": 1.11, "learning_rate": 3.15020890542416e-05, "loss": 0.7342, "step": 100235 }, { "epoch": 1.11, "learning_rate": 3.150116632710308e-05, "loss": 0.691, "step": 100240 }, { "epoch": 1.11, "learning_rate": 3.150024359996457e-05, "loss": 0.6637, "step": 100245 }, { "epoch": 1.11, "learning_rate": 3.1499320872826056e-05, "loss": 0.6872, "step": 100250 }, { "epoch": 1.11, "learning_rate": 3.1498398145687544e-05, "loss": 0.6859, "step": 100255 }, { "epoch": 1.11, "learning_rate": 3.149747541854903e-05, "loss": 0.6931, "step": 100260 }, { "epoch": 1.11, "learning_rate": 3.149655269141052e-05, "loss": 0.6809, "step": 100265 }, { "epoch": 1.11, "learning_rate": 3.149562996427201e-05, "loss": 0.6698, "step": 100270 }, { "epoch": 1.11, "learning_rate": 3.1494707237133495e-05, "loss": 0.6908, "step": 100275 }, { "epoch": 1.11, "learning_rate": 3.149378450999498e-05, "loss": 0.7176, "step": 100280 }, { "epoch": 1.11, "learning_rate": 3.149286178285647e-05, "loss": 0.6663, "step": 100285 }, { "epoch": 1.11, "learning_rate": 3.149193905571796e-05, "loss": 0.6694, "step": 100290 }, { "epoch": 1.11, "learning_rate": 3.149101632857944e-05, "loss": 0.6456, "step": 100295 }, { "epoch": 1.11, "learning_rate": 3.1490093601440934e-05, "loss": 0.6537, "step": 100300 }, { "epoch": 1.11, "learning_rate": 3.148917087430242e-05, "loss": 0.6155, "step": 100305 }, { "epoch": 1.11, "learning_rate": 3.148824814716391e-05, "loss": 0.7323, "step": 100310 }, { "epoch": 1.11, "learning_rate": 3.148732542002539e-05, "loss": 0.6882, "step": 100315 }, { "epoch": 1.11, "learning_rate": 3.1486402692886886e-05, "loss": 0.6132, "step": 100320 }, { "epoch": 1.11, "learning_rate": 3.1485479965748373e-05, "loss": 0.7148, "step": 100325 }, { "epoch": 1.11, "learning_rate": 3.1484557238609855e-05, "loss": 0.6263, "step": 100330 }, { "epoch": 1.11, "learning_rate": 3.148363451147134e-05, "loss": 0.6645, "step": 100335 }, { "epoch": 1.11, "learning_rate": 3.148271178433284e-05, "loss": 0.697, "step": 100340 }, { "epoch": 1.11, "learning_rate": 3.1481789057194325e-05, "loss": 0.6937, "step": 100345 }, { "epoch": 1.11, "learning_rate": 3.1480866330055806e-05, "loss": 0.6612, "step": 100350 }, { "epoch": 1.11, "learning_rate": 3.1479943602917294e-05, "loss": 0.6543, "step": 100355 }, { "epoch": 1.11, "learning_rate": 3.147902087577878e-05, "loss": 0.739, "step": 100360 }, { "epoch": 1.11, "learning_rate": 3.147809814864027e-05, "loss": 0.7002, "step": 100365 }, { "epoch": 1.11, "learning_rate": 3.147717542150176e-05, "loss": 0.6458, "step": 100370 }, { "epoch": 1.11, "learning_rate": 3.1476252694363245e-05, "loss": 0.6659, "step": 100375 }, { "epoch": 1.11, "learning_rate": 3.147532996722473e-05, "loss": 0.6387, "step": 100380 }, { "epoch": 1.11, "learning_rate": 3.147440724008622e-05, "loss": 0.652, "step": 100385 }, { "epoch": 1.11, "learning_rate": 3.147348451294771e-05, "loss": 0.6882, "step": 100390 }, { "epoch": 1.11, "learning_rate": 3.1472561785809197e-05, "loss": 0.6937, "step": 100395 }, { "epoch": 1.11, "learning_rate": 3.1471639058670684e-05, "loss": 0.6147, "step": 100400 }, { "epoch": 1.11, "learning_rate": 3.147071633153217e-05, "loss": 0.6524, "step": 100405 }, { "epoch": 1.11, "learning_rate": 3.146979360439366e-05, "loss": 0.6558, "step": 100410 }, { "epoch": 1.11, "learning_rate": 3.146887087725515e-05, "loss": 0.6653, "step": 100415 }, { "epoch": 1.11, "learning_rate": 3.1467948150116636e-05, "loss": 0.6364, "step": 100420 }, { "epoch": 1.11, "learning_rate": 3.146702542297812e-05, "loss": 0.6574, "step": 100425 }, { "epoch": 1.11, "learning_rate": 3.146610269583961e-05, "loss": 0.6754, "step": 100430 }, { "epoch": 1.11, "learning_rate": 3.14651799687011e-05, "loss": 0.6177, "step": 100435 }, { "epoch": 1.11, "learning_rate": 3.146425724156258e-05, "loss": 0.6546, "step": 100440 }, { "epoch": 1.11, "learning_rate": 3.146333451442407e-05, "loss": 0.6896, "step": 100445 }, { "epoch": 1.11, "learning_rate": 3.146241178728556e-05, "loss": 0.6158, "step": 100450 }, { "epoch": 1.11, "learning_rate": 3.146148906014705e-05, "loss": 0.7158, "step": 100455 }, { "epoch": 1.11, "learning_rate": 3.146056633300853e-05, "loss": 0.6969, "step": 100460 }, { "epoch": 1.11, "learning_rate": 3.145964360587002e-05, "loss": 0.7226, "step": 100465 }, { "epoch": 1.11, "learning_rate": 3.1458720878731514e-05, "loss": 0.6478, "step": 100470 }, { "epoch": 1.11, "learning_rate": 3.1457798151592995e-05, "loss": 0.6275, "step": 100475 }, { "epoch": 1.11, "learning_rate": 3.145687542445448e-05, "loss": 0.6282, "step": 100480 }, { "epoch": 1.11, "learning_rate": 3.145595269731597e-05, "loss": 0.6803, "step": 100485 }, { "epoch": 1.11, "learning_rate": 3.1455029970177466e-05, "loss": 0.6438, "step": 100490 }, { "epoch": 1.11, "learning_rate": 3.145410724303895e-05, "loss": 0.644, "step": 100495 }, { "epoch": 1.11, "learning_rate": 3.1453184515900435e-05, "loss": 0.6991, "step": 100500 }, { "epoch": 1.11, "learning_rate": 3.145226178876192e-05, "loss": 0.6819, "step": 100505 }, { "epoch": 1.11, "learning_rate": 3.145133906162341e-05, "loss": 0.6196, "step": 100510 }, { "epoch": 1.11, "learning_rate": 3.14504163344849e-05, "loss": 0.6708, "step": 100515 }, { "epoch": 1.11, "learning_rate": 3.1449493607346386e-05, "loss": 0.646, "step": 100520 }, { "epoch": 1.11, "learning_rate": 3.1448570880207874e-05, "loss": 0.7164, "step": 100525 }, { "epoch": 1.11, "learning_rate": 3.144764815306936e-05, "loss": 0.7262, "step": 100530 }, { "epoch": 1.11, "learning_rate": 3.144672542593085e-05, "loss": 0.6102, "step": 100535 }, { "epoch": 1.11, "learning_rate": 3.144580269879234e-05, "loss": 0.6736, "step": 100540 }, { "epoch": 1.11, "learning_rate": 3.1444879971653825e-05, "loss": 0.6937, "step": 100545 }, { "epoch": 1.11, "learning_rate": 3.1443957244515306e-05, "loss": 0.6728, "step": 100550 }, { "epoch": 1.11, "learning_rate": 3.14430345173768e-05, "loss": 0.6176, "step": 100555 }, { "epoch": 1.11, "learning_rate": 3.144211179023829e-05, "loss": 0.6928, "step": 100560 }, { "epoch": 1.11, "learning_rate": 3.144118906309978e-05, "loss": 0.6811, "step": 100565 }, { "epoch": 1.11, "learning_rate": 3.144026633596126e-05, "loss": 0.711, "step": 100570 }, { "epoch": 1.11, "learning_rate": 3.1439343608822746e-05, "loss": 0.7064, "step": 100575 }, { "epoch": 1.11, "learning_rate": 3.143842088168424e-05, "loss": 0.663, "step": 100580 }, { "epoch": 1.11, "learning_rate": 3.143749815454573e-05, "loss": 0.7147, "step": 100585 }, { "epoch": 1.11, "learning_rate": 3.143657542740721e-05, "loss": 0.6811, "step": 100590 }, { "epoch": 1.11, "learning_rate": 3.14356527002687e-05, "loss": 0.7387, "step": 100595 }, { "epoch": 1.11, "learning_rate": 3.143472997313019e-05, "loss": 0.6532, "step": 100600 }, { "epoch": 1.11, "learning_rate": 3.143380724599167e-05, "loss": 0.7018, "step": 100605 }, { "epoch": 1.11, "learning_rate": 3.143288451885316e-05, "loss": 0.6933, "step": 100610 }, { "epoch": 1.11, "learning_rate": 3.143196179171465e-05, "loss": 0.6648, "step": 100615 }, { "epoch": 1.11, "learning_rate": 3.143103906457614e-05, "loss": 0.6414, "step": 100620 }, { "epoch": 1.11, "learning_rate": 3.1430116337437624e-05, "loss": 0.6481, "step": 100625 }, { "epoch": 1.11, "learning_rate": 3.142919361029911e-05, "loss": 0.6286, "step": 100630 }, { "epoch": 1.11, "learning_rate": 3.14282708831606e-05, "loss": 0.6632, "step": 100635 }, { "epoch": 1.11, "learning_rate": 3.142734815602209e-05, "loss": 0.6896, "step": 100640 }, { "epoch": 1.11, "learning_rate": 3.1426425428883575e-05, "loss": 0.6511, "step": 100645 }, { "epoch": 1.11, "learning_rate": 3.142550270174506e-05, "loss": 0.612, "step": 100650 }, { "epoch": 1.11, "learning_rate": 3.142457997460655e-05, "loss": 0.6328, "step": 100655 }, { "epoch": 1.11, "learning_rate": 3.142365724746804e-05, "loss": 0.6442, "step": 100660 }, { "epoch": 1.11, "learning_rate": 3.142273452032953e-05, "loss": 0.6117, "step": 100665 }, { "epoch": 1.11, "learning_rate": 3.1421811793191015e-05, "loss": 0.6787, "step": 100670 }, { "epoch": 1.11, "learning_rate": 3.14208890660525e-05, "loss": 0.6783, "step": 100675 }, { "epoch": 1.11, "learning_rate": 3.1419966338913984e-05, "loss": 0.6809, "step": 100680 }, { "epoch": 1.11, "learning_rate": 3.141904361177548e-05, "loss": 0.6604, "step": 100685 }, { "epoch": 1.11, "learning_rate": 3.1418120884636966e-05, "loss": 0.611, "step": 100690 }, { "epoch": 1.11, "learning_rate": 3.1417198157498454e-05, "loss": 0.7103, "step": 100695 }, { "epoch": 1.12, "learning_rate": 3.1416275430359935e-05, "loss": 0.6988, "step": 100700 }, { "epoch": 1.12, "learning_rate": 3.141535270322143e-05, "loss": 0.6469, "step": 100705 }, { "epoch": 1.12, "learning_rate": 3.141442997608292e-05, "loss": 0.6662, "step": 100710 }, { "epoch": 1.12, "learning_rate": 3.14135072489444e-05, "loss": 0.6386, "step": 100715 }, { "epoch": 1.12, "learning_rate": 3.1412584521805886e-05, "loss": 0.6953, "step": 100720 }, { "epoch": 1.12, "learning_rate": 3.1411661794667374e-05, "loss": 0.635, "step": 100725 }, { "epoch": 1.12, "learning_rate": 3.141073906752887e-05, "loss": 0.6152, "step": 100730 }, { "epoch": 1.12, "learning_rate": 3.140981634039035e-05, "loss": 0.6221, "step": 100735 }, { "epoch": 1.12, "learning_rate": 3.140889361325184e-05, "loss": 0.6864, "step": 100740 }, { "epoch": 1.12, "learning_rate": 3.1407970886113326e-05, "loss": 0.6768, "step": 100745 }, { "epoch": 1.12, "learning_rate": 3.1407048158974814e-05, "loss": 0.651, "step": 100750 }, { "epoch": 1.12, "learning_rate": 3.14061254318363e-05, "loss": 0.7118, "step": 100755 }, { "epoch": 1.12, "learning_rate": 3.140520270469779e-05, "loss": 0.7341, "step": 100760 }, { "epoch": 1.12, "learning_rate": 3.140427997755928e-05, "loss": 0.7436, "step": 100765 }, { "epoch": 1.12, "learning_rate": 3.1403357250420765e-05, "loss": 0.6843, "step": 100770 }, { "epoch": 1.12, "learning_rate": 3.140243452328225e-05, "loss": 0.6965, "step": 100775 }, { "epoch": 1.12, "learning_rate": 3.140151179614374e-05, "loss": 0.7315, "step": 100780 }, { "epoch": 1.12, "learning_rate": 3.140058906900523e-05, "loss": 0.6536, "step": 100785 }, { "epoch": 1.12, "learning_rate": 3.1399666341866716e-05, "loss": 0.684, "step": 100790 }, { "epoch": 1.12, "learning_rate": 3.1398743614728204e-05, "loss": 0.6427, "step": 100795 }, { "epoch": 1.12, "learning_rate": 3.139782088758969e-05, "loss": 0.647, "step": 100800 }, { "epoch": 1.12, "learning_rate": 3.139689816045118e-05, "loss": 0.6971, "step": 100805 }, { "epoch": 1.12, "learning_rate": 3.139597543331266e-05, "loss": 0.6271, "step": 100810 }, { "epoch": 1.12, "learning_rate": 3.1395052706174156e-05, "loss": 0.6634, "step": 100815 }, { "epoch": 1.12, "learning_rate": 3.1394129979035643e-05, "loss": 0.7157, "step": 100820 }, { "epoch": 1.12, "learning_rate": 3.1393207251897124e-05, "loss": 0.7045, "step": 100825 }, { "epoch": 1.12, "learning_rate": 3.139228452475861e-05, "loss": 0.7087, "step": 100830 }, { "epoch": 1.12, "learning_rate": 3.139136179762011e-05, "loss": 0.6313, "step": 100835 }, { "epoch": 1.12, "learning_rate": 3.1390439070481595e-05, "loss": 0.6887, "step": 100840 }, { "epoch": 1.12, "learning_rate": 3.1389516343343076e-05, "loss": 0.6385, "step": 100845 }, { "epoch": 1.12, "learning_rate": 3.1388593616204564e-05, "loss": 0.6308, "step": 100850 }, { "epoch": 1.12, "learning_rate": 3.138767088906606e-05, "loss": 0.6914, "step": 100855 }, { "epoch": 1.12, "learning_rate": 3.138674816192754e-05, "loss": 0.6106, "step": 100860 }, { "epoch": 1.12, "learning_rate": 3.138582543478903e-05, "loss": 0.6714, "step": 100865 }, { "epoch": 1.12, "learning_rate": 3.1384902707650515e-05, "loss": 0.6472, "step": 100870 }, { "epoch": 1.12, "learning_rate": 3.1383979980512e-05, "loss": 0.6881, "step": 100875 }, { "epoch": 1.12, "learning_rate": 3.138305725337349e-05, "loss": 0.6954, "step": 100880 }, { "epoch": 1.12, "learning_rate": 3.138213452623498e-05, "loss": 0.6341, "step": 100885 }, { "epoch": 1.12, "learning_rate": 3.1381211799096467e-05, "loss": 0.6578, "step": 100890 }, { "epoch": 1.12, "learning_rate": 3.1380289071957954e-05, "loss": 0.7035, "step": 100895 }, { "epoch": 1.12, "learning_rate": 3.137936634481944e-05, "loss": 0.6981, "step": 100900 }, { "epoch": 1.12, "learning_rate": 3.137844361768093e-05, "loss": 0.658, "step": 100905 }, { "epoch": 1.12, "learning_rate": 3.137752089054242e-05, "loss": 0.7113, "step": 100910 }, { "epoch": 1.12, "learning_rate": 3.1376598163403906e-05, "loss": 0.7205, "step": 100915 }, { "epoch": 1.12, "learning_rate": 3.1375675436265394e-05, "loss": 0.6534, "step": 100920 }, { "epoch": 1.12, "learning_rate": 3.137475270912688e-05, "loss": 0.6205, "step": 100925 }, { "epoch": 1.12, "learning_rate": 3.137382998198837e-05, "loss": 0.6644, "step": 100930 }, { "epoch": 1.12, "learning_rate": 3.137290725484985e-05, "loss": 0.6411, "step": 100935 }, { "epoch": 1.12, "learning_rate": 3.1371984527711345e-05, "loss": 0.7124, "step": 100940 }, { "epoch": 1.12, "learning_rate": 3.137106180057283e-05, "loss": 0.6918, "step": 100945 }, { "epoch": 1.12, "learning_rate": 3.137013907343432e-05, "loss": 0.6651, "step": 100950 }, { "epoch": 1.12, "learning_rate": 3.13692163462958e-05, "loss": 0.6875, "step": 100955 }, { "epoch": 1.12, "learning_rate": 3.136829361915729e-05, "loss": 0.7006, "step": 100960 }, { "epoch": 1.12, "learning_rate": 3.1367370892018784e-05, "loss": 0.658, "step": 100965 }, { "epoch": 1.12, "learning_rate": 3.136644816488027e-05, "loss": 0.6594, "step": 100970 }, { "epoch": 1.12, "learning_rate": 3.136552543774175e-05, "loss": 0.6906, "step": 100975 }, { "epoch": 1.12, "learning_rate": 3.136460271060324e-05, "loss": 0.6908, "step": 100980 }, { "epoch": 1.12, "learning_rate": 3.1363679983464736e-05, "loss": 0.7291, "step": 100985 }, { "epoch": 1.12, "learning_rate": 3.136275725632622e-05, "loss": 0.6628, "step": 100990 }, { "epoch": 1.12, "learning_rate": 3.1361834529187705e-05, "loss": 0.6508, "step": 100995 }, { "epoch": 1.12, "learning_rate": 3.136091180204919e-05, "loss": 0.6203, "step": 101000 }, { "epoch": 1.12, "eval_loss": 0.6655250191688538, "eval_runtime": 70.3412, "eval_samples_per_second": 28.433, "eval_steps_per_second": 14.216, "step": 101000 }, { "epoch": 1.12, "learning_rate": 3.135998907491069e-05, "loss": 0.7801, "step": 101005 }, { "epoch": 1.12, "learning_rate": 3.135906634777217e-05, "loss": 0.667, "step": 101010 }, { "epoch": 1.12, "learning_rate": 3.1358143620633656e-05, "loss": 0.6573, "step": 101015 }, { "epoch": 1.12, "learning_rate": 3.1357220893495144e-05, "loss": 0.6561, "step": 101020 }, { "epoch": 1.12, "learning_rate": 3.135629816635663e-05, "loss": 0.6959, "step": 101025 }, { "epoch": 1.12, "learning_rate": 3.135537543921812e-05, "loss": 0.7543, "step": 101030 }, { "epoch": 1.12, "learning_rate": 3.135445271207961e-05, "loss": 0.6451, "step": 101035 }, { "epoch": 1.12, "learning_rate": 3.1353529984941095e-05, "loss": 0.6407, "step": 101040 }, { "epoch": 1.12, "learning_rate": 3.135260725780258e-05, "loss": 0.6645, "step": 101045 }, { "epoch": 1.12, "learning_rate": 3.135168453066407e-05, "loss": 0.653, "step": 101050 }, { "epoch": 1.12, "learning_rate": 3.135076180352556e-05, "loss": 0.6741, "step": 101055 }, { "epoch": 1.12, "learning_rate": 3.1349839076387047e-05, "loss": 0.7199, "step": 101060 }, { "epoch": 1.12, "learning_rate": 3.134891634924853e-05, "loss": 0.6751, "step": 101065 }, { "epoch": 1.12, "learning_rate": 3.134799362211002e-05, "loss": 0.5985, "step": 101070 }, { "epoch": 1.12, "learning_rate": 3.134707089497151e-05, "loss": 0.6542, "step": 101075 }, { "epoch": 1.12, "learning_rate": 3.1346148167833e-05, "loss": 0.6759, "step": 101080 }, { "epoch": 1.12, "learning_rate": 3.134522544069448e-05, "loss": 0.6374, "step": 101085 }, { "epoch": 1.12, "learning_rate": 3.1344302713555974e-05, "loss": 0.623, "step": 101090 }, { "epoch": 1.12, "learning_rate": 3.134337998641746e-05, "loss": 0.6795, "step": 101095 }, { "epoch": 1.12, "learning_rate": 3.134245725927894e-05, "loss": 0.6828, "step": 101100 }, { "epoch": 1.12, "learning_rate": 3.134153453214043e-05, "loss": 0.6882, "step": 101105 }, { "epoch": 1.12, "learning_rate": 3.134061180500192e-05, "loss": 0.683, "step": 101110 }, { "epoch": 1.12, "learning_rate": 3.133968907786341e-05, "loss": 0.6878, "step": 101115 }, { "epoch": 1.12, "learning_rate": 3.1338766350724894e-05, "loss": 0.6754, "step": 101120 }, { "epoch": 1.12, "learning_rate": 3.133784362358638e-05, "loss": 0.6134, "step": 101125 }, { "epoch": 1.12, "learning_rate": 3.133692089644787e-05, "loss": 0.7099, "step": 101130 }, { "epoch": 1.12, "learning_rate": 3.133599816930936e-05, "loss": 0.6581, "step": 101135 }, { "epoch": 1.12, "learning_rate": 3.1335075442170845e-05, "loss": 0.6731, "step": 101140 }, { "epoch": 1.12, "learning_rate": 3.133415271503233e-05, "loss": 0.7095, "step": 101145 }, { "epoch": 1.12, "learning_rate": 3.133322998789382e-05, "loss": 0.6844, "step": 101150 }, { "epoch": 1.12, "learning_rate": 3.133230726075531e-05, "loss": 0.6783, "step": 101155 }, { "epoch": 1.12, "learning_rate": 3.13313845336168e-05, "loss": 0.6506, "step": 101160 }, { "epoch": 1.12, "learning_rate": 3.1330461806478285e-05, "loss": 0.6828, "step": 101165 }, { "epoch": 1.12, "learning_rate": 3.132953907933977e-05, "loss": 0.6319, "step": 101170 }, { "epoch": 1.12, "learning_rate": 3.132861635220126e-05, "loss": 0.6566, "step": 101175 }, { "epoch": 1.12, "learning_rate": 3.132769362506275e-05, "loss": 0.6697, "step": 101180 }, { "epoch": 1.12, "learning_rate": 3.1326770897924236e-05, "loss": 0.695, "step": 101185 }, { "epoch": 1.12, "learning_rate": 3.1325848170785724e-05, "loss": 0.636, "step": 101190 }, { "epoch": 1.12, "learning_rate": 3.1324925443647205e-05, "loss": 0.7068, "step": 101195 }, { "epoch": 1.12, "learning_rate": 3.13240027165087e-05, "loss": 0.7339, "step": 101200 }, { "epoch": 1.12, "learning_rate": 3.132307998937019e-05, "loss": 0.6804, "step": 101205 }, { "epoch": 1.12, "learning_rate": 3.132215726223167e-05, "loss": 0.6835, "step": 101210 }, { "epoch": 1.12, "learning_rate": 3.1321234535093156e-05, "loss": 0.6568, "step": 101215 }, { "epoch": 1.12, "learning_rate": 3.132031180795465e-05, "loss": 0.6831, "step": 101220 }, { "epoch": 1.12, "learning_rate": 3.131938908081614e-05, "loss": 0.6714, "step": 101225 }, { "epoch": 1.12, "learning_rate": 3.131846635367762e-05, "loss": 0.6741, "step": 101230 }, { "epoch": 1.12, "learning_rate": 3.131754362653911e-05, "loss": 0.7126, "step": 101235 }, { "epoch": 1.12, "learning_rate": 3.13166208994006e-05, "loss": 0.64, "step": 101240 }, { "epoch": 1.12, "learning_rate": 3.1315698172262083e-05, "loss": 0.7106, "step": 101245 }, { "epoch": 1.12, "learning_rate": 3.131477544512357e-05, "loss": 0.6953, "step": 101250 }, { "epoch": 1.12, "learning_rate": 3.131385271798506e-05, "loss": 0.696, "step": 101255 }, { "epoch": 1.12, "learning_rate": 3.131292999084655e-05, "loss": 0.6306, "step": 101260 }, { "epoch": 1.12, "learning_rate": 3.1312007263708035e-05, "loss": 0.6869, "step": 101265 }, { "epoch": 1.12, "learning_rate": 3.131108453656952e-05, "loss": 0.6711, "step": 101270 }, { "epoch": 1.12, "learning_rate": 3.131016180943101e-05, "loss": 0.6347, "step": 101275 }, { "epoch": 1.12, "learning_rate": 3.13092390822925e-05, "loss": 0.7049, "step": 101280 }, { "epoch": 1.12, "learning_rate": 3.1308316355153986e-05, "loss": 0.6887, "step": 101285 }, { "epoch": 1.12, "learning_rate": 3.1307393628015474e-05, "loss": 0.6837, "step": 101290 }, { "epoch": 1.12, "learning_rate": 3.130647090087696e-05, "loss": 0.6657, "step": 101295 }, { "epoch": 1.12, "learning_rate": 3.130554817373845e-05, "loss": 0.6431, "step": 101300 }, { "epoch": 1.12, "learning_rate": 3.130462544659994e-05, "loss": 0.6615, "step": 101305 }, { "epoch": 1.12, "learning_rate": 3.1303702719461425e-05, "loss": 0.6879, "step": 101310 }, { "epoch": 1.12, "learning_rate": 3.130277999232291e-05, "loss": 0.6923, "step": 101315 }, { "epoch": 1.12, "learning_rate": 3.1301857265184394e-05, "loss": 0.7182, "step": 101320 }, { "epoch": 1.12, "learning_rate": 3.130093453804589e-05, "loss": 0.6845, "step": 101325 }, { "epoch": 1.12, "learning_rate": 3.130001181090738e-05, "loss": 0.7065, "step": 101330 }, { "epoch": 1.12, "learning_rate": 3.1299089083768865e-05, "loss": 0.7066, "step": 101335 }, { "epoch": 1.12, "learning_rate": 3.1298166356630346e-05, "loss": 0.6303, "step": 101340 }, { "epoch": 1.12, "learning_rate": 3.1297243629491834e-05, "loss": 0.7633, "step": 101345 }, { "epoch": 1.12, "learning_rate": 3.129632090235333e-05, "loss": 0.6641, "step": 101350 }, { "epoch": 1.12, "learning_rate": 3.1295398175214816e-05, "loss": 0.6439, "step": 101355 }, { "epoch": 1.12, "learning_rate": 3.12944754480763e-05, "loss": 0.7081, "step": 101360 }, { "epoch": 1.12, "learning_rate": 3.1293552720937785e-05, "loss": 0.6727, "step": 101365 }, { "epoch": 1.12, "learning_rate": 3.129262999379928e-05, "loss": 0.7033, "step": 101370 }, { "epoch": 1.12, "learning_rate": 3.129170726666076e-05, "loss": 0.6887, "step": 101375 }, { "epoch": 1.12, "learning_rate": 3.129078453952225e-05, "loss": 0.678, "step": 101380 }, { "epoch": 1.12, "learning_rate": 3.1289861812383736e-05, "loss": 0.6846, "step": 101385 }, { "epoch": 1.12, "learning_rate": 3.128893908524523e-05, "loss": 0.6791, "step": 101390 }, { "epoch": 1.12, "learning_rate": 3.128801635810671e-05, "loss": 0.7007, "step": 101395 }, { "epoch": 1.12, "learning_rate": 3.12870936309682e-05, "loss": 0.641, "step": 101400 }, { "epoch": 1.12, "learning_rate": 3.128617090382969e-05, "loss": 0.6373, "step": 101405 }, { "epoch": 1.12, "learning_rate": 3.1285248176691176e-05, "loss": 0.6977, "step": 101410 }, { "epoch": 1.12, "learning_rate": 3.1284325449552664e-05, "loss": 0.6208, "step": 101415 }, { "epoch": 1.12, "learning_rate": 3.128340272241415e-05, "loss": 0.6618, "step": 101420 }, { "epoch": 1.12, "learning_rate": 3.128247999527564e-05, "loss": 0.6732, "step": 101425 }, { "epoch": 1.12, "learning_rate": 3.128155726813713e-05, "loss": 0.6824, "step": 101430 }, { "epoch": 1.12, "learning_rate": 3.1280634540998615e-05, "loss": 0.6679, "step": 101435 }, { "epoch": 1.12, "learning_rate": 3.12797118138601e-05, "loss": 0.6861, "step": 101440 }, { "epoch": 1.12, "learning_rate": 3.127878908672159e-05, "loss": 0.7091, "step": 101445 }, { "epoch": 1.12, "learning_rate": 3.127786635958307e-05, "loss": 0.6718, "step": 101450 }, { "epoch": 1.12, "learning_rate": 3.1276943632444566e-05, "loss": 0.7217, "step": 101455 }, { "epoch": 1.12, "learning_rate": 3.1276020905306054e-05, "loss": 0.6464, "step": 101460 }, { "epoch": 1.12, "learning_rate": 3.127509817816754e-05, "loss": 0.7022, "step": 101465 }, { "epoch": 1.12, "learning_rate": 3.127417545102902e-05, "loss": 0.6798, "step": 101470 }, { "epoch": 1.12, "learning_rate": 3.127325272389052e-05, "loss": 0.6781, "step": 101475 }, { "epoch": 1.12, "learning_rate": 3.1272329996752006e-05, "loss": 0.6556, "step": 101480 }, { "epoch": 1.12, "learning_rate": 3.127140726961349e-05, "loss": 0.705, "step": 101485 }, { "epoch": 1.12, "learning_rate": 3.1270484542474974e-05, "loss": 0.7118, "step": 101490 }, { "epoch": 1.12, "learning_rate": 3.126956181533646e-05, "loss": 0.6884, "step": 101495 }, { "epoch": 1.12, "learning_rate": 3.126863908819796e-05, "loss": 0.6368, "step": 101500 }, { "epoch": 1.12, "learning_rate": 3.126771636105944e-05, "loss": 0.6095, "step": 101505 }, { "epoch": 1.12, "learning_rate": 3.1266793633920926e-05, "loss": 0.6644, "step": 101510 }, { "epoch": 1.12, "learning_rate": 3.1265870906782414e-05, "loss": 0.7041, "step": 101515 }, { "epoch": 1.12, "learning_rate": 3.12649481796439e-05, "loss": 0.6641, "step": 101520 }, { "epoch": 1.12, "learning_rate": 3.126402545250539e-05, "loss": 0.6506, "step": 101525 }, { "epoch": 1.12, "learning_rate": 3.126310272536688e-05, "loss": 0.6706, "step": 101530 }, { "epoch": 1.12, "learning_rate": 3.1262179998228365e-05, "loss": 0.731, "step": 101535 }, { "epoch": 1.12, "learning_rate": 3.126125727108985e-05, "loss": 0.6311, "step": 101540 }, { "epoch": 1.12, "learning_rate": 3.126033454395134e-05, "loss": 0.7239, "step": 101545 }, { "epoch": 1.12, "learning_rate": 3.125941181681283e-05, "loss": 0.69, "step": 101550 }, { "epoch": 1.12, "learning_rate": 3.1258489089674317e-05, "loss": 0.6883, "step": 101555 }, { "epoch": 1.12, "learning_rate": 3.12575663625358e-05, "loss": 0.6797, "step": 101560 }, { "epoch": 1.12, "learning_rate": 3.125664363539729e-05, "loss": 0.7524, "step": 101565 }, { "epoch": 1.12, "learning_rate": 3.125572090825878e-05, "loss": 0.6532, "step": 101570 }, { "epoch": 1.12, "learning_rate": 3.125479818112027e-05, "loss": 0.6703, "step": 101575 }, { "epoch": 1.12, "learning_rate": 3.125387545398175e-05, "loss": 0.7218, "step": 101580 }, { "epoch": 1.12, "learning_rate": 3.1252952726843244e-05, "loss": 0.6939, "step": 101585 }, { "epoch": 1.12, "learning_rate": 3.125202999970473e-05, "loss": 0.6275, "step": 101590 }, { "epoch": 1.12, "learning_rate": 3.125110727256621e-05, "loss": 0.6969, "step": 101595 }, { "epoch": 1.12, "learning_rate": 3.12501845454277e-05, "loss": 0.6289, "step": 101600 }, { "epoch": 1.13, "learning_rate": 3.1249261818289195e-05, "loss": 0.6945, "step": 101605 }, { "epoch": 1.13, "learning_rate": 3.124833909115068e-05, "loss": 0.7187, "step": 101610 }, { "epoch": 1.13, "learning_rate": 3.1247416364012164e-05, "loss": 0.7443, "step": 101615 }, { "epoch": 1.13, "learning_rate": 3.124649363687365e-05, "loss": 0.6521, "step": 101620 }, { "epoch": 1.13, "learning_rate": 3.1245570909735146e-05, "loss": 0.7158, "step": 101625 }, { "epoch": 1.13, "learning_rate": 3.124464818259663e-05, "loss": 0.6424, "step": 101630 }, { "epoch": 1.13, "learning_rate": 3.1243725455458115e-05, "loss": 0.6954, "step": 101635 }, { "epoch": 1.13, "learning_rate": 3.12428027283196e-05, "loss": 0.6634, "step": 101640 }, { "epoch": 1.13, "learning_rate": 3.124188000118109e-05, "loss": 0.6321, "step": 101645 }, { "epoch": 1.13, "learning_rate": 3.124095727404258e-05, "loss": 0.6528, "step": 101650 }, { "epoch": 1.13, "learning_rate": 3.124003454690407e-05, "loss": 0.6879, "step": 101655 }, { "epoch": 1.13, "learning_rate": 3.1239111819765555e-05, "loss": 0.692, "step": 101660 }, { "epoch": 1.13, "learning_rate": 3.123818909262704e-05, "loss": 0.7007, "step": 101665 }, { "epoch": 1.13, "learning_rate": 3.123726636548853e-05, "loss": 0.7094, "step": 101670 }, { "epoch": 1.13, "learning_rate": 3.123634363835002e-05, "loss": 0.6637, "step": 101675 }, { "epoch": 1.13, "learning_rate": 3.1235420911211506e-05, "loss": 0.7011, "step": 101680 }, { "epoch": 1.13, "learning_rate": 3.1234498184072994e-05, "loss": 0.6496, "step": 101685 }, { "epoch": 1.13, "learning_rate": 3.123357545693448e-05, "loss": 0.6716, "step": 101690 }, { "epoch": 1.13, "learning_rate": 3.123265272979597e-05, "loss": 0.6222, "step": 101695 }, { "epoch": 1.13, "learning_rate": 3.123173000265746e-05, "loss": 0.6488, "step": 101700 }, { "epoch": 1.13, "learning_rate": 3.1230807275518945e-05, "loss": 0.6498, "step": 101705 }, { "epoch": 1.13, "learning_rate": 3.1229884548380426e-05, "loss": 0.6089, "step": 101710 }, { "epoch": 1.13, "learning_rate": 3.122896182124192e-05, "loss": 0.65, "step": 101715 }, { "epoch": 1.13, "learning_rate": 3.122803909410341e-05, "loss": 0.6887, "step": 101720 }, { "epoch": 1.13, "learning_rate": 3.122711636696489e-05, "loss": 0.6459, "step": 101725 }, { "epoch": 1.13, "learning_rate": 3.122619363982638e-05, "loss": 0.6928, "step": 101730 }, { "epoch": 1.13, "learning_rate": 3.122527091268787e-05, "loss": 0.6751, "step": 101735 }, { "epoch": 1.13, "learning_rate": 3.122434818554936e-05, "loss": 0.6197, "step": 101740 }, { "epoch": 1.13, "learning_rate": 3.122342545841084e-05, "loss": 0.6824, "step": 101745 }, { "epoch": 1.13, "learning_rate": 3.122250273127233e-05, "loss": 0.6942, "step": 101750 }, { "epoch": 1.13, "learning_rate": 3.1221580004133824e-05, "loss": 0.6294, "step": 101755 }, { "epoch": 1.13, "learning_rate": 3.1220657276995305e-05, "loss": 0.6436, "step": 101760 }, { "epoch": 1.13, "learning_rate": 3.121973454985679e-05, "loss": 0.6836, "step": 101765 }, { "epoch": 1.13, "learning_rate": 3.121881182271828e-05, "loss": 0.6783, "step": 101770 }, { "epoch": 1.13, "learning_rate": 3.1217889095579775e-05, "loss": 0.6596, "step": 101775 }, { "epoch": 1.13, "learning_rate": 3.1216966368441256e-05, "loss": 0.6942, "step": 101780 }, { "epoch": 1.13, "learning_rate": 3.1216043641302744e-05, "loss": 0.6481, "step": 101785 }, { "epoch": 1.13, "learning_rate": 3.121512091416423e-05, "loss": 0.674, "step": 101790 }, { "epoch": 1.13, "learning_rate": 3.121419818702572e-05, "loss": 0.6727, "step": 101795 }, { "epoch": 1.13, "learning_rate": 3.121327545988721e-05, "loss": 0.6242, "step": 101800 }, { "epoch": 1.13, "learning_rate": 3.1212352732748695e-05, "loss": 0.6821, "step": 101805 }, { "epoch": 1.13, "learning_rate": 3.121143000561018e-05, "loss": 0.6861, "step": 101810 }, { "epoch": 1.13, "learning_rate": 3.121050727847167e-05, "loss": 0.6461, "step": 101815 }, { "epoch": 1.13, "learning_rate": 3.120958455133316e-05, "loss": 0.6883, "step": 101820 }, { "epoch": 1.13, "learning_rate": 3.120866182419465e-05, "loss": 0.6345, "step": 101825 }, { "epoch": 1.13, "learning_rate": 3.1207739097056135e-05, "loss": 0.6657, "step": 101830 }, { "epoch": 1.13, "learning_rate": 3.1206816369917616e-05, "loss": 0.6378, "step": 101835 }, { "epoch": 1.13, "learning_rate": 3.120589364277911e-05, "loss": 0.6522, "step": 101840 }, { "epoch": 1.13, "learning_rate": 3.12049709156406e-05, "loss": 0.6758, "step": 101845 }, { "epoch": 1.13, "learning_rate": 3.1204048188502086e-05, "loss": 0.6353, "step": 101850 }, { "epoch": 1.13, "learning_rate": 3.120312546136357e-05, "loss": 0.7338, "step": 101855 }, { "epoch": 1.13, "learning_rate": 3.1202202734225055e-05, "loss": 0.6895, "step": 101860 }, { "epoch": 1.13, "learning_rate": 3.120128000708655e-05, "loss": 0.6667, "step": 101865 }, { "epoch": 1.13, "learning_rate": 3.120035727994803e-05, "loss": 0.6479, "step": 101870 }, { "epoch": 1.13, "learning_rate": 3.119943455280952e-05, "loss": 0.6476, "step": 101875 }, { "epoch": 1.13, "learning_rate": 3.1198511825671006e-05, "loss": 0.6541, "step": 101880 }, { "epoch": 1.13, "learning_rate": 3.11975890985325e-05, "loss": 0.6824, "step": 101885 }, { "epoch": 1.13, "learning_rate": 3.119666637139398e-05, "loss": 0.6798, "step": 101890 }, { "epoch": 1.13, "learning_rate": 3.119574364425547e-05, "loss": 0.701, "step": 101895 }, { "epoch": 1.13, "learning_rate": 3.119482091711696e-05, "loss": 0.7188, "step": 101900 }, { "epoch": 1.13, "learning_rate": 3.1193898189978446e-05, "loss": 0.6348, "step": 101905 }, { "epoch": 1.13, "learning_rate": 3.1192975462839933e-05, "loss": 0.6666, "step": 101910 }, { "epoch": 1.13, "learning_rate": 3.119205273570142e-05, "loss": 0.6572, "step": 101915 }, { "epoch": 1.13, "learning_rate": 3.119113000856291e-05, "loss": 0.67, "step": 101920 }, { "epoch": 1.13, "learning_rate": 3.11902072814244e-05, "loss": 0.6213, "step": 101925 }, { "epoch": 1.13, "learning_rate": 3.1189284554285885e-05, "loss": 0.6649, "step": 101930 }, { "epoch": 1.13, "learning_rate": 3.118836182714737e-05, "loss": 0.6696, "step": 101935 }, { "epoch": 1.13, "learning_rate": 3.118743910000886e-05, "loss": 0.669, "step": 101940 }, { "epoch": 1.13, "learning_rate": 3.118651637287034e-05, "loss": 0.6308, "step": 101945 }, { "epoch": 1.13, "learning_rate": 3.1185593645731836e-05, "loss": 0.6553, "step": 101950 }, { "epoch": 1.13, "learning_rate": 3.1184670918593324e-05, "loss": 0.6888, "step": 101955 }, { "epoch": 1.13, "learning_rate": 3.118374819145481e-05, "loss": 0.6838, "step": 101960 }, { "epoch": 1.13, "learning_rate": 3.118282546431629e-05, "loss": 0.6629, "step": 101965 }, { "epoch": 1.13, "learning_rate": 3.118190273717779e-05, "loss": 0.7477, "step": 101970 }, { "epoch": 1.13, "learning_rate": 3.1180980010039275e-05, "loss": 0.6845, "step": 101975 }, { "epoch": 1.13, "learning_rate": 3.1180057282900757e-05, "loss": 0.6361, "step": 101980 }, { "epoch": 1.13, "learning_rate": 3.1179134555762244e-05, "loss": 0.7018, "step": 101985 }, { "epoch": 1.13, "learning_rate": 3.117821182862374e-05, "loss": 0.6733, "step": 101990 }, { "epoch": 1.13, "learning_rate": 3.117728910148523e-05, "loss": 0.707, "step": 101995 }, { "epoch": 1.13, "learning_rate": 3.117636637434671e-05, "loss": 0.6765, "step": 102000 }, { "epoch": 1.13, "eval_loss": 0.6472858786582947, "eval_runtime": 69.8816, "eval_samples_per_second": 28.62, "eval_steps_per_second": 14.31, "step": 102000 }, { "epoch": 1.13, "learning_rate": 3.1175443647208196e-05, "loss": 0.6433, "step": 102005 }, { "epoch": 1.13, "learning_rate": 3.117452092006969e-05, "loss": 0.6942, "step": 102010 }, { "epoch": 1.13, "learning_rate": 3.117359819293118e-05, "loss": 0.6639, "step": 102015 }, { "epoch": 1.13, "learning_rate": 3.117267546579266e-05, "loss": 0.6179, "step": 102020 }, { "epoch": 1.13, "learning_rate": 3.117175273865415e-05, "loss": 0.6259, "step": 102025 }, { "epoch": 1.13, "learning_rate": 3.1170830011515635e-05, "loss": 0.647, "step": 102030 }, { "epoch": 1.13, "learning_rate": 3.116990728437712e-05, "loss": 0.7007, "step": 102035 }, { "epoch": 1.13, "learning_rate": 3.116898455723861e-05, "loss": 0.6929, "step": 102040 }, { "epoch": 1.13, "learning_rate": 3.11680618301001e-05, "loss": 0.675, "step": 102045 }, { "epoch": 1.13, "learning_rate": 3.1167139102961586e-05, "loss": 0.6483, "step": 102050 }, { "epoch": 1.13, "learning_rate": 3.1166216375823074e-05, "loss": 0.7092, "step": 102055 }, { "epoch": 1.13, "learning_rate": 3.116529364868456e-05, "loss": 0.7073, "step": 102060 }, { "epoch": 1.13, "learning_rate": 3.116437092154605e-05, "loss": 0.6507, "step": 102065 }, { "epoch": 1.13, "learning_rate": 3.116344819440754e-05, "loss": 0.69, "step": 102070 }, { "epoch": 1.13, "learning_rate": 3.1162525467269026e-05, "loss": 0.6033, "step": 102075 }, { "epoch": 1.13, "learning_rate": 3.1161602740130514e-05, "loss": 0.6826, "step": 102080 }, { "epoch": 1.13, "learning_rate": 3.1160680012992e-05, "loss": 0.6817, "step": 102085 }, { "epoch": 1.13, "learning_rate": 3.115975728585349e-05, "loss": 0.6458, "step": 102090 }, { "epoch": 1.13, "learning_rate": 3.115883455871497e-05, "loss": 0.6429, "step": 102095 }, { "epoch": 1.13, "learning_rate": 3.1157911831576465e-05, "loss": 0.6869, "step": 102100 }, { "epoch": 1.13, "learning_rate": 3.115698910443795e-05, "loss": 0.7036, "step": 102105 }, { "epoch": 1.13, "learning_rate": 3.1156066377299434e-05, "loss": 0.6543, "step": 102110 }, { "epoch": 1.13, "learning_rate": 3.115514365016092e-05, "loss": 0.6691, "step": 102115 }, { "epoch": 1.13, "learning_rate": 3.1154220923022416e-05, "loss": 0.7363, "step": 102120 }, { "epoch": 1.13, "learning_rate": 3.1153298195883904e-05, "loss": 0.6571, "step": 102125 }, { "epoch": 1.13, "learning_rate": 3.1152375468745385e-05, "loss": 0.6585, "step": 102130 }, { "epoch": 1.13, "learning_rate": 3.115145274160687e-05, "loss": 0.6897, "step": 102135 }, { "epoch": 1.13, "learning_rate": 3.115053001446837e-05, "loss": 0.6883, "step": 102140 }, { "epoch": 1.13, "learning_rate": 3.114960728732985e-05, "loss": 0.6571, "step": 102145 }, { "epoch": 1.13, "learning_rate": 3.114868456019134e-05, "loss": 0.768, "step": 102150 }, { "epoch": 1.13, "learning_rate": 3.1147761833052824e-05, "loss": 0.6882, "step": 102155 }, { "epoch": 1.13, "learning_rate": 3.114683910591432e-05, "loss": 0.6707, "step": 102160 }, { "epoch": 1.13, "learning_rate": 3.11459163787758e-05, "loss": 0.6471, "step": 102165 }, { "epoch": 1.13, "learning_rate": 3.114499365163729e-05, "loss": 0.6188, "step": 102170 }, { "epoch": 1.13, "learning_rate": 3.1144070924498776e-05, "loss": 0.6495, "step": 102175 }, { "epoch": 1.13, "learning_rate": 3.1143148197360264e-05, "loss": 0.6406, "step": 102180 }, { "epoch": 1.13, "learning_rate": 3.114222547022175e-05, "loss": 0.63, "step": 102185 }, { "epoch": 1.13, "learning_rate": 3.114130274308324e-05, "loss": 0.6532, "step": 102190 }, { "epoch": 1.13, "learning_rate": 3.114038001594473e-05, "loss": 0.6434, "step": 102195 }, { "epoch": 1.13, "learning_rate": 3.1139457288806215e-05, "loss": 0.6426, "step": 102200 }, { "epoch": 1.13, "learning_rate": 3.11385345616677e-05, "loss": 0.6924, "step": 102205 }, { "epoch": 1.13, "learning_rate": 3.113761183452919e-05, "loss": 0.67, "step": 102210 }, { "epoch": 1.13, "learning_rate": 3.113668910739068e-05, "loss": 0.6511, "step": 102215 }, { "epoch": 1.13, "learning_rate": 3.113576638025216e-05, "loss": 0.6632, "step": 102220 }, { "epoch": 1.13, "learning_rate": 3.1134843653113654e-05, "loss": 0.6747, "step": 102225 }, { "epoch": 1.13, "learning_rate": 3.113392092597514e-05, "loss": 0.6901, "step": 102230 }, { "epoch": 1.13, "learning_rate": 3.113299819883663e-05, "loss": 0.6316, "step": 102235 }, { "epoch": 1.13, "learning_rate": 3.113207547169811e-05, "loss": 0.6419, "step": 102240 }, { "epoch": 1.13, "learning_rate": 3.11311527445596e-05, "loss": 0.6664, "step": 102245 }, { "epoch": 1.13, "learning_rate": 3.1130230017421094e-05, "loss": 0.6881, "step": 102250 }, { "epoch": 1.13, "learning_rate": 3.1129307290282575e-05, "loss": 0.6618, "step": 102255 }, { "epoch": 1.13, "learning_rate": 3.112838456314406e-05, "loss": 0.6568, "step": 102260 }, { "epoch": 1.13, "learning_rate": 3.112746183600555e-05, "loss": 0.6009, "step": 102265 }, { "epoch": 1.13, "learning_rate": 3.1126539108867045e-05, "loss": 0.6742, "step": 102270 }, { "epoch": 1.13, "learning_rate": 3.1125616381728526e-05, "loss": 0.6708, "step": 102275 }, { "epoch": 1.13, "learning_rate": 3.1124693654590014e-05, "loss": 0.6469, "step": 102280 }, { "epoch": 1.13, "learning_rate": 3.11237709274515e-05, "loss": 0.6676, "step": 102285 }, { "epoch": 1.13, "learning_rate": 3.112284820031299e-05, "loss": 0.6209, "step": 102290 }, { "epoch": 1.13, "learning_rate": 3.112192547317448e-05, "loss": 0.6549, "step": 102295 }, { "epoch": 1.13, "learning_rate": 3.1121002746035965e-05, "loss": 0.7133, "step": 102300 }, { "epoch": 1.13, "learning_rate": 3.112008001889745e-05, "loss": 0.6215, "step": 102305 }, { "epoch": 1.13, "learning_rate": 3.111915729175894e-05, "loss": 0.695, "step": 102310 }, { "epoch": 1.13, "learning_rate": 3.111823456462043e-05, "loss": 0.6738, "step": 102315 }, { "epoch": 1.13, "learning_rate": 3.111731183748192e-05, "loss": 0.6715, "step": 102320 }, { "epoch": 1.13, "learning_rate": 3.1116389110343405e-05, "loss": 0.6979, "step": 102325 }, { "epoch": 1.13, "learning_rate": 3.1115466383204886e-05, "loss": 0.6915, "step": 102330 }, { "epoch": 1.13, "learning_rate": 3.111454365606638e-05, "loss": 0.6774, "step": 102335 }, { "epoch": 1.13, "learning_rate": 3.111362092892787e-05, "loss": 0.709, "step": 102340 }, { "epoch": 1.13, "learning_rate": 3.1112698201789356e-05, "loss": 0.7032, "step": 102345 }, { "epoch": 1.13, "learning_rate": 3.111177547465084e-05, "loss": 0.7112, "step": 102350 }, { "epoch": 1.13, "learning_rate": 3.111085274751233e-05, "loss": 0.6104, "step": 102355 }, { "epoch": 1.13, "learning_rate": 3.110993002037382e-05, "loss": 0.6584, "step": 102360 }, { "epoch": 1.13, "learning_rate": 3.11090072932353e-05, "loss": 0.6978, "step": 102365 }, { "epoch": 1.13, "learning_rate": 3.110808456609679e-05, "loss": 0.7187, "step": 102370 }, { "epoch": 1.13, "learning_rate": 3.110716183895828e-05, "loss": 0.6315, "step": 102375 }, { "epoch": 1.13, "learning_rate": 3.110623911181977e-05, "loss": 0.6303, "step": 102380 }, { "epoch": 1.13, "learning_rate": 3.110531638468125e-05, "loss": 0.6558, "step": 102385 }, { "epoch": 1.13, "learning_rate": 3.110439365754274e-05, "loss": 0.7418, "step": 102390 }, { "epoch": 1.13, "learning_rate": 3.110347093040423e-05, "loss": 0.6577, "step": 102395 }, { "epoch": 1.13, "learning_rate": 3.110254820326572e-05, "loss": 0.6905, "step": 102400 }, { "epoch": 1.13, "learning_rate": 3.11016254761272e-05, "loss": 0.6699, "step": 102405 }, { "epoch": 1.13, "learning_rate": 3.110070274898869e-05, "loss": 0.6381, "step": 102410 }, { "epoch": 1.13, "learning_rate": 3.109978002185018e-05, "loss": 0.6561, "step": 102415 }, { "epoch": 1.13, "learning_rate": 3.109885729471167e-05, "loss": 0.587, "step": 102420 }, { "epoch": 1.13, "learning_rate": 3.1097934567573155e-05, "loss": 0.6829, "step": 102425 }, { "epoch": 1.13, "learning_rate": 3.109701184043464e-05, "loss": 0.6646, "step": 102430 }, { "epoch": 1.13, "learning_rate": 3.109608911329613e-05, "loss": 0.678, "step": 102435 }, { "epoch": 1.13, "learning_rate": 3.109516638615762e-05, "loss": 0.6681, "step": 102440 }, { "epoch": 1.13, "learning_rate": 3.1094243659019106e-05, "loss": 0.6642, "step": 102445 }, { "epoch": 1.13, "learning_rate": 3.1093320931880594e-05, "loss": 0.7229, "step": 102450 }, { "epoch": 1.13, "learning_rate": 3.109239820474208e-05, "loss": 0.7074, "step": 102455 }, { "epoch": 1.13, "learning_rate": 3.109147547760357e-05, "loss": 0.6337, "step": 102460 }, { "epoch": 1.13, "learning_rate": 3.109055275046506e-05, "loss": 0.6957, "step": 102465 }, { "epoch": 1.13, "learning_rate": 3.1089630023326545e-05, "loss": 0.6712, "step": 102470 }, { "epoch": 1.13, "learning_rate": 3.108870729618803e-05, "loss": 0.7426, "step": 102475 }, { "epoch": 1.13, "learning_rate": 3.1087784569049514e-05, "loss": 0.6594, "step": 102480 }, { "epoch": 1.13, "learning_rate": 3.108686184191101e-05, "loss": 0.6834, "step": 102485 }, { "epoch": 1.13, "learning_rate": 3.10859391147725e-05, "loss": 0.7337, "step": 102490 }, { "epoch": 1.13, "learning_rate": 3.108501638763398e-05, "loss": 0.6879, "step": 102495 }, { "epoch": 1.13, "learning_rate": 3.1084093660495466e-05, "loss": 0.7036, "step": 102500 }, { "epoch": 1.14, "learning_rate": 3.108317093335696e-05, "loss": 0.681, "step": 102505 }, { "epoch": 1.14, "learning_rate": 3.108224820621845e-05, "loss": 0.6807, "step": 102510 }, { "epoch": 1.14, "learning_rate": 3.108132547907993e-05, "loss": 0.6776, "step": 102515 }, { "epoch": 1.14, "learning_rate": 3.108040275194142e-05, "loss": 0.6254, "step": 102520 }, { "epoch": 1.14, "learning_rate": 3.107948002480291e-05, "loss": 0.6387, "step": 102525 }, { "epoch": 1.14, "learning_rate": 3.107855729766439e-05, "loss": 0.6684, "step": 102530 }, { "epoch": 1.14, "learning_rate": 3.107763457052588e-05, "loss": 0.595, "step": 102535 }, { "epoch": 1.14, "learning_rate": 3.107671184338737e-05, "loss": 0.6433, "step": 102540 }, { "epoch": 1.14, "learning_rate": 3.1075789116248856e-05, "loss": 0.7124, "step": 102545 }, { "epoch": 1.14, "learning_rate": 3.1074866389110344e-05, "loss": 0.7405, "step": 102550 }, { "epoch": 1.14, "learning_rate": 3.107394366197183e-05, "loss": 0.6373, "step": 102555 }, { "epoch": 1.14, "learning_rate": 3.107302093483332e-05, "loss": 0.6523, "step": 102560 }, { "epoch": 1.14, "learning_rate": 3.107209820769481e-05, "loss": 0.6955, "step": 102565 }, { "epoch": 1.14, "learning_rate": 3.1071175480556296e-05, "loss": 0.6627, "step": 102570 }, { "epoch": 1.14, "learning_rate": 3.1070252753417783e-05, "loss": 0.6353, "step": 102575 }, { "epoch": 1.14, "learning_rate": 3.106933002627927e-05, "loss": 0.6698, "step": 102580 }, { "epoch": 1.14, "learning_rate": 3.106840729914076e-05, "loss": 0.7092, "step": 102585 }, { "epoch": 1.14, "learning_rate": 3.106748457200225e-05, "loss": 0.6532, "step": 102590 }, { "epoch": 1.14, "learning_rate": 3.1066561844863735e-05, "loss": 0.6589, "step": 102595 }, { "epoch": 1.14, "learning_rate": 3.106563911772522e-05, "loss": 0.6885, "step": 102600 }, { "epoch": 1.14, "learning_rate": 3.1064716390586704e-05, "loss": 0.6727, "step": 102605 }, { "epoch": 1.14, "learning_rate": 3.10637936634482e-05, "loss": 0.6087, "step": 102610 }, { "epoch": 1.14, "learning_rate": 3.1062870936309686e-05, "loss": 0.6953, "step": 102615 }, { "epoch": 1.14, "learning_rate": 3.1061948209171174e-05, "loss": 0.7029, "step": 102620 }, { "epoch": 1.14, "learning_rate": 3.1061025482032655e-05, "loss": 0.6865, "step": 102625 }, { "epoch": 1.14, "learning_rate": 3.106010275489414e-05, "loss": 0.6556, "step": 102630 }, { "epoch": 1.14, "learning_rate": 3.105918002775564e-05, "loss": 0.6457, "step": 102635 }, { "epoch": 1.14, "learning_rate": 3.105825730061712e-05, "loss": 0.6175, "step": 102640 }, { "epoch": 1.14, "learning_rate": 3.1057334573478607e-05, "loss": 0.6551, "step": 102645 }, { "epoch": 1.14, "learning_rate": 3.1056411846340094e-05, "loss": 0.6026, "step": 102650 }, { "epoch": 1.14, "learning_rate": 3.105548911920159e-05, "loss": 0.7274, "step": 102655 }, { "epoch": 1.14, "learning_rate": 3.105456639206307e-05, "loss": 0.6845, "step": 102660 }, { "epoch": 1.14, "learning_rate": 3.105364366492456e-05, "loss": 0.6625, "step": 102665 }, { "epoch": 1.14, "learning_rate": 3.1052720937786046e-05, "loss": 0.684, "step": 102670 }, { "epoch": 1.14, "learning_rate": 3.1051798210647534e-05, "loss": 0.5991, "step": 102675 }, { "epoch": 1.14, "learning_rate": 3.105087548350902e-05, "loss": 0.6649, "step": 102680 }, { "epoch": 1.14, "learning_rate": 3.104995275637051e-05, "loss": 0.6624, "step": 102685 }, { "epoch": 1.14, "learning_rate": 3.1049030029232e-05, "loss": 0.6175, "step": 102690 }, { "epoch": 1.14, "learning_rate": 3.1048107302093485e-05, "loss": 0.6763, "step": 102695 }, { "epoch": 1.14, "learning_rate": 3.104718457495497e-05, "loss": 0.7123, "step": 102700 }, { "epoch": 1.14, "learning_rate": 3.104626184781646e-05, "loss": 0.6564, "step": 102705 }, { "epoch": 1.14, "learning_rate": 3.104533912067795e-05, "loss": 0.6486, "step": 102710 }, { "epoch": 1.14, "learning_rate": 3.104441639353943e-05, "loss": 0.6771, "step": 102715 }, { "epoch": 1.14, "learning_rate": 3.1043493666400924e-05, "loss": 0.6475, "step": 102720 }, { "epoch": 1.14, "learning_rate": 3.104257093926241e-05, "loss": 0.6682, "step": 102725 }, { "epoch": 1.14, "learning_rate": 3.10416482121239e-05, "loss": 0.5898, "step": 102730 }, { "epoch": 1.14, "learning_rate": 3.104072548498538e-05, "loss": 0.6338, "step": 102735 }, { "epoch": 1.14, "learning_rate": 3.1039802757846876e-05, "loss": 0.6666, "step": 102740 }, { "epoch": 1.14, "learning_rate": 3.1038880030708364e-05, "loss": 0.6942, "step": 102745 }, { "epoch": 1.14, "learning_rate": 3.1037957303569845e-05, "loss": 0.7113, "step": 102750 }, { "epoch": 1.14, "learning_rate": 3.103703457643133e-05, "loss": 0.7136, "step": 102755 }, { "epoch": 1.14, "learning_rate": 3.103611184929283e-05, "loss": 0.6847, "step": 102760 }, { "epoch": 1.14, "learning_rate": 3.1035189122154315e-05, "loss": 0.6767, "step": 102765 }, { "epoch": 1.14, "learning_rate": 3.1034266395015796e-05, "loss": 0.6302, "step": 102770 }, { "epoch": 1.14, "learning_rate": 3.1033343667877284e-05, "loss": 0.7562, "step": 102775 }, { "epoch": 1.14, "learning_rate": 3.103242094073877e-05, "loss": 0.6814, "step": 102780 }, { "epoch": 1.14, "learning_rate": 3.1031498213600266e-05, "loss": 0.6398, "step": 102785 }, { "epoch": 1.14, "learning_rate": 3.103057548646175e-05, "loss": 0.6802, "step": 102790 }, { "epoch": 1.14, "learning_rate": 3.1029652759323235e-05, "loss": 0.6234, "step": 102795 }, { "epoch": 1.14, "learning_rate": 3.102873003218472e-05, "loss": 0.6222, "step": 102800 }, { "epoch": 1.14, "learning_rate": 3.102780730504621e-05, "loss": 0.5839, "step": 102805 }, { "epoch": 1.14, "learning_rate": 3.10268845779077e-05, "loss": 0.7025, "step": 102810 }, { "epoch": 1.14, "learning_rate": 3.102596185076919e-05, "loss": 0.682, "step": 102815 }, { "epoch": 1.14, "learning_rate": 3.1025039123630674e-05, "loss": 0.6379, "step": 102820 }, { "epoch": 1.14, "learning_rate": 3.102411639649216e-05, "loss": 0.7007, "step": 102825 }, { "epoch": 1.14, "learning_rate": 3.102319366935365e-05, "loss": 0.6501, "step": 102830 }, { "epoch": 1.14, "learning_rate": 3.102227094221514e-05, "loss": 0.7271, "step": 102835 }, { "epoch": 1.14, "learning_rate": 3.1021348215076626e-05, "loss": 0.7236, "step": 102840 }, { "epoch": 1.14, "learning_rate": 3.1020425487938114e-05, "loss": 0.6486, "step": 102845 }, { "epoch": 1.14, "learning_rate": 3.10195027607996e-05, "loss": 0.6977, "step": 102850 }, { "epoch": 1.14, "learning_rate": 3.101858003366109e-05, "loss": 0.6735, "step": 102855 }, { "epoch": 1.14, "learning_rate": 3.101765730652258e-05, "loss": 0.6555, "step": 102860 }, { "epoch": 1.14, "learning_rate": 3.101673457938406e-05, "loss": 0.6889, "step": 102865 }, { "epoch": 1.14, "learning_rate": 3.101581185224555e-05, "loss": 0.6426, "step": 102870 }, { "epoch": 1.14, "learning_rate": 3.101488912510704e-05, "loss": 0.6718, "step": 102875 }, { "epoch": 1.14, "learning_rate": 3.101396639796852e-05, "loss": 0.6921, "step": 102880 }, { "epoch": 1.14, "learning_rate": 3.101304367083001e-05, "loss": 0.6788, "step": 102885 }, { "epoch": 1.14, "learning_rate": 3.1012120943691504e-05, "loss": 0.6608, "step": 102890 }, { "epoch": 1.14, "learning_rate": 3.101119821655299e-05, "loss": 0.6926, "step": 102895 }, { "epoch": 1.14, "learning_rate": 3.101027548941447e-05, "loss": 0.7052, "step": 102900 }, { "epoch": 1.14, "learning_rate": 3.100935276227596e-05, "loss": 0.6737, "step": 102905 }, { "epoch": 1.14, "learning_rate": 3.1008430035137456e-05, "loss": 0.6972, "step": 102910 }, { "epoch": 1.14, "learning_rate": 3.100750730799894e-05, "loss": 0.6409, "step": 102915 }, { "epoch": 1.14, "learning_rate": 3.1006584580860425e-05, "loss": 0.6684, "step": 102920 }, { "epoch": 1.14, "learning_rate": 3.100566185372191e-05, "loss": 0.6977, "step": 102925 }, { "epoch": 1.14, "learning_rate": 3.10047391265834e-05, "loss": 0.6727, "step": 102930 }, { "epoch": 1.14, "learning_rate": 3.100381639944489e-05, "loss": 0.6632, "step": 102935 }, { "epoch": 1.14, "learning_rate": 3.1002893672306376e-05, "loss": 0.6147, "step": 102940 }, { "epoch": 1.14, "learning_rate": 3.1001970945167864e-05, "loss": 0.6397, "step": 102945 }, { "epoch": 1.14, "learning_rate": 3.100104821802935e-05, "loss": 0.6432, "step": 102950 }, { "epoch": 1.14, "learning_rate": 3.100012549089084e-05, "loss": 0.6791, "step": 102955 }, { "epoch": 1.14, "learning_rate": 3.099920276375233e-05, "loss": 0.6297, "step": 102960 }, { "epoch": 1.14, "learning_rate": 3.0998280036613815e-05, "loss": 0.6743, "step": 102965 }, { "epoch": 1.14, "learning_rate": 3.09973573094753e-05, "loss": 0.7383, "step": 102970 }, { "epoch": 1.14, "learning_rate": 3.099643458233679e-05, "loss": 0.6612, "step": 102975 }, { "epoch": 1.14, "learning_rate": 3.099551185519828e-05, "loss": 0.7104, "step": 102980 }, { "epoch": 1.14, "learning_rate": 3.099458912805977e-05, "loss": 0.6538, "step": 102985 }, { "epoch": 1.14, "learning_rate": 3.099366640092125e-05, "loss": 0.6143, "step": 102990 }, { "epoch": 1.14, "learning_rate": 3.099274367378274e-05, "loss": 0.6479, "step": 102995 }, { "epoch": 1.14, "learning_rate": 3.099182094664423e-05, "loss": 0.6703, "step": 103000 }, { "epoch": 1.14, "eval_loss": 0.6308215260505676, "eval_runtime": 70.226, "eval_samples_per_second": 28.479, "eval_steps_per_second": 14.24, "step": 103000 }, { "epoch": 1.14, "learning_rate": 3.099089821950572e-05, "loss": 0.6538, "step": 103005 }, { "epoch": 1.14, "learning_rate": 3.09899754923672e-05, "loss": 0.6653, "step": 103010 }, { "epoch": 1.14, "learning_rate": 3.098905276522869e-05, "loss": 0.6445, "step": 103015 }, { "epoch": 1.14, "learning_rate": 3.098813003809018e-05, "loss": 0.637, "step": 103020 }, { "epoch": 1.14, "learning_rate": 3.098720731095166e-05, "loss": 0.6665, "step": 103025 }, { "epoch": 1.14, "learning_rate": 3.098628458381315e-05, "loss": 0.6335, "step": 103030 }, { "epoch": 1.14, "learning_rate": 3.098536185667464e-05, "loss": 0.7214, "step": 103035 }, { "epoch": 1.14, "learning_rate": 3.098443912953613e-05, "loss": 0.6576, "step": 103040 }, { "epoch": 1.14, "learning_rate": 3.0983516402397614e-05, "loss": 0.6493, "step": 103045 }, { "epoch": 1.14, "learning_rate": 3.09825936752591e-05, "loss": 0.6331, "step": 103050 }, { "epoch": 1.14, "learning_rate": 3.098167094812059e-05, "loss": 0.644, "step": 103055 }, { "epoch": 1.14, "learning_rate": 3.098074822098208e-05, "loss": 0.6532, "step": 103060 }, { "epoch": 1.14, "learning_rate": 3.0979825493843565e-05, "loss": 0.6644, "step": 103065 }, { "epoch": 1.14, "learning_rate": 3.097890276670505e-05, "loss": 0.6704, "step": 103070 }, { "epoch": 1.14, "learning_rate": 3.097798003956654e-05, "loss": 0.7235, "step": 103075 }, { "epoch": 1.14, "learning_rate": 3.097705731242803e-05, "loss": 0.7074, "step": 103080 }, { "epoch": 1.14, "learning_rate": 3.097613458528952e-05, "loss": 0.7059, "step": 103085 }, { "epoch": 1.14, "learning_rate": 3.0975211858151005e-05, "loss": 0.6525, "step": 103090 }, { "epoch": 1.14, "learning_rate": 3.097428913101249e-05, "loss": 0.7376, "step": 103095 }, { "epoch": 1.14, "learning_rate": 3.0973366403873974e-05, "loss": 0.6423, "step": 103100 }, { "epoch": 1.14, "learning_rate": 3.097244367673547e-05, "loss": 0.6644, "step": 103105 }, { "epoch": 1.14, "learning_rate": 3.0971520949596956e-05, "loss": 0.7163, "step": 103110 }, { "epoch": 1.14, "learning_rate": 3.0970598222458444e-05, "loss": 0.6311, "step": 103115 }, { "epoch": 1.14, "learning_rate": 3.0969675495319925e-05, "loss": 0.6807, "step": 103120 }, { "epoch": 1.14, "learning_rate": 3.096875276818142e-05, "loss": 0.6484, "step": 103125 }, { "epoch": 1.14, "learning_rate": 3.096783004104291e-05, "loss": 0.6578, "step": 103130 }, { "epoch": 1.14, "learning_rate": 3.096690731390439e-05, "loss": 0.6505, "step": 103135 }, { "epoch": 1.14, "learning_rate": 3.0965984586765876e-05, "loss": 0.7117, "step": 103140 }, { "epoch": 1.14, "learning_rate": 3.096506185962737e-05, "loss": 0.6621, "step": 103145 }, { "epoch": 1.14, "learning_rate": 3.096413913248886e-05, "loss": 0.7419, "step": 103150 }, { "epoch": 1.14, "learning_rate": 3.096321640535034e-05, "loss": 0.6923, "step": 103155 }, { "epoch": 1.14, "learning_rate": 3.096229367821183e-05, "loss": 0.6135, "step": 103160 }, { "epoch": 1.14, "learning_rate": 3.0961370951073316e-05, "loss": 0.6512, "step": 103165 }, { "epoch": 1.14, "learning_rate": 3.096044822393481e-05, "loss": 0.7204, "step": 103170 }, { "epoch": 1.14, "learning_rate": 3.095952549679629e-05, "loss": 0.6721, "step": 103175 }, { "epoch": 1.14, "learning_rate": 3.095860276965778e-05, "loss": 0.6573, "step": 103180 }, { "epoch": 1.14, "learning_rate": 3.095768004251927e-05, "loss": 0.6421, "step": 103185 }, { "epoch": 1.14, "learning_rate": 3.0956757315380755e-05, "loss": 0.7049, "step": 103190 }, { "epoch": 1.14, "learning_rate": 3.095583458824224e-05, "loss": 0.6434, "step": 103195 }, { "epoch": 1.14, "learning_rate": 3.095491186110373e-05, "loss": 0.7162, "step": 103200 }, { "epoch": 1.14, "learning_rate": 3.095398913396522e-05, "loss": 0.6286, "step": 103205 }, { "epoch": 1.14, "learning_rate": 3.0953066406826706e-05, "loss": 0.7279, "step": 103210 }, { "epoch": 1.14, "learning_rate": 3.0952143679688194e-05, "loss": 0.7185, "step": 103215 }, { "epoch": 1.14, "learning_rate": 3.095122095254968e-05, "loss": 0.6594, "step": 103220 }, { "epoch": 1.14, "learning_rate": 3.095029822541117e-05, "loss": 0.599, "step": 103225 }, { "epoch": 1.14, "learning_rate": 3.094937549827265e-05, "loss": 0.6771, "step": 103230 }, { "epoch": 1.14, "learning_rate": 3.0948452771134146e-05, "loss": 0.654, "step": 103235 }, { "epoch": 1.14, "learning_rate": 3.0947530043995633e-05, "loss": 0.6592, "step": 103240 }, { "epoch": 1.14, "learning_rate": 3.094660731685712e-05, "loss": 0.6482, "step": 103245 }, { "epoch": 1.14, "learning_rate": 3.09456845897186e-05, "loss": 0.6659, "step": 103250 }, { "epoch": 1.14, "learning_rate": 3.09447618625801e-05, "loss": 0.6328, "step": 103255 }, { "epoch": 1.14, "learning_rate": 3.0943839135441585e-05, "loss": 0.6129, "step": 103260 }, { "epoch": 1.14, "learning_rate": 3.0942916408303066e-05, "loss": 0.6834, "step": 103265 }, { "epoch": 1.14, "learning_rate": 3.0941993681164554e-05, "loss": 0.6698, "step": 103270 }, { "epoch": 1.14, "learning_rate": 3.094107095402605e-05, "loss": 0.6497, "step": 103275 }, { "epoch": 1.14, "learning_rate": 3.0940148226887536e-05, "loss": 0.6633, "step": 103280 }, { "epoch": 1.14, "learning_rate": 3.093922549974902e-05, "loss": 0.7183, "step": 103285 }, { "epoch": 1.14, "learning_rate": 3.0938302772610505e-05, "loss": 0.6159, "step": 103290 }, { "epoch": 1.14, "learning_rate": 3.0937380045472e-05, "loss": 0.6983, "step": 103295 }, { "epoch": 1.14, "learning_rate": 3.093645731833348e-05, "loss": 0.6898, "step": 103300 }, { "epoch": 1.14, "learning_rate": 3.093553459119497e-05, "loss": 0.691, "step": 103305 }, { "epoch": 1.14, "learning_rate": 3.0934611864056457e-05, "loss": 0.7192, "step": 103310 }, { "epoch": 1.14, "learning_rate": 3.0933689136917944e-05, "loss": 0.6707, "step": 103315 }, { "epoch": 1.14, "learning_rate": 3.093276640977943e-05, "loss": 0.6609, "step": 103320 }, { "epoch": 1.14, "learning_rate": 3.093184368264092e-05, "loss": 0.7152, "step": 103325 }, { "epoch": 1.14, "learning_rate": 3.093092095550241e-05, "loss": 0.6896, "step": 103330 }, { "epoch": 1.14, "learning_rate": 3.0929998228363896e-05, "loss": 0.6856, "step": 103335 }, { "epoch": 1.14, "learning_rate": 3.0929075501225384e-05, "loss": 0.698, "step": 103340 }, { "epoch": 1.14, "learning_rate": 3.092815277408687e-05, "loss": 0.6824, "step": 103345 }, { "epoch": 1.14, "learning_rate": 3.092723004694836e-05, "loss": 0.6686, "step": 103350 }, { "epoch": 1.14, "learning_rate": 3.092630731980985e-05, "loss": 0.6657, "step": 103355 }, { "epoch": 1.14, "learning_rate": 3.0925384592671335e-05, "loss": 0.7164, "step": 103360 }, { "epoch": 1.14, "learning_rate": 3.092446186553282e-05, "loss": 0.6615, "step": 103365 }, { "epoch": 1.14, "learning_rate": 3.092353913839431e-05, "loss": 0.6513, "step": 103370 }, { "epoch": 1.14, "learning_rate": 3.092261641125579e-05, "loss": 0.7189, "step": 103375 }, { "epoch": 1.14, "learning_rate": 3.092169368411728e-05, "loss": 0.6758, "step": 103380 }, { "epoch": 1.14, "learning_rate": 3.0920770956978774e-05, "loss": 0.6598, "step": 103385 }, { "epoch": 1.14, "learning_rate": 3.091984822984026e-05, "loss": 0.6415, "step": 103390 }, { "epoch": 1.14, "learning_rate": 3.091892550270174e-05, "loss": 0.6335, "step": 103395 }, { "epoch": 1.14, "learning_rate": 3.091800277556323e-05, "loss": 0.6422, "step": 103400 }, { "epoch": 1.14, "learning_rate": 3.0917080048424726e-05, "loss": 0.7443, "step": 103405 }, { "epoch": 1.15, "learning_rate": 3.091615732128621e-05, "loss": 0.6487, "step": 103410 }, { "epoch": 1.15, "learning_rate": 3.0915234594147695e-05, "loss": 0.6705, "step": 103415 }, { "epoch": 1.15, "learning_rate": 3.091431186700918e-05, "loss": 0.6738, "step": 103420 }, { "epoch": 1.15, "learning_rate": 3.091338913987068e-05, "loss": 0.6794, "step": 103425 }, { "epoch": 1.15, "learning_rate": 3.091246641273216e-05, "loss": 0.7049, "step": 103430 }, { "epoch": 1.15, "learning_rate": 3.0911543685593646e-05, "loss": 0.6924, "step": 103435 }, { "epoch": 1.15, "learning_rate": 3.0910620958455134e-05, "loss": 0.6509, "step": 103440 }, { "epoch": 1.15, "learning_rate": 3.090969823131662e-05, "loss": 0.69, "step": 103445 }, { "epoch": 1.15, "learning_rate": 3.090877550417811e-05, "loss": 0.6644, "step": 103450 }, { "epoch": 1.15, "learning_rate": 3.09078527770396e-05, "loss": 0.6203, "step": 103455 }, { "epoch": 1.15, "learning_rate": 3.0906930049901085e-05, "loss": 0.6776, "step": 103460 }, { "epoch": 1.15, "learning_rate": 3.090600732276257e-05, "loss": 0.6696, "step": 103465 }, { "epoch": 1.15, "learning_rate": 3.090508459562406e-05, "loss": 0.6976, "step": 103470 }, { "epoch": 1.15, "learning_rate": 3.090416186848555e-05, "loss": 0.6495, "step": 103475 }, { "epoch": 1.15, "learning_rate": 3.0903239141347037e-05, "loss": 0.6611, "step": 103480 }, { "epoch": 1.15, "learning_rate": 3.090231641420852e-05, "loss": 0.6266, "step": 103485 }, { "epoch": 1.15, "learning_rate": 3.090139368707001e-05, "loss": 0.6252, "step": 103490 }, { "epoch": 1.15, "learning_rate": 3.09004709599315e-05, "loss": 0.6601, "step": 103495 }, { "epoch": 1.15, "learning_rate": 3.089954823279299e-05, "loss": 0.7071, "step": 103500 }, { "epoch": 1.15, "learning_rate": 3.089862550565447e-05, "loss": 0.6403, "step": 103505 }, { "epoch": 1.15, "learning_rate": 3.0897702778515964e-05, "loss": 0.6735, "step": 103510 }, { "epoch": 1.15, "learning_rate": 3.089678005137745e-05, "loss": 0.6699, "step": 103515 }, { "epoch": 1.15, "learning_rate": 3.089585732423893e-05, "loss": 0.658, "step": 103520 }, { "epoch": 1.15, "learning_rate": 3.089493459710042e-05, "loss": 0.6853, "step": 103525 }, { "epoch": 1.15, "learning_rate": 3.089401186996191e-05, "loss": 0.6245, "step": 103530 }, { "epoch": 1.15, "learning_rate": 3.08930891428234e-05, "loss": 0.6695, "step": 103535 }, { "epoch": 1.15, "learning_rate": 3.0892166415684884e-05, "loss": 0.6711, "step": 103540 }, { "epoch": 1.15, "learning_rate": 3.089124368854637e-05, "loss": 0.6642, "step": 103545 }, { "epoch": 1.15, "learning_rate": 3.089032096140786e-05, "loss": 0.6666, "step": 103550 }, { "epoch": 1.15, "learning_rate": 3.0889398234269354e-05, "loss": 0.6657, "step": 103555 }, { "epoch": 1.15, "learning_rate": 3.0888475507130835e-05, "loss": 0.7013, "step": 103560 }, { "epoch": 1.15, "learning_rate": 3.088755277999232e-05, "loss": 0.6811, "step": 103565 }, { "epoch": 1.15, "learning_rate": 3.088663005285381e-05, "loss": 0.6651, "step": 103570 }, { "epoch": 1.15, "learning_rate": 3.08857073257153e-05, "loss": 0.6832, "step": 103575 }, { "epoch": 1.15, "learning_rate": 3.088478459857679e-05, "loss": 0.6432, "step": 103580 }, { "epoch": 1.15, "learning_rate": 3.0883861871438275e-05, "loss": 0.7005, "step": 103585 }, { "epoch": 1.15, "learning_rate": 3.088293914429976e-05, "loss": 0.7171, "step": 103590 }, { "epoch": 1.15, "learning_rate": 3.088201641716125e-05, "loss": 0.6714, "step": 103595 }, { "epoch": 1.15, "learning_rate": 3.088109369002274e-05, "loss": 0.6292, "step": 103600 }, { "epoch": 1.15, "learning_rate": 3.0880170962884226e-05, "loss": 0.7298, "step": 103605 }, { "epoch": 1.15, "learning_rate": 3.0879248235745714e-05, "loss": 0.6659, "step": 103610 }, { "epoch": 1.15, "learning_rate": 3.0878325508607195e-05, "loss": 0.6848, "step": 103615 }, { "epoch": 1.15, "learning_rate": 3.087740278146869e-05, "loss": 0.6699, "step": 103620 }, { "epoch": 1.15, "learning_rate": 3.087648005433018e-05, "loss": 0.6633, "step": 103625 }, { "epoch": 1.15, "learning_rate": 3.0875557327191665e-05, "loss": 0.7116, "step": 103630 }, { "epoch": 1.15, "learning_rate": 3.0874634600053146e-05, "loss": 0.6172, "step": 103635 }, { "epoch": 1.15, "learning_rate": 3.087371187291464e-05, "loss": 0.6758, "step": 103640 }, { "epoch": 1.15, "learning_rate": 3.087278914577613e-05, "loss": 0.6831, "step": 103645 }, { "epoch": 1.15, "learning_rate": 3.087186641863761e-05, "loss": 0.6472, "step": 103650 }, { "epoch": 1.15, "learning_rate": 3.08709436914991e-05, "loss": 0.7054, "step": 103655 }, { "epoch": 1.15, "learning_rate": 3.087002096436059e-05, "loss": 0.6992, "step": 103660 }, { "epoch": 1.15, "learning_rate": 3.086909823722208e-05, "loss": 0.6274, "step": 103665 }, { "epoch": 1.15, "learning_rate": 3.086817551008356e-05, "loss": 0.7067, "step": 103670 }, { "epoch": 1.15, "learning_rate": 3.086725278294505e-05, "loss": 0.6489, "step": 103675 }, { "epoch": 1.15, "learning_rate": 3.0866330055806544e-05, "loss": 0.6331, "step": 103680 }, { "epoch": 1.15, "learning_rate": 3.0865407328668025e-05, "loss": 0.6319, "step": 103685 }, { "epoch": 1.15, "learning_rate": 3.086448460152951e-05, "loss": 0.6813, "step": 103690 }, { "epoch": 1.15, "learning_rate": 3.0863561874391e-05, "loss": 0.667, "step": 103695 }, { "epoch": 1.15, "learning_rate": 3.086263914725249e-05, "loss": 0.6363, "step": 103700 }, { "epoch": 1.15, "learning_rate": 3.0861716420113976e-05, "loss": 0.6744, "step": 103705 }, { "epoch": 1.15, "learning_rate": 3.0860793692975464e-05, "loss": 0.6745, "step": 103710 }, { "epoch": 1.15, "learning_rate": 3.085987096583695e-05, "loss": 0.69, "step": 103715 }, { "epoch": 1.15, "learning_rate": 3.085894823869844e-05, "loss": 0.7119, "step": 103720 }, { "epoch": 1.15, "learning_rate": 3.085802551155993e-05, "loss": 0.6409, "step": 103725 }, { "epoch": 1.15, "learning_rate": 3.0857102784421415e-05, "loss": 0.6929, "step": 103730 }, { "epoch": 1.15, "learning_rate": 3.08561800572829e-05, "loss": 0.7132, "step": 103735 }, { "epoch": 1.15, "learning_rate": 3.085525733014439e-05, "loss": 0.6107, "step": 103740 }, { "epoch": 1.15, "learning_rate": 3.085433460300588e-05, "loss": 0.6333, "step": 103745 }, { "epoch": 1.15, "learning_rate": 3.085341187586737e-05, "loss": 0.7343, "step": 103750 }, { "epoch": 1.15, "learning_rate": 3.0852489148728855e-05, "loss": 0.6999, "step": 103755 }, { "epoch": 1.15, "learning_rate": 3.0851566421590336e-05, "loss": 0.635, "step": 103760 }, { "epoch": 1.15, "learning_rate": 3.0850643694451824e-05, "loss": 0.7079, "step": 103765 }, { "epoch": 1.15, "learning_rate": 3.084972096731332e-05, "loss": 0.6223, "step": 103770 }, { "epoch": 1.15, "learning_rate": 3.0848798240174806e-05, "loss": 0.6819, "step": 103775 }, { "epoch": 1.15, "learning_rate": 3.084787551303629e-05, "loss": 0.6556, "step": 103780 }, { "epoch": 1.15, "learning_rate": 3.0846952785897775e-05, "loss": 0.6303, "step": 103785 }, { "epoch": 1.15, "learning_rate": 3.084603005875927e-05, "loss": 0.6485, "step": 103790 }, { "epoch": 1.15, "learning_rate": 3.084510733162075e-05, "loss": 0.6888, "step": 103795 }, { "epoch": 1.15, "learning_rate": 3.084418460448224e-05, "loss": 0.6969, "step": 103800 }, { "epoch": 1.15, "learning_rate": 3.0843261877343726e-05, "loss": 0.6908, "step": 103805 }, { "epoch": 1.15, "learning_rate": 3.084233915020522e-05, "loss": 0.6681, "step": 103810 }, { "epoch": 1.15, "learning_rate": 3.08414164230667e-05, "loss": 0.6662, "step": 103815 }, { "epoch": 1.15, "learning_rate": 3.084049369592819e-05, "loss": 0.6445, "step": 103820 }, { "epoch": 1.15, "learning_rate": 3.083957096878968e-05, "loss": 0.6451, "step": 103825 }, { "epoch": 1.15, "learning_rate": 3.0838648241651166e-05, "loss": 0.675, "step": 103830 }, { "epoch": 1.15, "learning_rate": 3.0837725514512654e-05, "loss": 0.6873, "step": 103835 }, { "epoch": 1.15, "learning_rate": 3.083680278737414e-05, "loss": 0.6346, "step": 103840 }, { "epoch": 1.15, "learning_rate": 3.083588006023563e-05, "loss": 0.6926, "step": 103845 }, { "epoch": 1.15, "learning_rate": 3.083495733309712e-05, "loss": 0.6635, "step": 103850 }, { "epoch": 1.15, "learning_rate": 3.0834034605958605e-05, "loss": 0.6803, "step": 103855 }, { "epoch": 1.15, "learning_rate": 3.083311187882009e-05, "loss": 0.6408, "step": 103860 }, { "epoch": 1.15, "learning_rate": 3.083218915168158e-05, "loss": 0.6753, "step": 103865 }, { "epoch": 1.15, "learning_rate": 3.083126642454306e-05, "loss": 0.616, "step": 103870 }, { "epoch": 1.15, "learning_rate": 3.0830343697404556e-05, "loss": 0.7131, "step": 103875 }, { "epoch": 1.15, "learning_rate": 3.0829420970266044e-05, "loss": 0.715, "step": 103880 }, { "epoch": 1.15, "learning_rate": 3.082849824312753e-05, "loss": 0.6496, "step": 103885 }, { "epoch": 1.15, "learning_rate": 3.082757551598901e-05, "loss": 0.6673, "step": 103890 }, { "epoch": 1.15, "learning_rate": 3.082665278885051e-05, "loss": 0.6855, "step": 103895 }, { "epoch": 1.15, "learning_rate": 3.0825730061711996e-05, "loss": 0.6991, "step": 103900 }, { "epoch": 1.15, "learning_rate": 3.082480733457348e-05, "loss": 0.6859, "step": 103905 }, { "epoch": 1.15, "learning_rate": 3.0823884607434964e-05, "loss": 0.6458, "step": 103910 }, { "epoch": 1.15, "learning_rate": 3.082296188029645e-05, "loss": 0.6474, "step": 103915 }, { "epoch": 1.15, "learning_rate": 3.082203915315795e-05, "loss": 0.6896, "step": 103920 }, { "epoch": 1.15, "learning_rate": 3.082111642601943e-05, "loss": 0.7124, "step": 103925 }, { "epoch": 1.15, "learning_rate": 3.0820193698880916e-05, "loss": 0.7154, "step": 103930 }, { "epoch": 1.15, "learning_rate": 3.0819270971742404e-05, "loss": 0.6344, "step": 103935 }, { "epoch": 1.15, "learning_rate": 3.08183482446039e-05, "loss": 0.7161, "step": 103940 }, { "epoch": 1.15, "learning_rate": 3.081742551746538e-05, "loss": 0.6296, "step": 103945 }, { "epoch": 1.15, "learning_rate": 3.081650279032687e-05, "loss": 0.6977, "step": 103950 }, { "epoch": 1.15, "learning_rate": 3.0815580063188355e-05, "loss": 0.7205, "step": 103955 }, { "epoch": 1.15, "learning_rate": 3.081465733604984e-05, "loss": 0.6824, "step": 103960 }, { "epoch": 1.15, "learning_rate": 3.081373460891133e-05, "loss": 0.7384, "step": 103965 }, { "epoch": 1.15, "learning_rate": 3.081281188177282e-05, "loss": 0.665, "step": 103970 }, { "epoch": 1.15, "learning_rate": 3.0811889154634307e-05, "loss": 0.6953, "step": 103975 }, { "epoch": 1.15, "learning_rate": 3.0810966427495794e-05, "loss": 0.6807, "step": 103980 }, { "epoch": 1.15, "learning_rate": 3.081004370035728e-05, "loss": 0.6809, "step": 103985 }, { "epoch": 1.15, "learning_rate": 3.080912097321877e-05, "loss": 0.6425, "step": 103990 }, { "epoch": 1.15, "learning_rate": 3.080819824608026e-05, "loss": 0.7186, "step": 103995 }, { "epoch": 1.15, "learning_rate": 3.080727551894174e-05, "loss": 0.6793, "step": 104000 }, { "epoch": 1.15, "eval_loss": 0.6530967950820923, "eval_runtime": 70.1912, "eval_samples_per_second": 28.494, "eval_steps_per_second": 14.247, "step": 104000 }, { "epoch": 1.15, "learning_rate": 3.0806352791803234e-05, "loss": 0.6145, "step": 104005 }, { "epoch": 1.15, "learning_rate": 3.080543006466472e-05, "loss": 0.6926, "step": 104010 }, { "epoch": 1.15, "learning_rate": 3.080450733752621e-05, "loss": 0.657, "step": 104015 }, { "epoch": 1.15, "learning_rate": 3.080358461038769e-05, "loss": 0.6799, "step": 104020 }, { "epoch": 1.15, "learning_rate": 3.0802661883249185e-05, "loss": 0.7214, "step": 104025 }, { "epoch": 1.15, "learning_rate": 3.080173915611067e-05, "loss": 0.6461, "step": 104030 }, { "epoch": 1.15, "learning_rate": 3.0800816428972154e-05, "loss": 0.6996, "step": 104035 }, { "epoch": 1.15, "learning_rate": 3.079989370183364e-05, "loss": 0.6881, "step": 104040 }, { "epoch": 1.15, "learning_rate": 3.0798970974695136e-05, "loss": 0.7186, "step": 104045 }, { "epoch": 1.15, "learning_rate": 3.0798048247556624e-05, "loss": 0.7157, "step": 104050 }, { "epoch": 1.15, "learning_rate": 3.0797125520418105e-05, "loss": 0.7617, "step": 104055 }, { "epoch": 1.15, "learning_rate": 3.079620279327959e-05, "loss": 0.6577, "step": 104060 }, { "epoch": 1.15, "learning_rate": 3.079528006614108e-05, "loss": 0.6688, "step": 104065 }, { "epoch": 1.15, "learning_rate": 3.079435733900257e-05, "loss": 0.6482, "step": 104070 }, { "epoch": 1.15, "learning_rate": 3.079343461186406e-05, "loss": 0.6857, "step": 104075 }, { "epoch": 1.15, "learning_rate": 3.0792511884725545e-05, "loss": 0.6553, "step": 104080 }, { "epoch": 1.15, "learning_rate": 3.079158915758703e-05, "loss": 0.6751, "step": 104085 }, { "epoch": 1.15, "learning_rate": 3.079066643044852e-05, "loss": 0.6475, "step": 104090 }, { "epoch": 1.15, "learning_rate": 3.078974370331001e-05, "loss": 0.6653, "step": 104095 }, { "epoch": 1.15, "learning_rate": 3.0788820976171496e-05, "loss": 0.7009, "step": 104100 }, { "epoch": 1.15, "learning_rate": 3.0787898249032984e-05, "loss": 0.6331, "step": 104105 }, { "epoch": 1.15, "learning_rate": 3.078697552189447e-05, "loss": 0.5824, "step": 104110 }, { "epoch": 1.15, "learning_rate": 3.078605279475596e-05, "loss": 0.7241, "step": 104115 }, { "epoch": 1.15, "learning_rate": 3.078513006761745e-05, "loss": 0.7181, "step": 104120 }, { "epoch": 1.15, "learning_rate": 3.0784207340478935e-05, "loss": 0.7013, "step": 104125 }, { "epoch": 1.15, "learning_rate": 3.078328461334042e-05, "loss": 0.7064, "step": 104130 }, { "epoch": 1.15, "learning_rate": 3.078236188620191e-05, "loss": 0.6598, "step": 104135 }, { "epoch": 1.15, "learning_rate": 3.07814391590634e-05, "loss": 0.6506, "step": 104140 }, { "epoch": 1.15, "learning_rate": 3.078051643192488e-05, "loss": 0.6781, "step": 104145 }, { "epoch": 1.15, "learning_rate": 3.077959370478637e-05, "loss": 0.679, "step": 104150 }, { "epoch": 1.15, "learning_rate": 3.077867097764786e-05, "loss": 0.6629, "step": 104155 }, { "epoch": 1.15, "learning_rate": 3.077774825050935e-05, "loss": 0.6794, "step": 104160 }, { "epoch": 1.15, "learning_rate": 3.077682552337083e-05, "loss": 0.7085, "step": 104165 }, { "epoch": 1.15, "learning_rate": 3.077590279623232e-05, "loss": 0.7092, "step": 104170 }, { "epoch": 1.15, "learning_rate": 3.0774980069093814e-05, "loss": 0.6729, "step": 104175 }, { "epoch": 1.15, "learning_rate": 3.0774057341955295e-05, "loss": 0.6287, "step": 104180 }, { "epoch": 1.15, "learning_rate": 3.077313461481678e-05, "loss": 0.6801, "step": 104185 }, { "epoch": 1.15, "learning_rate": 3.077221188767827e-05, "loss": 0.7129, "step": 104190 }, { "epoch": 1.15, "learning_rate": 3.0771289160539765e-05, "loss": 0.6429, "step": 104195 }, { "epoch": 1.15, "learning_rate": 3.0770366433401246e-05, "loss": 0.6393, "step": 104200 }, { "epoch": 1.15, "learning_rate": 3.0769443706262734e-05, "loss": 0.7147, "step": 104205 }, { "epoch": 1.15, "learning_rate": 3.076852097912422e-05, "loss": 0.6819, "step": 104210 }, { "epoch": 1.15, "learning_rate": 3.076759825198571e-05, "loss": 0.7042, "step": 104215 }, { "epoch": 1.15, "learning_rate": 3.07666755248472e-05, "loss": 0.6149, "step": 104220 }, { "epoch": 1.15, "learning_rate": 3.0765752797708685e-05, "loss": 0.6933, "step": 104225 }, { "epoch": 1.15, "learning_rate": 3.076483007057017e-05, "loss": 0.6217, "step": 104230 }, { "epoch": 1.15, "learning_rate": 3.076390734343166e-05, "loss": 0.6657, "step": 104235 }, { "epoch": 1.15, "learning_rate": 3.076298461629315e-05, "loss": 0.6588, "step": 104240 }, { "epoch": 1.15, "learning_rate": 3.076206188915464e-05, "loss": 0.7029, "step": 104245 }, { "epoch": 1.15, "learning_rate": 3.0761139162016125e-05, "loss": 0.6261, "step": 104250 }, { "epoch": 1.15, "learning_rate": 3.0760216434877606e-05, "loss": 0.6799, "step": 104255 }, { "epoch": 1.15, "learning_rate": 3.07592937077391e-05, "loss": 0.6941, "step": 104260 }, { "epoch": 1.15, "learning_rate": 3.075837098060059e-05, "loss": 0.6516, "step": 104265 }, { "epoch": 1.15, "learning_rate": 3.0757448253462076e-05, "loss": 0.6505, "step": 104270 }, { "epoch": 1.15, "learning_rate": 3.075652552632356e-05, "loss": 0.6267, "step": 104275 }, { "epoch": 1.15, "learning_rate": 3.075560279918505e-05, "loss": 0.7365, "step": 104280 }, { "epoch": 1.15, "learning_rate": 3.075468007204654e-05, "loss": 0.6367, "step": 104285 }, { "epoch": 1.15, "learning_rate": 3.075375734490803e-05, "loss": 0.6555, "step": 104290 }, { "epoch": 1.15, "learning_rate": 3.075283461776951e-05, "loss": 0.7033, "step": 104295 }, { "epoch": 1.15, "learning_rate": 3.0751911890630996e-05, "loss": 0.6096, "step": 104300 }, { "epoch": 1.15, "learning_rate": 3.075098916349249e-05, "loss": 0.6046, "step": 104305 }, { "epoch": 1.15, "learning_rate": 3.075006643635397e-05, "loss": 0.6482, "step": 104310 }, { "epoch": 1.16, "learning_rate": 3.074914370921546e-05, "loss": 0.6588, "step": 104315 }, { "epoch": 1.16, "learning_rate": 3.074822098207695e-05, "loss": 0.6944, "step": 104320 }, { "epoch": 1.16, "learning_rate": 3.074729825493844e-05, "loss": 0.7323, "step": 104325 }, { "epoch": 1.16, "learning_rate": 3.0746375527799923e-05, "loss": 0.6662, "step": 104330 }, { "epoch": 1.16, "learning_rate": 3.074545280066141e-05, "loss": 0.6157, "step": 104335 }, { "epoch": 1.16, "learning_rate": 3.07445300735229e-05, "loss": 0.6707, "step": 104340 }, { "epoch": 1.16, "learning_rate": 3.074360734638439e-05, "loss": 0.6658, "step": 104345 }, { "epoch": 1.16, "learning_rate": 3.0742684619245875e-05, "loss": 0.6495, "step": 104350 }, { "epoch": 1.16, "learning_rate": 3.074176189210736e-05, "loss": 0.6287, "step": 104355 }, { "epoch": 1.16, "learning_rate": 3.074083916496885e-05, "loss": 0.7173, "step": 104360 }, { "epoch": 1.16, "learning_rate": 3.073991643783034e-05, "loss": 0.6493, "step": 104365 }, { "epoch": 1.16, "learning_rate": 3.0738993710691826e-05, "loss": 0.6555, "step": 104370 }, { "epoch": 1.16, "learning_rate": 3.0738070983553314e-05, "loss": 0.6668, "step": 104375 }, { "epoch": 1.16, "learning_rate": 3.07371482564148e-05, "loss": 0.6544, "step": 104380 }, { "epoch": 1.16, "learning_rate": 3.073622552927628e-05, "loss": 0.6064, "step": 104385 }, { "epoch": 1.16, "learning_rate": 3.073530280213778e-05, "loss": 0.6933, "step": 104390 }, { "epoch": 1.16, "learning_rate": 3.0734380074999265e-05, "loss": 0.706, "step": 104395 }, { "epoch": 1.16, "learning_rate": 3.073345734786075e-05, "loss": 0.6509, "step": 104400 }, { "epoch": 1.16, "learning_rate": 3.0732534620722234e-05, "loss": 0.6473, "step": 104405 }, { "epoch": 1.16, "learning_rate": 3.073161189358373e-05, "loss": 0.6374, "step": 104410 }, { "epoch": 1.16, "learning_rate": 3.073068916644522e-05, "loss": 0.6359, "step": 104415 }, { "epoch": 1.16, "learning_rate": 3.07297664393067e-05, "loss": 0.6498, "step": 104420 }, { "epoch": 1.16, "learning_rate": 3.0728843712168186e-05, "loss": 0.6719, "step": 104425 }, { "epoch": 1.16, "learning_rate": 3.072792098502968e-05, "loss": 0.6689, "step": 104430 }, { "epoch": 1.16, "learning_rate": 3.072699825789117e-05, "loss": 0.7044, "step": 104435 }, { "epoch": 1.16, "learning_rate": 3.072607553075265e-05, "loss": 0.6391, "step": 104440 }, { "epoch": 1.16, "learning_rate": 3.072515280361414e-05, "loss": 0.6644, "step": 104445 }, { "epoch": 1.16, "learning_rate": 3.0724230076475625e-05, "loss": 0.7052, "step": 104450 }, { "epoch": 1.16, "learning_rate": 3.072330734933711e-05, "loss": 0.6159, "step": 104455 }, { "epoch": 1.16, "learning_rate": 3.07223846221986e-05, "loss": 0.6818, "step": 104460 }, { "epoch": 1.16, "learning_rate": 3.072146189506009e-05, "loss": 0.689, "step": 104465 }, { "epoch": 1.16, "learning_rate": 3.0720539167921576e-05, "loss": 0.7186, "step": 104470 }, { "epoch": 1.16, "learning_rate": 3.0719616440783064e-05, "loss": 0.6621, "step": 104475 }, { "epoch": 1.16, "learning_rate": 3.071869371364455e-05, "loss": 0.6358, "step": 104480 }, { "epoch": 1.16, "learning_rate": 3.071777098650604e-05, "loss": 0.6758, "step": 104485 }, { "epoch": 1.16, "learning_rate": 3.071684825936753e-05, "loss": 0.6354, "step": 104490 }, { "epoch": 1.16, "learning_rate": 3.0715925532229016e-05, "loss": 0.6196, "step": 104495 }, { "epoch": 1.16, "learning_rate": 3.0715002805090504e-05, "loss": 0.6737, "step": 104500 }, { "epoch": 1.16, "learning_rate": 3.071408007795199e-05, "loss": 0.6963, "step": 104505 }, { "epoch": 1.16, "learning_rate": 3.071315735081348e-05, "loss": 0.5958, "step": 104510 }, { "epoch": 1.16, "learning_rate": 3.071223462367497e-05, "loss": 0.6745, "step": 104515 }, { "epoch": 1.16, "learning_rate": 3.0711311896536455e-05, "loss": 0.6165, "step": 104520 }, { "epoch": 1.16, "learning_rate": 3.071038916939794e-05, "loss": 0.7214, "step": 104525 }, { "epoch": 1.16, "learning_rate": 3.0709466442259424e-05, "loss": 0.7303, "step": 104530 }, { "epoch": 1.16, "learning_rate": 3.070854371512091e-05, "loss": 0.6934, "step": 104535 }, { "epoch": 1.16, "learning_rate": 3.0707620987982406e-05, "loss": 0.6743, "step": 104540 }, { "epoch": 1.16, "learning_rate": 3.0706698260843894e-05, "loss": 0.6257, "step": 104545 }, { "epoch": 1.16, "learning_rate": 3.0705775533705375e-05, "loss": 0.655, "step": 104550 }, { "epoch": 1.16, "learning_rate": 3.070485280656686e-05, "loss": 0.7465, "step": 104555 }, { "epoch": 1.16, "learning_rate": 3.070393007942836e-05, "loss": 0.666, "step": 104560 }, { "epoch": 1.16, "learning_rate": 3.070300735228984e-05, "loss": 0.6873, "step": 104565 }, { "epoch": 1.16, "learning_rate": 3.070208462515133e-05, "loss": 0.6435, "step": 104570 }, { "epoch": 1.16, "learning_rate": 3.0701161898012814e-05, "loss": 0.669, "step": 104575 }, { "epoch": 1.16, "learning_rate": 3.070023917087431e-05, "loss": 0.6466, "step": 104580 }, { "epoch": 1.16, "learning_rate": 3.069931644373579e-05, "loss": 0.6504, "step": 104585 }, { "epoch": 1.16, "learning_rate": 3.069839371659728e-05, "loss": 0.6437, "step": 104590 }, { "epoch": 1.16, "learning_rate": 3.0697470989458766e-05, "loss": 0.6266, "step": 104595 }, { "epoch": 1.16, "learning_rate": 3.0696548262320254e-05, "loss": 0.6307, "step": 104600 }, { "epoch": 1.16, "learning_rate": 3.069562553518174e-05, "loss": 0.6627, "step": 104605 }, { "epoch": 1.16, "learning_rate": 3.069470280804323e-05, "loss": 0.6532, "step": 104610 }, { "epoch": 1.16, "learning_rate": 3.069378008090472e-05, "loss": 0.6762, "step": 104615 }, { "epoch": 1.16, "learning_rate": 3.0692857353766205e-05, "loss": 0.6449, "step": 104620 }, { "epoch": 1.16, "learning_rate": 3.069193462662769e-05, "loss": 0.6656, "step": 104625 }, { "epoch": 1.16, "learning_rate": 3.069101189948918e-05, "loss": 0.6508, "step": 104630 }, { "epoch": 1.16, "learning_rate": 3.069008917235067e-05, "loss": 0.6271, "step": 104635 }, { "epoch": 1.16, "learning_rate": 3.068916644521215e-05, "loss": 0.6647, "step": 104640 }, { "epoch": 1.16, "learning_rate": 3.0688243718073644e-05, "loss": 0.7078, "step": 104645 }, { "epoch": 1.16, "learning_rate": 3.068732099093513e-05, "loss": 0.7236, "step": 104650 }, { "epoch": 1.16, "learning_rate": 3.068639826379662e-05, "loss": 0.6911, "step": 104655 }, { "epoch": 1.16, "learning_rate": 3.06854755366581e-05, "loss": 0.6488, "step": 104660 }, { "epoch": 1.16, "learning_rate": 3.0684552809519596e-05, "loss": 0.6285, "step": 104665 }, { "epoch": 1.16, "learning_rate": 3.0683630082381084e-05, "loss": 0.626, "step": 104670 }, { "epoch": 1.16, "learning_rate": 3.068270735524257e-05, "loss": 0.6528, "step": 104675 }, { "epoch": 1.16, "learning_rate": 3.068178462810405e-05, "loss": 0.7041, "step": 104680 }, { "epoch": 1.16, "learning_rate": 3.068086190096554e-05, "loss": 0.6824, "step": 104685 }, { "epoch": 1.16, "learning_rate": 3.0679939173827035e-05, "loss": 0.6773, "step": 104690 }, { "epoch": 1.16, "learning_rate": 3.0679016446688516e-05, "loss": 0.6843, "step": 104695 }, { "epoch": 1.16, "learning_rate": 3.0678093719550004e-05, "loss": 0.6703, "step": 104700 }, { "epoch": 1.16, "learning_rate": 3.067717099241149e-05, "loss": 0.665, "step": 104705 }, { "epoch": 1.16, "learning_rate": 3.0676248265272986e-05, "loss": 0.638, "step": 104710 }, { "epoch": 1.16, "learning_rate": 3.067532553813447e-05, "loss": 0.6839, "step": 104715 }, { "epoch": 1.16, "learning_rate": 3.0674402810995955e-05, "loss": 0.7096, "step": 104720 }, { "epoch": 1.16, "learning_rate": 3.067348008385744e-05, "loss": 0.7157, "step": 104725 }, { "epoch": 1.16, "learning_rate": 3.067255735671893e-05, "loss": 0.6893, "step": 104730 }, { "epoch": 1.16, "learning_rate": 3.067163462958042e-05, "loss": 0.6849, "step": 104735 }, { "epoch": 1.16, "learning_rate": 3.067071190244191e-05, "loss": 0.6205, "step": 104740 }, { "epoch": 1.16, "learning_rate": 3.0669789175303395e-05, "loss": 0.6719, "step": 104745 }, { "epoch": 1.16, "learning_rate": 3.066886644816488e-05, "loss": 0.7321, "step": 104750 }, { "epoch": 1.16, "learning_rate": 3.066794372102637e-05, "loss": 0.6851, "step": 104755 }, { "epoch": 1.16, "learning_rate": 3.066702099388786e-05, "loss": 0.7329, "step": 104760 }, { "epoch": 1.16, "learning_rate": 3.0666098266749346e-05, "loss": 0.6365, "step": 104765 }, { "epoch": 1.16, "learning_rate": 3.066517553961083e-05, "loss": 0.7275, "step": 104770 }, { "epoch": 1.16, "learning_rate": 3.066425281247232e-05, "loss": 0.6219, "step": 104775 }, { "epoch": 1.16, "learning_rate": 3.066333008533381e-05, "loss": 0.7194, "step": 104780 }, { "epoch": 1.16, "learning_rate": 3.06624073581953e-05, "loss": 0.6517, "step": 104785 }, { "epoch": 1.16, "learning_rate": 3.066148463105678e-05, "loss": 0.6456, "step": 104790 }, { "epoch": 1.16, "learning_rate": 3.066056190391827e-05, "loss": 0.664, "step": 104795 }, { "epoch": 1.16, "learning_rate": 3.065963917677976e-05, "loss": 0.6611, "step": 104800 }, { "epoch": 1.16, "learning_rate": 3.065871644964124e-05, "loss": 0.7321, "step": 104805 }, { "epoch": 1.16, "learning_rate": 3.065779372250273e-05, "loss": 0.6447, "step": 104810 }, { "epoch": 1.16, "learning_rate": 3.0656870995364224e-05, "loss": 0.6509, "step": 104815 }, { "epoch": 1.16, "learning_rate": 3.065594826822571e-05, "loss": 0.7083, "step": 104820 }, { "epoch": 1.16, "learning_rate": 3.065502554108719e-05, "loss": 0.7065, "step": 104825 }, { "epoch": 1.16, "learning_rate": 3.065410281394868e-05, "loss": 0.6435, "step": 104830 }, { "epoch": 1.16, "learning_rate": 3.065318008681017e-05, "loss": 0.663, "step": 104835 }, { "epoch": 1.16, "learning_rate": 3.065225735967166e-05, "loss": 0.6299, "step": 104840 }, { "epoch": 1.16, "learning_rate": 3.0651334632533145e-05, "loss": 0.6495, "step": 104845 }, { "epoch": 1.16, "learning_rate": 3.065041190539463e-05, "loss": 0.6298, "step": 104850 }, { "epoch": 1.16, "learning_rate": 3.064948917825612e-05, "loss": 0.6566, "step": 104855 }, { "epoch": 1.16, "learning_rate": 3.064856645111761e-05, "loss": 0.6676, "step": 104860 }, { "epoch": 1.16, "learning_rate": 3.0647643723979096e-05, "loss": 0.6548, "step": 104865 }, { "epoch": 1.16, "learning_rate": 3.0646720996840584e-05, "loss": 0.6463, "step": 104870 }, { "epoch": 1.16, "learning_rate": 3.064579826970207e-05, "loss": 0.6675, "step": 104875 }, { "epoch": 1.16, "learning_rate": 3.064487554256356e-05, "loss": 0.6936, "step": 104880 }, { "epoch": 1.16, "learning_rate": 3.064395281542505e-05, "loss": 0.696, "step": 104885 }, { "epoch": 1.16, "learning_rate": 3.0643030088286535e-05, "loss": 0.6407, "step": 104890 }, { "epoch": 1.16, "learning_rate": 3.064210736114802e-05, "loss": 0.6401, "step": 104895 }, { "epoch": 1.16, "learning_rate": 3.0641184634009504e-05, "loss": 0.7083, "step": 104900 }, { "epoch": 1.16, "learning_rate": 3.0640261906871e-05, "loss": 0.6301, "step": 104905 }, { "epoch": 1.16, "learning_rate": 3.063933917973249e-05, "loss": 0.6617, "step": 104910 }, { "epoch": 1.16, "learning_rate": 3.063841645259397e-05, "loss": 0.6949, "step": 104915 }, { "epoch": 1.16, "learning_rate": 3.0637493725455456e-05, "loss": 0.6703, "step": 104920 }, { "epoch": 1.16, "learning_rate": 3.063657099831695e-05, "loss": 0.6251, "step": 104925 }, { "epoch": 1.16, "learning_rate": 3.063564827117844e-05, "loss": 0.611, "step": 104930 }, { "epoch": 1.16, "learning_rate": 3.063472554403992e-05, "loss": 0.7082, "step": 104935 }, { "epoch": 1.16, "learning_rate": 3.063380281690141e-05, "loss": 0.6924, "step": 104940 }, { "epoch": 1.16, "learning_rate": 3.06328800897629e-05, "loss": 0.6725, "step": 104945 }, { "epoch": 1.16, "learning_rate": 3.063195736262438e-05, "loss": 0.6479, "step": 104950 }, { "epoch": 1.16, "learning_rate": 3.063103463548587e-05, "loss": 0.6443, "step": 104955 }, { "epoch": 1.16, "learning_rate": 3.063011190834736e-05, "loss": 0.6488, "step": 104960 }, { "epoch": 1.16, "learning_rate": 3.062918918120885e-05, "loss": 0.6636, "step": 104965 }, { "epoch": 1.16, "learning_rate": 3.0628266454070334e-05, "loss": 0.6894, "step": 104970 }, { "epoch": 1.16, "learning_rate": 3.062734372693182e-05, "loss": 0.597, "step": 104975 }, { "epoch": 1.16, "learning_rate": 3.062642099979331e-05, "loss": 0.7226, "step": 104980 }, { "epoch": 1.16, "learning_rate": 3.06254982726548e-05, "loss": 0.7259, "step": 104985 }, { "epoch": 1.16, "learning_rate": 3.0624575545516286e-05, "loss": 0.7135, "step": 104990 }, { "epoch": 1.16, "learning_rate": 3.0623652818377773e-05, "loss": 0.6236, "step": 104995 }, { "epoch": 1.16, "learning_rate": 3.062273009123926e-05, "loss": 0.683, "step": 105000 }, { "epoch": 1.16, "eval_loss": 0.6692774891853333, "eval_runtime": 70.4055, "eval_samples_per_second": 28.407, "eval_steps_per_second": 14.203, "step": 105000 }, { "epoch": 1.16, "learning_rate": 3.062180736410075e-05, "loss": 0.6778, "step": 105005 }, { "epoch": 1.16, "learning_rate": 3.062088463696224e-05, "loss": 0.706, "step": 105010 }, { "epoch": 1.16, "learning_rate": 3.0619961909823725e-05, "loss": 0.6552, "step": 105015 }, { "epoch": 1.16, "learning_rate": 3.061903918268521e-05, "loss": 0.6889, "step": 105020 }, { "epoch": 1.16, "learning_rate": 3.0618116455546694e-05, "loss": 0.7102, "step": 105025 }, { "epoch": 1.16, "learning_rate": 3.061719372840819e-05, "loss": 0.6882, "step": 105030 }, { "epoch": 1.16, "learning_rate": 3.0616271001269676e-05, "loss": 0.6401, "step": 105035 }, { "epoch": 1.16, "learning_rate": 3.0615348274131164e-05, "loss": 0.7175, "step": 105040 }, { "epoch": 1.16, "learning_rate": 3.0614425546992645e-05, "loss": 0.6783, "step": 105045 }, { "epoch": 1.16, "learning_rate": 3.061350281985413e-05, "loss": 0.6058, "step": 105050 }, { "epoch": 1.16, "learning_rate": 3.061258009271563e-05, "loss": 0.677, "step": 105055 }, { "epoch": 1.16, "learning_rate": 3.0611657365577115e-05, "loss": 0.6539, "step": 105060 }, { "epoch": 1.16, "learning_rate": 3.0610734638438597e-05, "loss": 0.6578, "step": 105065 }, { "epoch": 1.16, "learning_rate": 3.0609811911300084e-05, "loss": 0.6822, "step": 105070 }, { "epoch": 1.16, "learning_rate": 3.060888918416158e-05, "loss": 0.6816, "step": 105075 }, { "epoch": 1.16, "learning_rate": 3.060796645702306e-05, "loss": 0.691, "step": 105080 }, { "epoch": 1.16, "learning_rate": 3.060704372988455e-05, "loss": 0.709, "step": 105085 }, { "epoch": 1.16, "learning_rate": 3.0606121002746036e-05, "loss": 0.6431, "step": 105090 }, { "epoch": 1.16, "learning_rate": 3.060519827560753e-05, "loss": 0.6676, "step": 105095 }, { "epoch": 1.16, "learning_rate": 3.060427554846901e-05, "loss": 0.6687, "step": 105100 }, { "epoch": 1.16, "learning_rate": 3.06033528213305e-05, "loss": 0.6364, "step": 105105 }, { "epoch": 1.16, "learning_rate": 3.060243009419199e-05, "loss": 0.6633, "step": 105110 }, { "epoch": 1.16, "learning_rate": 3.0601507367053475e-05, "loss": 0.6444, "step": 105115 }, { "epoch": 1.16, "learning_rate": 3.060058463991496e-05, "loss": 0.6239, "step": 105120 }, { "epoch": 1.16, "learning_rate": 3.059966191277645e-05, "loss": 0.6983, "step": 105125 }, { "epoch": 1.16, "learning_rate": 3.059873918563794e-05, "loss": 0.68, "step": 105130 }, { "epoch": 1.16, "learning_rate": 3.0597816458499426e-05, "loss": 0.7158, "step": 105135 }, { "epoch": 1.16, "learning_rate": 3.0596893731360914e-05, "loss": 0.724, "step": 105140 }, { "epoch": 1.16, "learning_rate": 3.05959710042224e-05, "loss": 0.6926, "step": 105145 }, { "epoch": 1.16, "learning_rate": 3.059504827708389e-05, "loss": 0.6682, "step": 105150 }, { "epoch": 1.16, "learning_rate": 3.059412554994537e-05, "loss": 0.6516, "step": 105155 }, { "epoch": 1.16, "learning_rate": 3.0593202822806866e-05, "loss": 0.6637, "step": 105160 }, { "epoch": 1.16, "learning_rate": 3.0592280095668354e-05, "loss": 0.6646, "step": 105165 }, { "epoch": 1.16, "learning_rate": 3.059135736852984e-05, "loss": 0.6109, "step": 105170 }, { "epoch": 1.16, "learning_rate": 3.059043464139132e-05, "loss": 0.6951, "step": 105175 }, { "epoch": 1.16, "learning_rate": 3.058951191425282e-05, "loss": 0.642, "step": 105180 }, { "epoch": 1.16, "learning_rate": 3.0588589187114305e-05, "loss": 0.6804, "step": 105185 }, { "epoch": 1.16, "learning_rate": 3.0587666459975786e-05, "loss": 0.708, "step": 105190 }, { "epoch": 1.16, "learning_rate": 3.0586743732837274e-05, "loss": 0.6354, "step": 105195 }, { "epoch": 1.16, "learning_rate": 3.058582100569876e-05, "loss": 0.6235, "step": 105200 }, { "epoch": 1.16, "learning_rate": 3.0584898278560256e-05, "loss": 0.6925, "step": 105205 }, { "epoch": 1.16, "learning_rate": 3.058397555142174e-05, "loss": 0.6242, "step": 105210 }, { "epoch": 1.17, "learning_rate": 3.0583052824283225e-05, "loss": 0.681, "step": 105215 }, { "epoch": 1.17, "learning_rate": 3.058213009714471e-05, "loss": 0.6723, "step": 105220 }, { "epoch": 1.17, "learning_rate": 3.05812073700062e-05, "loss": 0.6465, "step": 105225 }, { "epoch": 1.17, "learning_rate": 3.058028464286769e-05, "loss": 0.6796, "step": 105230 }, { "epoch": 1.17, "learning_rate": 3.057936191572918e-05, "loss": 0.661, "step": 105235 }, { "epoch": 1.17, "learning_rate": 3.0578439188590664e-05, "loss": 0.6893, "step": 105240 }, { "epoch": 1.17, "learning_rate": 3.057751646145215e-05, "loss": 0.7081, "step": 105245 }, { "epoch": 1.17, "learning_rate": 3.057659373431364e-05, "loss": 0.6721, "step": 105250 }, { "epoch": 1.17, "learning_rate": 3.057567100717513e-05, "loss": 0.6666, "step": 105255 }, { "epoch": 1.17, "learning_rate": 3.0574748280036616e-05, "loss": 0.6851, "step": 105260 }, { "epoch": 1.17, "learning_rate": 3.0573825552898104e-05, "loss": 0.6101, "step": 105265 }, { "epoch": 1.17, "learning_rate": 3.057290282575959e-05, "loss": 0.6879, "step": 105270 }, { "epoch": 1.17, "learning_rate": 3.057198009862108e-05, "loss": 0.6789, "step": 105275 }, { "epoch": 1.17, "learning_rate": 3.057105737148257e-05, "loss": 0.6584, "step": 105280 }, { "epoch": 1.17, "learning_rate": 3.057013464434405e-05, "loss": 0.6613, "step": 105285 }, { "epoch": 1.17, "learning_rate": 3.056921191720554e-05, "loss": 0.6513, "step": 105290 }, { "epoch": 1.17, "learning_rate": 3.056828919006703e-05, "loss": 0.644, "step": 105295 }, { "epoch": 1.17, "learning_rate": 3.056736646292851e-05, "loss": 0.6643, "step": 105300 }, { "epoch": 1.17, "learning_rate": 3.056644373579e-05, "loss": 0.665, "step": 105305 }, { "epoch": 1.17, "learning_rate": 3.0565521008651494e-05, "loss": 0.6279, "step": 105310 }, { "epoch": 1.17, "learning_rate": 3.056459828151298e-05, "loss": 0.664, "step": 105315 }, { "epoch": 1.17, "learning_rate": 3.056367555437446e-05, "loss": 0.6727, "step": 105320 }, { "epoch": 1.17, "learning_rate": 3.056275282723595e-05, "loss": 0.6172, "step": 105325 }, { "epoch": 1.17, "learning_rate": 3.0561830100097446e-05, "loss": 0.6429, "step": 105330 }, { "epoch": 1.17, "learning_rate": 3.056090737295893e-05, "loss": 0.6509, "step": 105335 }, { "epoch": 1.17, "learning_rate": 3.0559984645820415e-05, "loss": 0.7142, "step": 105340 }, { "epoch": 1.17, "learning_rate": 3.05590619186819e-05, "loss": 0.7271, "step": 105345 }, { "epoch": 1.17, "learning_rate": 3.05581391915434e-05, "loss": 0.6943, "step": 105350 }, { "epoch": 1.17, "learning_rate": 3.055721646440488e-05, "loss": 0.6249, "step": 105355 }, { "epoch": 1.17, "learning_rate": 3.0556293737266366e-05, "loss": 0.6406, "step": 105360 }, { "epoch": 1.17, "learning_rate": 3.0555371010127854e-05, "loss": 0.6882, "step": 105365 }, { "epoch": 1.17, "learning_rate": 3.055444828298934e-05, "loss": 0.6861, "step": 105370 }, { "epoch": 1.17, "learning_rate": 3.055352555585083e-05, "loss": 0.6705, "step": 105375 }, { "epoch": 1.17, "learning_rate": 3.055260282871232e-05, "loss": 0.7102, "step": 105380 }, { "epoch": 1.17, "learning_rate": 3.0551680101573805e-05, "loss": 0.6724, "step": 105385 }, { "epoch": 1.17, "learning_rate": 3.055075737443529e-05, "loss": 0.6639, "step": 105390 }, { "epoch": 1.17, "learning_rate": 3.054983464729678e-05, "loss": 0.6251, "step": 105395 }, { "epoch": 1.17, "learning_rate": 3.054891192015827e-05, "loss": 0.6508, "step": 105400 }, { "epoch": 1.17, "learning_rate": 3.054798919301976e-05, "loss": 0.6188, "step": 105405 }, { "epoch": 1.17, "learning_rate": 3.054706646588124e-05, "loss": 0.6946, "step": 105410 }, { "epoch": 1.17, "learning_rate": 3.054614373874273e-05, "loss": 0.666, "step": 105415 }, { "epoch": 1.17, "learning_rate": 3.054522101160422e-05, "loss": 0.6018, "step": 105420 }, { "epoch": 1.17, "learning_rate": 3.054429828446571e-05, "loss": 0.6489, "step": 105425 }, { "epoch": 1.17, "learning_rate": 3.054337555732719e-05, "loss": 0.6677, "step": 105430 }, { "epoch": 1.17, "learning_rate": 3.054245283018868e-05, "loss": 0.6293, "step": 105435 }, { "epoch": 1.17, "learning_rate": 3.054153010305017e-05, "loss": 0.6112, "step": 105440 }, { "epoch": 1.17, "learning_rate": 3.054060737591166e-05, "loss": 0.7423, "step": 105445 }, { "epoch": 1.17, "learning_rate": 3.053968464877314e-05, "loss": 0.6841, "step": 105450 }, { "epoch": 1.17, "learning_rate": 3.053876192163463e-05, "loss": 0.7045, "step": 105455 }, { "epoch": 1.17, "learning_rate": 3.053783919449612e-05, "loss": 0.6037, "step": 105460 }, { "epoch": 1.17, "learning_rate": 3.0536916467357604e-05, "loss": 0.6365, "step": 105465 }, { "epoch": 1.17, "learning_rate": 3.053599374021909e-05, "loss": 0.6823, "step": 105470 }, { "epoch": 1.17, "learning_rate": 3.053507101308058e-05, "loss": 0.6699, "step": 105475 }, { "epoch": 1.17, "learning_rate": 3.0534148285942074e-05, "loss": 0.7061, "step": 105480 }, { "epoch": 1.17, "learning_rate": 3.0533225558803556e-05, "loss": 0.6687, "step": 105485 }, { "epoch": 1.17, "learning_rate": 3.053230283166504e-05, "loss": 0.7075, "step": 105490 }, { "epoch": 1.17, "learning_rate": 3.053138010452653e-05, "loss": 0.6478, "step": 105495 }, { "epoch": 1.17, "learning_rate": 3.053045737738802e-05, "loss": 0.6632, "step": 105500 }, { "epoch": 1.17, "learning_rate": 3.052953465024951e-05, "loss": 0.7389, "step": 105505 }, { "epoch": 1.17, "learning_rate": 3.0528611923110995e-05, "loss": 0.6566, "step": 105510 }, { "epoch": 1.17, "learning_rate": 3.052768919597248e-05, "loss": 0.6765, "step": 105515 }, { "epoch": 1.17, "learning_rate": 3.052676646883397e-05, "loss": 0.6894, "step": 105520 }, { "epoch": 1.17, "learning_rate": 3.052584374169546e-05, "loss": 0.655, "step": 105525 }, { "epoch": 1.17, "learning_rate": 3.0524921014556946e-05, "loss": 0.6624, "step": 105530 }, { "epoch": 1.17, "learning_rate": 3.0523998287418434e-05, "loss": 0.6376, "step": 105535 }, { "epoch": 1.17, "learning_rate": 3.0523075560279915e-05, "loss": 0.7093, "step": 105540 }, { "epoch": 1.17, "learning_rate": 3.052215283314141e-05, "loss": 0.6577, "step": 105545 }, { "epoch": 1.17, "learning_rate": 3.05212301060029e-05, "loss": 0.65, "step": 105550 }, { "epoch": 1.17, "learning_rate": 3.0520307378864385e-05, "loss": 0.6662, "step": 105555 }, { "epoch": 1.17, "learning_rate": 3.0519384651725866e-05, "loss": 0.6807, "step": 105560 }, { "epoch": 1.17, "learning_rate": 3.051846192458736e-05, "loss": 0.6336, "step": 105565 }, { "epoch": 1.17, "learning_rate": 3.0517539197448846e-05, "loss": 0.6891, "step": 105570 }, { "epoch": 1.17, "learning_rate": 3.0516616470310333e-05, "loss": 0.6454, "step": 105575 }, { "epoch": 1.17, "learning_rate": 3.0515693743171818e-05, "loss": 0.6804, "step": 105580 }, { "epoch": 1.17, "learning_rate": 3.0514771016033306e-05, "loss": 0.6667, "step": 105585 }, { "epoch": 1.17, "learning_rate": 3.0513848288894797e-05, "loss": 0.6506, "step": 105590 }, { "epoch": 1.17, "learning_rate": 3.0512925561756285e-05, "loss": 0.6539, "step": 105595 }, { "epoch": 1.17, "learning_rate": 3.051200283461777e-05, "loss": 0.681, "step": 105600 }, { "epoch": 1.17, "learning_rate": 3.0511080107479257e-05, "loss": 0.7075, "step": 105605 }, { "epoch": 1.17, "learning_rate": 3.051015738034075e-05, "loss": 0.6382, "step": 105610 }, { "epoch": 1.17, "learning_rate": 3.0509234653202233e-05, "loss": 0.6087, "step": 105615 }, { "epoch": 1.17, "learning_rate": 3.050831192606372e-05, "loss": 0.6674, "step": 105620 }, { "epoch": 1.17, "learning_rate": 3.050738919892521e-05, "loss": 0.6315, "step": 105625 }, { "epoch": 1.17, "learning_rate": 3.05064664717867e-05, "loss": 0.6635, "step": 105630 }, { "epoch": 1.17, "learning_rate": 3.0505543744648184e-05, "loss": 0.6967, "step": 105635 }, { "epoch": 1.17, "learning_rate": 3.0504621017509672e-05, "loss": 0.6764, "step": 105640 }, { "epoch": 1.17, "learning_rate": 3.0503698290371156e-05, "loss": 0.7154, "step": 105645 }, { "epoch": 1.17, "learning_rate": 3.050277556323265e-05, "loss": 0.6935, "step": 105650 }, { "epoch": 1.17, "learning_rate": 3.0501852836094136e-05, "loss": 0.6996, "step": 105655 }, { "epoch": 1.17, "learning_rate": 3.0500930108955623e-05, "loss": 0.6728, "step": 105660 }, { "epoch": 1.17, "learning_rate": 3.0500007381817108e-05, "loss": 0.709, "step": 105665 }, { "epoch": 1.17, "learning_rate": 3.0499084654678596e-05, "loss": 0.7131, "step": 105670 }, { "epoch": 1.17, "learning_rate": 3.0498161927540087e-05, "loss": 0.7164, "step": 105675 }, { "epoch": 1.17, "learning_rate": 3.049723920040157e-05, "loss": 0.6765, "step": 105680 }, { "epoch": 1.17, "learning_rate": 3.049631647326306e-05, "loss": 0.626, "step": 105685 }, { "epoch": 1.17, "learning_rate": 3.0495393746124544e-05, "loss": 0.7027, "step": 105690 }, { "epoch": 1.17, "learning_rate": 3.049447101898604e-05, "loss": 0.6429, "step": 105695 }, { "epoch": 1.17, "learning_rate": 3.0493548291847523e-05, "loss": 0.6647, "step": 105700 }, { "epoch": 1.17, "learning_rate": 3.049262556470901e-05, "loss": 0.6902, "step": 105705 }, { "epoch": 1.17, "learning_rate": 3.0491702837570495e-05, "loss": 0.6352, "step": 105710 }, { "epoch": 1.17, "learning_rate": 3.0490780110431986e-05, "loss": 0.7266, "step": 105715 }, { "epoch": 1.17, "learning_rate": 3.0489857383293474e-05, "loss": 0.6387, "step": 105720 }, { "epoch": 1.17, "learning_rate": 3.0488934656154962e-05, "loss": 0.6685, "step": 105725 }, { "epoch": 1.17, "learning_rate": 3.0488011929016447e-05, "loss": 0.7794, "step": 105730 }, { "epoch": 1.17, "learning_rate": 3.0487089201877934e-05, "loss": 0.7445, "step": 105735 }, { "epoch": 1.17, "learning_rate": 3.0486166474739426e-05, "loss": 0.6099, "step": 105740 }, { "epoch": 1.17, "learning_rate": 3.048524374760091e-05, "loss": 0.6382, "step": 105745 }, { "epoch": 1.17, "learning_rate": 3.0484321020462398e-05, "loss": 0.6791, "step": 105750 }, { "epoch": 1.17, "learning_rate": 3.0483398293323882e-05, "loss": 0.6205, "step": 105755 }, { "epoch": 1.17, "learning_rate": 3.0482475566185377e-05, "loss": 0.6125, "step": 105760 }, { "epoch": 1.17, "learning_rate": 3.048155283904686e-05, "loss": 0.6341, "step": 105765 }, { "epoch": 1.17, "learning_rate": 3.048063011190835e-05, "loss": 0.6915, "step": 105770 }, { "epoch": 1.17, "learning_rate": 3.0479707384769834e-05, "loss": 0.6395, "step": 105775 }, { "epoch": 1.17, "learning_rate": 3.0478784657631325e-05, "loss": 0.6404, "step": 105780 }, { "epoch": 1.17, "learning_rate": 3.0477861930492813e-05, "loss": 0.661, "step": 105785 }, { "epoch": 1.17, "learning_rate": 3.0476939203354297e-05, "loss": 0.6503, "step": 105790 }, { "epoch": 1.17, "learning_rate": 3.0476016476215785e-05, "loss": 0.6318, "step": 105795 }, { "epoch": 1.17, "learning_rate": 3.0475093749077276e-05, "loss": 0.6723, "step": 105800 }, { "epoch": 1.17, "learning_rate": 3.0474171021938764e-05, "loss": 0.6726, "step": 105805 }, { "epoch": 1.17, "learning_rate": 3.047324829480025e-05, "loss": 0.7021, "step": 105810 }, { "epoch": 1.17, "learning_rate": 3.0472325567661737e-05, "loss": 0.6705, "step": 105815 }, { "epoch": 1.17, "learning_rate": 3.047140284052322e-05, "loss": 0.636, "step": 105820 }, { "epoch": 1.17, "learning_rate": 3.0470480113384716e-05, "loss": 0.7063, "step": 105825 }, { "epoch": 1.17, "learning_rate": 3.04695573862462e-05, "loss": 0.6435, "step": 105830 }, { "epoch": 1.17, "learning_rate": 3.0468634659107688e-05, "loss": 0.7231, "step": 105835 }, { "epoch": 1.17, "learning_rate": 3.0467711931969172e-05, "loss": 0.7113, "step": 105840 }, { "epoch": 1.17, "learning_rate": 3.0466789204830664e-05, "loss": 0.6561, "step": 105845 }, { "epoch": 1.17, "learning_rate": 3.046586647769215e-05, "loss": 0.6444, "step": 105850 }, { "epoch": 1.17, "learning_rate": 3.0464943750553636e-05, "loss": 0.6714, "step": 105855 }, { "epoch": 1.17, "learning_rate": 3.0464021023415124e-05, "loss": 0.7288, "step": 105860 }, { "epoch": 1.17, "learning_rate": 3.0463098296276615e-05, "loss": 0.6593, "step": 105865 }, { "epoch": 1.17, "learning_rate": 3.0462175569138103e-05, "loss": 0.6736, "step": 105870 }, { "epoch": 1.17, "learning_rate": 3.0461252841999587e-05, "loss": 0.6605, "step": 105875 }, { "epoch": 1.17, "learning_rate": 3.0460330114861075e-05, "loss": 0.6228, "step": 105880 }, { "epoch": 1.17, "learning_rate": 3.045940738772256e-05, "loss": 0.6665, "step": 105885 }, { "epoch": 1.17, "learning_rate": 3.045848466058405e-05, "loss": 0.6828, "step": 105890 }, { "epoch": 1.17, "learning_rate": 3.045756193344554e-05, "loss": 0.6378, "step": 105895 }, { "epoch": 1.17, "learning_rate": 3.0456639206307027e-05, "loss": 0.6917, "step": 105900 }, { "epoch": 1.17, "learning_rate": 3.045571647916851e-05, "loss": 0.666, "step": 105905 }, { "epoch": 1.17, "learning_rate": 3.0454793752030002e-05, "loss": 0.6191, "step": 105910 }, { "epoch": 1.17, "learning_rate": 3.045387102489149e-05, "loss": 0.6548, "step": 105915 }, { "epoch": 1.17, "learning_rate": 3.0452948297752975e-05, "loss": 0.6687, "step": 105920 }, { "epoch": 1.17, "learning_rate": 3.0452025570614462e-05, "loss": 0.6841, "step": 105925 }, { "epoch": 1.17, "learning_rate": 3.0451102843475954e-05, "loss": 0.6735, "step": 105930 }, { "epoch": 1.17, "learning_rate": 3.045018011633744e-05, "loss": 0.6917, "step": 105935 }, { "epoch": 1.17, "learning_rate": 3.0449257389198926e-05, "loss": 0.6463, "step": 105940 }, { "epoch": 1.17, "learning_rate": 3.0448334662060414e-05, "loss": 0.7041, "step": 105945 }, { "epoch": 1.17, "learning_rate": 3.0447411934921905e-05, "loss": 0.6699, "step": 105950 }, { "epoch": 1.17, "learning_rate": 3.044648920778339e-05, "loss": 0.7224, "step": 105955 }, { "epoch": 1.17, "learning_rate": 3.0445566480644877e-05, "loss": 0.7609, "step": 105960 }, { "epoch": 1.17, "learning_rate": 3.0444643753506362e-05, "loss": 0.6234, "step": 105965 }, { "epoch": 1.17, "learning_rate": 3.044372102636785e-05, "loss": 0.6354, "step": 105970 }, { "epoch": 1.17, "learning_rate": 3.044279829922934e-05, "loss": 0.6514, "step": 105975 }, { "epoch": 1.17, "learning_rate": 3.044187557209083e-05, "loss": 0.6694, "step": 105980 }, { "epoch": 1.17, "learning_rate": 3.0440952844952313e-05, "loss": 0.6894, "step": 105985 }, { "epoch": 1.17, "learning_rate": 3.04400301178138e-05, "loss": 0.659, "step": 105990 }, { "epoch": 1.17, "learning_rate": 3.0439107390675292e-05, "loss": 0.6981, "step": 105995 }, { "epoch": 1.17, "learning_rate": 3.0438184663536777e-05, "loss": 0.6654, "step": 106000 }, { "epoch": 1.17, "eval_loss": 0.6240520477294922, "eval_runtime": 70.3761, "eval_samples_per_second": 28.419, "eval_steps_per_second": 14.209, "step": 106000 }, { "epoch": 1.17, "learning_rate": 3.0437261936398265e-05, "loss": 0.6274, "step": 106005 }, { "epoch": 1.17, "learning_rate": 3.0436339209259753e-05, "loss": 0.6996, "step": 106010 }, { "epoch": 1.17, "learning_rate": 3.0435416482121244e-05, "loss": 0.6656, "step": 106015 }, { "epoch": 1.17, "learning_rate": 3.0434493754982728e-05, "loss": 0.6631, "step": 106020 }, { "epoch": 1.17, "learning_rate": 3.0433571027844216e-05, "loss": 0.6825, "step": 106025 }, { "epoch": 1.17, "learning_rate": 3.04326483007057e-05, "loss": 0.6818, "step": 106030 }, { "epoch": 1.17, "learning_rate": 3.043172557356719e-05, "loss": 0.7081, "step": 106035 }, { "epoch": 1.17, "learning_rate": 3.043080284642868e-05, "loss": 0.6442, "step": 106040 }, { "epoch": 1.17, "learning_rate": 3.0429880119290167e-05, "loss": 0.6581, "step": 106045 }, { "epoch": 1.17, "learning_rate": 3.0428957392151652e-05, "loss": 0.6689, "step": 106050 }, { "epoch": 1.17, "learning_rate": 3.042803466501314e-05, "loss": 0.6727, "step": 106055 }, { "epoch": 1.17, "learning_rate": 3.042711193787463e-05, "loss": 0.6811, "step": 106060 }, { "epoch": 1.17, "learning_rate": 3.0426189210736115e-05, "loss": 0.6561, "step": 106065 }, { "epoch": 1.17, "learning_rate": 3.0425266483597603e-05, "loss": 0.6956, "step": 106070 }, { "epoch": 1.17, "learning_rate": 3.0424343756459088e-05, "loss": 0.7143, "step": 106075 }, { "epoch": 1.17, "learning_rate": 3.0423421029320582e-05, "loss": 0.7577, "step": 106080 }, { "epoch": 1.17, "learning_rate": 3.0422498302182067e-05, "loss": 0.6757, "step": 106085 }, { "epoch": 1.17, "learning_rate": 3.0421575575043555e-05, "loss": 0.7164, "step": 106090 }, { "epoch": 1.17, "learning_rate": 3.042065284790504e-05, "loss": 0.6904, "step": 106095 }, { "epoch": 1.17, "learning_rate": 3.041973012076653e-05, "loss": 0.6392, "step": 106100 }, { "epoch": 1.17, "learning_rate": 3.0418807393628018e-05, "loss": 0.6805, "step": 106105 }, { "epoch": 1.17, "learning_rate": 3.0417884666489506e-05, "loss": 0.6681, "step": 106110 }, { "epoch": 1.17, "learning_rate": 3.041696193935099e-05, "loss": 0.6651, "step": 106115 }, { "epoch": 1.18, "learning_rate": 3.041603921221248e-05, "loss": 0.6362, "step": 106120 }, { "epoch": 1.18, "learning_rate": 3.041511648507397e-05, "loss": 0.68, "step": 106125 }, { "epoch": 1.18, "learning_rate": 3.0414193757935454e-05, "loss": 0.6353, "step": 106130 }, { "epoch": 1.18, "learning_rate": 3.0413271030796942e-05, "loss": 0.6916, "step": 106135 }, { "epoch": 1.18, "learning_rate": 3.0412348303658426e-05, "loss": 0.701, "step": 106140 }, { "epoch": 1.18, "learning_rate": 3.041142557651992e-05, "loss": 0.631, "step": 106145 }, { "epoch": 1.18, "learning_rate": 3.0410502849381405e-05, "loss": 0.7135, "step": 106150 }, { "epoch": 1.18, "learning_rate": 3.0409580122242893e-05, "loss": 0.7085, "step": 106155 }, { "epoch": 1.18, "learning_rate": 3.0408657395104378e-05, "loss": 0.6318, "step": 106160 }, { "epoch": 1.18, "learning_rate": 3.040773466796587e-05, "loss": 0.7005, "step": 106165 }, { "epoch": 1.18, "learning_rate": 3.0406811940827357e-05, "loss": 0.6706, "step": 106170 }, { "epoch": 1.18, "learning_rate": 3.040588921368884e-05, "loss": 0.6672, "step": 106175 }, { "epoch": 1.18, "learning_rate": 3.040496648655033e-05, "loss": 0.7126, "step": 106180 }, { "epoch": 1.18, "learning_rate": 3.040404375941182e-05, "loss": 0.6502, "step": 106185 }, { "epoch": 1.18, "learning_rate": 3.0403121032273308e-05, "loss": 0.6801, "step": 106190 }, { "epoch": 1.18, "learning_rate": 3.0402198305134793e-05, "loss": 0.6741, "step": 106195 }, { "epoch": 1.18, "learning_rate": 3.040127557799628e-05, "loss": 0.6889, "step": 106200 }, { "epoch": 1.18, "learning_rate": 3.0400352850857765e-05, "loss": 0.6639, "step": 106205 }, { "epoch": 1.18, "learning_rate": 3.039943012371926e-05, "loss": 0.7437, "step": 106210 }, { "epoch": 1.18, "learning_rate": 3.0398507396580744e-05, "loss": 0.6547, "step": 106215 }, { "epoch": 1.18, "learning_rate": 3.0397584669442232e-05, "loss": 0.652, "step": 106220 }, { "epoch": 1.18, "learning_rate": 3.0396661942303716e-05, "loss": 0.7088, "step": 106225 }, { "epoch": 1.18, "learning_rate": 3.0395739215165208e-05, "loss": 0.6478, "step": 106230 }, { "epoch": 1.18, "learning_rate": 3.0394816488026696e-05, "loss": 0.6869, "step": 106235 }, { "epoch": 1.18, "learning_rate": 3.039389376088818e-05, "loss": 0.6686, "step": 106240 }, { "epoch": 1.18, "learning_rate": 3.0392971033749668e-05, "loss": 0.6198, "step": 106245 }, { "epoch": 1.18, "learning_rate": 3.039204830661116e-05, "loss": 0.6384, "step": 106250 }, { "epoch": 1.18, "learning_rate": 3.0391125579472647e-05, "loss": 0.6643, "step": 106255 }, { "epoch": 1.18, "learning_rate": 3.039020285233413e-05, "loss": 0.647, "step": 106260 }, { "epoch": 1.18, "learning_rate": 3.038928012519562e-05, "loss": 0.6922, "step": 106265 }, { "epoch": 1.18, "learning_rate": 3.0388357398057104e-05, "loss": 0.6795, "step": 106270 }, { "epoch": 1.18, "learning_rate": 3.0387434670918595e-05, "loss": 0.6644, "step": 106275 }, { "epoch": 1.18, "learning_rate": 3.0386511943780083e-05, "loss": 0.6461, "step": 106280 }, { "epoch": 1.18, "learning_rate": 3.038558921664157e-05, "loss": 0.7304, "step": 106285 }, { "epoch": 1.18, "learning_rate": 3.0384666489503055e-05, "loss": 0.6857, "step": 106290 }, { "epoch": 1.18, "learning_rate": 3.0383743762364546e-05, "loss": 0.7145, "step": 106295 }, { "epoch": 1.18, "learning_rate": 3.0382821035226034e-05, "loss": 0.7238, "step": 106300 }, { "epoch": 1.18, "learning_rate": 3.038189830808752e-05, "loss": 0.6121, "step": 106305 }, { "epoch": 1.18, "learning_rate": 3.0380975580949006e-05, "loss": 0.6448, "step": 106310 }, { "epoch": 1.18, "learning_rate": 3.0380052853810498e-05, "loss": 0.6837, "step": 106315 }, { "epoch": 1.18, "learning_rate": 3.0379130126671986e-05, "loss": 0.6503, "step": 106320 }, { "epoch": 1.18, "learning_rate": 3.037820739953347e-05, "loss": 0.7131, "step": 106325 }, { "epoch": 1.18, "learning_rate": 3.0377284672394958e-05, "loss": 0.6523, "step": 106330 }, { "epoch": 1.18, "learning_rate": 3.037636194525645e-05, "loss": 0.6032, "step": 106335 }, { "epoch": 1.18, "learning_rate": 3.0375439218117934e-05, "loss": 0.7897, "step": 106340 }, { "epoch": 1.18, "learning_rate": 3.037451649097942e-05, "loss": 0.6689, "step": 106345 }, { "epoch": 1.18, "learning_rate": 3.0373593763840906e-05, "loss": 0.7015, "step": 106350 }, { "epoch": 1.18, "learning_rate": 3.0372671036702394e-05, "loss": 0.6897, "step": 106355 }, { "epoch": 1.18, "learning_rate": 3.0371748309563885e-05, "loss": 0.6947, "step": 106360 }, { "epoch": 1.18, "learning_rate": 3.0370825582425373e-05, "loss": 0.6561, "step": 106365 }, { "epoch": 1.18, "learning_rate": 3.0369902855286857e-05, "loss": 0.6744, "step": 106370 }, { "epoch": 1.18, "learning_rate": 3.0368980128148345e-05, "loss": 0.7229, "step": 106375 }, { "epoch": 1.18, "learning_rate": 3.0368057401009836e-05, "loss": 0.6688, "step": 106380 }, { "epoch": 1.18, "learning_rate": 3.036713467387132e-05, "loss": 0.6873, "step": 106385 }, { "epoch": 1.18, "learning_rate": 3.036621194673281e-05, "loss": 0.6847, "step": 106390 }, { "epoch": 1.18, "learning_rate": 3.0365289219594297e-05, "loss": 0.6145, "step": 106395 }, { "epoch": 1.18, "learning_rate": 3.0364366492455788e-05, "loss": 0.6453, "step": 106400 }, { "epoch": 1.18, "learning_rate": 3.0363443765317272e-05, "loss": 0.6565, "step": 106405 }, { "epoch": 1.18, "learning_rate": 3.036252103817876e-05, "loss": 0.7012, "step": 106410 }, { "epoch": 1.18, "learning_rate": 3.0361598311040245e-05, "loss": 0.6553, "step": 106415 }, { "epoch": 1.18, "learning_rate": 3.0360675583901732e-05, "loss": 0.7598, "step": 106420 }, { "epoch": 1.18, "learning_rate": 3.0359752856763224e-05, "loss": 0.7039, "step": 106425 }, { "epoch": 1.18, "learning_rate": 3.035883012962471e-05, "loss": 0.6174, "step": 106430 }, { "epoch": 1.18, "learning_rate": 3.0357907402486196e-05, "loss": 0.6087, "step": 106435 }, { "epoch": 1.18, "learning_rate": 3.0356984675347684e-05, "loss": 0.671, "step": 106440 }, { "epoch": 1.18, "learning_rate": 3.0356061948209175e-05, "loss": 0.6583, "step": 106445 }, { "epoch": 1.18, "learning_rate": 3.035513922107066e-05, "loss": 0.6522, "step": 106450 }, { "epoch": 1.18, "learning_rate": 3.0354216493932147e-05, "loss": 0.6922, "step": 106455 }, { "epoch": 1.18, "learning_rate": 3.0353293766793632e-05, "loss": 0.6719, "step": 106460 }, { "epoch": 1.18, "learning_rate": 3.0352371039655126e-05, "loss": 0.6893, "step": 106465 }, { "epoch": 1.18, "learning_rate": 3.035144831251661e-05, "loss": 0.7078, "step": 106470 }, { "epoch": 1.18, "learning_rate": 3.03505255853781e-05, "loss": 0.7027, "step": 106475 }, { "epoch": 1.18, "learning_rate": 3.0349602858239583e-05, "loss": 0.6567, "step": 106480 }, { "epoch": 1.18, "learning_rate": 3.0348680131101074e-05, "loss": 0.688, "step": 106485 }, { "epoch": 1.18, "learning_rate": 3.0347757403962562e-05, "loss": 0.737, "step": 106490 }, { "epoch": 1.18, "learning_rate": 3.034683467682405e-05, "loss": 0.6624, "step": 106495 }, { "epoch": 1.18, "learning_rate": 3.0345911949685535e-05, "loss": 0.6649, "step": 106500 }, { "epoch": 1.18, "learning_rate": 3.0344989222547022e-05, "loss": 0.6848, "step": 106505 }, { "epoch": 1.18, "learning_rate": 3.0344066495408514e-05, "loss": 0.6755, "step": 106510 }, { "epoch": 1.18, "learning_rate": 3.0343143768269998e-05, "loss": 0.6658, "step": 106515 }, { "epoch": 1.18, "learning_rate": 3.0342221041131486e-05, "loss": 0.6932, "step": 106520 }, { "epoch": 1.18, "learning_rate": 3.034129831399297e-05, "loss": 0.6275, "step": 106525 }, { "epoch": 1.18, "learning_rate": 3.0340375586854465e-05, "loss": 0.6971, "step": 106530 }, { "epoch": 1.18, "learning_rate": 3.033945285971595e-05, "loss": 0.6326, "step": 106535 }, { "epoch": 1.18, "learning_rate": 3.0338530132577437e-05, "loss": 0.6922, "step": 106540 }, { "epoch": 1.18, "learning_rate": 3.0337607405438922e-05, "loss": 0.6624, "step": 106545 }, { "epoch": 1.18, "learning_rate": 3.0336684678300413e-05, "loss": 0.6033, "step": 106550 }, { "epoch": 1.18, "learning_rate": 3.03357619511619e-05, "loss": 0.6639, "step": 106555 }, { "epoch": 1.18, "learning_rate": 3.0334839224023385e-05, "loss": 0.7049, "step": 106560 }, { "epoch": 1.18, "learning_rate": 3.0333916496884873e-05, "loss": 0.669, "step": 106565 }, { "epoch": 1.18, "learning_rate": 3.033299376974636e-05, "loss": 0.6764, "step": 106570 }, { "epoch": 1.18, "learning_rate": 3.0332071042607852e-05, "loss": 0.6877, "step": 106575 }, { "epoch": 1.18, "learning_rate": 3.0331148315469337e-05, "loss": 0.6812, "step": 106580 }, { "epoch": 1.18, "learning_rate": 3.0330225588330825e-05, "loss": 0.613, "step": 106585 }, { "epoch": 1.18, "learning_rate": 3.032930286119231e-05, "loss": 0.7116, "step": 106590 }, { "epoch": 1.18, "learning_rate": 3.0328380134053804e-05, "loss": 0.677, "step": 106595 }, { "epoch": 1.18, "learning_rate": 3.0327457406915288e-05, "loss": 0.6623, "step": 106600 }, { "epoch": 1.18, "learning_rate": 3.0326534679776776e-05, "loss": 0.6846, "step": 106605 }, { "epoch": 1.18, "learning_rate": 3.032561195263826e-05, "loss": 0.6903, "step": 106610 }, { "epoch": 1.18, "learning_rate": 3.0324689225499752e-05, "loss": 0.6646, "step": 106615 }, { "epoch": 1.18, "learning_rate": 3.032376649836124e-05, "loss": 0.6326, "step": 106620 }, { "epoch": 1.18, "learning_rate": 3.0322843771222724e-05, "loss": 0.6834, "step": 106625 }, { "epoch": 1.18, "learning_rate": 3.0321921044084212e-05, "loss": 0.6755, "step": 106630 }, { "epoch": 1.18, "learning_rate": 3.0320998316945703e-05, "loss": 0.6462, "step": 106635 }, { "epoch": 1.18, "learning_rate": 3.032007558980719e-05, "loss": 0.6288, "step": 106640 }, { "epoch": 1.18, "learning_rate": 3.0319152862668675e-05, "loss": 0.6291, "step": 106645 }, { "epoch": 1.18, "learning_rate": 3.0318230135530163e-05, "loss": 0.6752, "step": 106650 }, { "epoch": 1.18, "learning_rate": 3.0317307408391648e-05, "loss": 0.686, "step": 106655 }, { "epoch": 1.18, "learning_rate": 3.031638468125314e-05, "loss": 0.6719, "step": 106660 }, { "epoch": 1.18, "learning_rate": 3.0315461954114627e-05, "loss": 0.6499, "step": 106665 }, { "epoch": 1.18, "learning_rate": 3.0314539226976115e-05, "loss": 0.6421, "step": 106670 }, { "epoch": 1.18, "learning_rate": 3.03136164998376e-05, "loss": 0.6678, "step": 106675 }, { "epoch": 1.18, "learning_rate": 3.031269377269909e-05, "loss": 0.6587, "step": 106680 }, { "epoch": 1.18, "learning_rate": 3.0311771045560578e-05, "loss": 0.6373, "step": 106685 }, { "epoch": 1.18, "learning_rate": 3.0310848318422063e-05, "loss": 0.7079, "step": 106690 }, { "epoch": 1.18, "learning_rate": 3.030992559128355e-05, "loss": 0.6994, "step": 106695 }, { "epoch": 1.18, "learning_rate": 3.0309002864145042e-05, "loss": 0.6617, "step": 106700 }, { "epoch": 1.18, "learning_rate": 3.030808013700653e-05, "loss": 0.6794, "step": 106705 }, { "epoch": 1.18, "learning_rate": 3.0307157409868014e-05, "loss": 0.6761, "step": 106710 }, { "epoch": 1.18, "learning_rate": 3.0306234682729502e-05, "loss": 0.666, "step": 106715 }, { "epoch": 1.18, "learning_rate": 3.0305311955590986e-05, "loss": 0.6545, "step": 106720 }, { "epoch": 1.18, "learning_rate": 3.0304389228452478e-05, "loss": 0.6427, "step": 106725 }, { "epoch": 1.18, "learning_rate": 3.0303466501313965e-05, "loss": 0.6838, "step": 106730 }, { "epoch": 1.18, "learning_rate": 3.030254377417545e-05, "loss": 0.6316, "step": 106735 }, { "epoch": 1.18, "learning_rate": 3.0301621047036938e-05, "loss": 0.7249, "step": 106740 }, { "epoch": 1.18, "learning_rate": 3.030069831989843e-05, "loss": 0.6319, "step": 106745 }, { "epoch": 1.18, "learning_rate": 3.0299775592759917e-05, "loss": 0.6615, "step": 106750 }, { "epoch": 1.18, "learning_rate": 3.02988528656214e-05, "loss": 0.6967, "step": 106755 }, { "epoch": 1.18, "learning_rate": 3.029793013848289e-05, "loss": 0.667, "step": 106760 }, { "epoch": 1.18, "learning_rate": 3.029700741134438e-05, "loss": 0.6217, "step": 106765 }, { "epoch": 1.18, "learning_rate": 3.0296084684205865e-05, "loss": 0.697, "step": 106770 }, { "epoch": 1.18, "learning_rate": 3.0295161957067353e-05, "loss": 0.659, "step": 106775 }, { "epoch": 1.18, "learning_rate": 3.029423922992884e-05, "loss": 0.6295, "step": 106780 }, { "epoch": 1.18, "learning_rate": 3.0293316502790332e-05, "loss": 0.7089, "step": 106785 }, { "epoch": 1.18, "learning_rate": 3.0292393775651816e-05, "loss": 0.6541, "step": 106790 }, { "epoch": 1.18, "learning_rate": 3.0291471048513304e-05, "loss": 0.688, "step": 106795 }, { "epoch": 1.18, "learning_rate": 3.029054832137479e-05, "loss": 0.6026, "step": 106800 }, { "epoch": 1.18, "learning_rate": 3.0289625594236276e-05, "loss": 0.656, "step": 106805 }, { "epoch": 1.18, "learning_rate": 3.0288702867097768e-05, "loss": 0.6806, "step": 106810 }, { "epoch": 1.18, "learning_rate": 3.0287780139959255e-05, "loss": 0.6244, "step": 106815 }, { "epoch": 1.18, "learning_rate": 3.028685741282074e-05, "loss": 0.6874, "step": 106820 }, { "epoch": 1.18, "learning_rate": 3.0285934685682228e-05, "loss": 0.6324, "step": 106825 }, { "epoch": 1.18, "learning_rate": 3.028501195854372e-05, "loss": 0.6265, "step": 106830 }, { "epoch": 1.18, "learning_rate": 3.0284089231405203e-05, "loss": 0.6411, "step": 106835 }, { "epoch": 1.18, "learning_rate": 3.028316650426669e-05, "loss": 0.6737, "step": 106840 }, { "epoch": 1.18, "learning_rate": 3.028224377712818e-05, "loss": 0.6589, "step": 106845 }, { "epoch": 1.18, "learning_rate": 3.028132104998967e-05, "loss": 0.6586, "step": 106850 }, { "epoch": 1.18, "learning_rate": 3.0280398322851155e-05, "loss": 0.6929, "step": 106855 }, { "epoch": 1.18, "learning_rate": 3.0279475595712643e-05, "loss": 0.6907, "step": 106860 }, { "epoch": 1.18, "learning_rate": 3.0278552868574127e-05, "loss": 0.6779, "step": 106865 }, { "epoch": 1.18, "learning_rate": 3.0277630141435615e-05, "loss": 0.681, "step": 106870 }, { "epoch": 1.18, "learning_rate": 3.0276707414297106e-05, "loss": 0.6498, "step": 106875 }, { "epoch": 1.18, "learning_rate": 3.0275784687158594e-05, "loss": 0.5984, "step": 106880 }, { "epoch": 1.18, "learning_rate": 3.027486196002008e-05, "loss": 0.6392, "step": 106885 }, { "epoch": 1.18, "learning_rate": 3.0273939232881566e-05, "loss": 0.6733, "step": 106890 }, { "epoch": 1.18, "learning_rate": 3.0273016505743058e-05, "loss": 0.6947, "step": 106895 }, { "epoch": 1.18, "learning_rate": 3.0272093778604542e-05, "loss": 0.6568, "step": 106900 }, { "epoch": 1.18, "learning_rate": 3.027117105146603e-05, "loss": 0.6861, "step": 106905 }, { "epoch": 1.18, "learning_rate": 3.0270248324327514e-05, "loss": 0.6918, "step": 106910 }, { "epoch": 1.18, "learning_rate": 3.026932559718901e-05, "loss": 0.6705, "step": 106915 }, { "epoch": 1.18, "learning_rate": 3.0268402870050494e-05, "loss": 0.6752, "step": 106920 }, { "epoch": 1.18, "learning_rate": 3.026748014291198e-05, "loss": 0.6781, "step": 106925 }, { "epoch": 1.18, "learning_rate": 3.0266557415773466e-05, "loss": 0.6989, "step": 106930 }, { "epoch": 1.18, "learning_rate": 3.0265634688634957e-05, "loss": 0.6323, "step": 106935 }, { "epoch": 1.18, "learning_rate": 3.0264711961496445e-05, "loss": 0.6354, "step": 106940 }, { "epoch": 1.18, "learning_rate": 3.026378923435793e-05, "loss": 0.6574, "step": 106945 }, { "epoch": 1.18, "learning_rate": 3.0262866507219417e-05, "loss": 0.6446, "step": 106950 }, { "epoch": 1.18, "learning_rate": 3.0261943780080905e-05, "loss": 0.7346, "step": 106955 }, { "epoch": 1.18, "learning_rate": 3.0261021052942396e-05, "loss": 0.6621, "step": 106960 }, { "epoch": 1.18, "learning_rate": 3.026009832580388e-05, "loss": 0.6413, "step": 106965 }, { "epoch": 1.18, "learning_rate": 3.025917559866537e-05, "loss": 0.6888, "step": 106970 }, { "epoch": 1.18, "learning_rate": 3.0258252871526853e-05, "loss": 0.6828, "step": 106975 }, { "epoch": 1.18, "learning_rate": 3.0257330144388348e-05, "loss": 0.6619, "step": 106980 }, { "epoch": 1.18, "learning_rate": 3.0256407417249832e-05, "loss": 0.6776, "step": 106985 }, { "epoch": 1.18, "learning_rate": 3.025548469011132e-05, "loss": 0.6579, "step": 106990 }, { "epoch": 1.18, "learning_rate": 3.0254561962972804e-05, "loss": 0.6903, "step": 106995 }, { "epoch": 1.18, "learning_rate": 3.0253639235834296e-05, "loss": 0.6626, "step": 107000 }, { "epoch": 1.18, "eval_loss": 0.6215060949325562, "eval_runtime": 70.1106, "eval_samples_per_second": 28.526, "eval_steps_per_second": 14.263, "step": 107000 }, { "epoch": 1.18, "learning_rate": 3.0252716508695784e-05, "loss": 0.6219, "step": 107005 }, { "epoch": 1.18, "learning_rate": 3.0251793781557268e-05, "loss": 0.6509, "step": 107010 }, { "epoch": 1.18, "learning_rate": 3.0250871054418756e-05, "loss": 0.6758, "step": 107015 }, { "epoch": 1.19, "learning_rate": 3.0249948327280247e-05, "loss": 0.6607, "step": 107020 }, { "epoch": 1.19, "learning_rate": 3.0249025600141735e-05, "loss": 0.6237, "step": 107025 }, { "epoch": 1.19, "learning_rate": 3.024810287300322e-05, "loss": 0.69, "step": 107030 }, { "epoch": 1.19, "learning_rate": 3.0247180145864707e-05, "loss": 0.6742, "step": 107035 }, { "epoch": 1.19, "learning_rate": 3.0246257418726192e-05, "loss": 0.6747, "step": 107040 }, { "epoch": 1.19, "learning_rate": 3.0245334691587683e-05, "loss": 0.6436, "step": 107045 }, { "epoch": 1.19, "learning_rate": 3.024441196444917e-05, "loss": 0.597, "step": 107050 }, { "epoch": 1.19, "learning_rate": 3.024348923731066e-05, "loss": 0.6737, "step": 107055 }, { "epoch": 1.19, "learning_rate": 3.0242566510172143e-05, "loss": 0.7232, "step": 107060 }, { "epoch": 1.19, "learning_rate": 3.0241643783033634e-05, "loss": 0.6246, "step": 107065 }, { "epoch": 1.19, "learning_rate": 3.0240721055895122e-05, "loss": 0.631, "step": 107070 }, { "epoch": 1.19, "learning_rate": 3.0239798328756607e-05, "loss": 0.6375, "step": 107075 }, { "epoch": 1.19, "learning_rate": 3.0238875601618095e-05, "loss": 0.6729, "step": 107080 }, { "epoch": 1.19, "learning_rate": 3.0237952874479586e-05, "loss": 0.6918, "step": 107085 }, { "epoch": 1.19, "learning_rate": 3.0237030147341074e-05, "loss": 0.6901, "step": 107090 }, { "epoch": 1.19, "learning_rate": 3.0236107420202558e-05, "loss": 0.6439, "step": 107095 }, { "epoch": 1.19, "learning_rate": 3.0235184693064046e-05, "loss": 0.6569, "step": 107100 }, { "epoch": 1.19, "learning_rate": 3.023426196592553e-05, "loss": 0.6658, "step": 107105 }, { "epoch": 1.19, "learning_rate": 3.023333923878702e-05, "loss": 0.702, "step": 107110 }, { "epoch": 1.19, "learning_rate": 3.023241651164851e-05, "loss": 0.6855, "step": 107115 }, { "epoch": 1.19, "learning_rate": 3.0231493784509994e-05, "loss": 0.6932, "step": 107120 }, { "epoch": 1.19, "learning_rate": 3.0230571057371482e-05, "loss": 0.6649, "step": 107125 }, { "epoch": 1.19, "learning_rate": 3.0229648330232973e-05, "loss": 0.6977, "step": 107130 }, { "epoch": 1.19, "learning_rate": 3.022872560309446e-05, "loss": 0.5907, "step": 107135 }, { "epoch": 1.19, "learning_rate": 3.0227802875955945e-05, "loss": 0.6863, "step": 107140 }, { "epoch": 1.19, "learning_rate": 3.0226880148817433e-05, "loss": 0.6099, "step": 107145 }, { "epoch": 1.19, "learning_rate": 3.0225957421678924e-05, "loss": 0.6529, "step": 107150 }, { "epoch": 1.19, "learning_rate": 3.0225034694540412e-05, "loss": 0.7005, "step": 107155 }, { "epoch": 1.19, "learning_rate": 3.0224111967401897e-05, "loss": 0.6422, "step": 107160 }, { "epoch": 1.19, "learning_rate": 3.0223189240263385e-05, "loss": 0.6647, "step": 107165 }, { "epoch": 1.19, "learning_rate": 3.0222266513124876e-05, "loss": 0.6872, "step": 107170 }, { "epoch": 1.19, "learning_rate": 3.022134378598636e-05, "loss": 0.7155, "step": 107175 }, { "epoch": 1.19, "learning_rate": 3.0220421058847848e-05, "loss": 0.6313, "step": 107180 }, { "epoch": 1.19, "learning_rate": 3.0219498331709333e-05, "loss": 0.6716, "step": 107185 }, { "epoch": 1.19, "learning_rate": 3.021857560457082e-05, "loss": 0.6491, "step": 107190 }, { "epoch": 1.19, "learning_rate": 3.021765287743231e-05, "loss": 0.6317, "step": 107195 }, { "epoch": 1.19, "learning_rate": 3.02167301502938e-05, "loss": 0.6438, "step": 107200 }, { "epoch": 1.19, "learning_rate": 3.0215807423155284e-05, "loss": 0.6553, "step": 107205 }, { "epoch": 1.19, "learning_rate": 3.0214884696016772e-05, "loss": 0.6004, "step": 107210 }, { "epoch": 1.19, "learning_rate": 3.0213961968878263e-05, "loss": 0.6649, "step": 107215 }, { "epoch": 1.19, "learning_rate": 3.0213039241739748e-05, "loss": 0.6539, "step": 107220 }, { "epoch": 1.19, "learning_rate": 3.0212116514601235e-05, "loss": 0.6576, "step": 107225 }, { "epoch": 1.19, "learning_rate": 3.0211193787462723e-05, "loss": 0.7014, "step": 107230 }, { "epoch": 1.19, "learning_rate": 3.0210271060324214e-05, "loss": 0.6709, "step": 107235 }, { "epoch": 1.19, "learning_rate": 3.02093483331857e-05, "loss": 0.6944, "step": 107240 }, { "epoch": 1.19, "learning_rate": 3.0208425606047187e-05, "loss": 0.6572, "step": 107245 }, { "epoch": 1.19, "learning_rate": 3.020750287890867e-05, "loss": 0.6625, "step": 107250 }, { "epoch": 1.19, "learning_rate": 3.020658015177016e-05, "loss": 0.62, "step": 107255 }, { "epoch": 1.19, "learning_rate": 3.020565742463165e-05, "loss": 0.6311, "step": 107260 }, { "epoch": 1.19, "learning_rate": 3.0204734697493138e-05, "loss": 0.6533, "step": 107265 }, { "epoch": 1.19, "learning_rate": 3.0203811970354623e-05, "loss": 0.6869, "step": 107270 }, { "epoch": 1.19, "learning_rate": 3.020288924321611e-05, "loss": 0.6897, "step": 107275 }, { "epoch": 1.19, "learning_rate": 3.0201966516077602e-05, "loss": 0.6296, "step": 107280 }, { "epoch": 1.19, "learning_rate": 3.0201043788939086e-05, "loss": 0.6428, "step": 107285 }, { "epoch": 1.19, "learning_rate": 3.0200121061800574e-05, "loss": 0.6898, "step": 107290 }, { "epoch": 1.19, "learning_rate": 3.019919833466206e-05, "loss": 0.6361, "step": 107295 }, { "epoch": 1.19, "learning_rate": 3.0198275607523553e-05, "loss": 0.6312, "step": 107300 }, { "epoch": 1.19, "learning_rate": 3.0197352880385038e-05, "loss": 0.6874, "step": 107305 }, { "epoch": 1.19, "learning_rate": 3.0196430153246525e-05, "loss": 0.6521, "step": 107310 }, { "epoch": 1.19, "learning_rate": 3.019550742610801e-05, "loss": 0.6581, "step": 107315 }, { "epoch": 1.19, "learning_rate": 3.01945846989695e-05, "loss": 0.6803, "step": 107320 }, { "epoch": 1.19, "learning_rate": 3.019366197183099e-05, "loss": 0.6655, "step": 107325 }, { "epoch": 1.19, "learning_rate": 3.0192739244692473e-05, "loss": 0.6731, "step": 107330 }, { "epoch": 1.19, "learning_rate": 3.019181651755396e-05, "loss": 0.66, "step": 107335 }, { "epoch": 1.19, "learning_rate": 3.019089379041545e-05, "loss": 0.6792, "step": 107340 }, { "epoch": 1.19, "learning_rate": 3.018997106327694e-05, "loss": 0.704, "step": 107345 }, { "epoch": 1.19, "learning_rate": 3.0189048336138425e-05, "loss": 0.6821, "step": 107350 }, { "epoch": 1.19, "learning_rate": 3.0188125608999913e-05, "loss": 0.6249, "step": 107355 }, { "epoch": 1.19, "learning_rate": 3.0187202881861397e-05, "loss": 0.6512, "step": 107360 }, { "epoch": 1.19, "learning_rate": 3.0186280154722892e-05, "loss": 0.6976, "step": 107365 }, { "epoch": 1.19, "learning_rate": 3.0185357427584376e-05, "loss": 0.6909, "step": 107370 }, { "epoch": 1.19, "learning_rate": 3.0184434700445864e-05, "loss": 0.721, "step": 107375 }, { "epoch": 1.19, "learning_rate": 3.018351197330735e-05, "loss": 0.6603, "step": 107380 }, { "epoch": 1.19, "learning_rate": 3.018258924616884e-05, "loss": 0.7175, "step": 107385 }, { "epoch": 1.19, "learning_rate": 3.0181666519030328e-05, "loss": 0.6401, "step": 107390 }, { "epoch": 1.19, "learning_rate": 3.0180743791891812e-05, "loss": 0.6701, "step": 107395 }, { "epoch": 1.19, "learning_rate": 3.01798210647533e-05, "loss": 0.72, "step": 107400 }, { "epoch": 1.19, "learning_rate": 3.0178898337614784e-05, "loss": 0.72, "step": 107405 }, { "epoch": 1.19, "learning_rate": 3.017797561047628e-05, "loss": 0.6624, "step": 107410 }, { "epoch": 1.19, "learning_rate": 3.0177052883337763e-05, "loss": 0.6499, "step": 107415 }, { "epoch": 1.19, "learning_rate": 3.017613015619925e-05, "loss": 0.6563, "step": 107420 }, { "epoch": 1.19, "learning_rate": 3.0175207429060736e-05, "loss": 0.663, "step": 107425 }, { "epoch": 1.19, "learning_rate": 3.0174284701922227e-05, "loss": 0.6402, "step": 107430 }, { "epoch": 1.19, "learning_rate": 3.0173361974783715e-05, "loss": 0.7039, "step": 107435 }, { "epoch": 1.19, "learning_rate": 3.0172439247645203e-05, "loss": 0.65, "step": 107440 }, { "epoch": 1.19, "learning_rate": 3.0171516520506687e-05, "loss": 0.6213, "step": 107445 }, { "epoch": 1.19, "learning_rate": 3.017059379336818e-05, "loss": 0.666, "step": 107450 }, { "epoch": 1.19, "learning_rate": 3.0169671066229666e-05, "loss": 0.678, "step": 107455 }, { "epoch": 1.19, "learning_rate": 3.016874833909115e-05, "loss": 0.7385, "step": 107460 }, { "epoch": 1.19, "learning_rate": 3.016782561195264e-05, "loss": 0.6439, "step": 107465 }, { "epoch": 1.19, "learning_rate": 3.016690288481413e-05, "loss": 0.648, "step": 107470 }, { "epoch": 1.19, "learning_rate": 3.0165980157675618e-05, "loss": 0.6369, "step": 107475 }, { "epoch": 1.19, "learning_rate": 3.0165057430537102e-05, "loss": 0.6324, "step": 107480 }, { "epoch": 1.19, "learning_rate": 3.016413470339859e-05, "loss": 0.6196, "step": 107485 }, { "epoch": 1.19, "learning_rate": 3.0163211976260074e-05, "loss": 0.7604, "step": 107490 }, { "epoch": 1.19, "learning_rate": 3.0162289249121566e-05, "loss": 0.6803, "step": 107495 }, { "epoch": 1.19, "learning_rate": 3.0161366521983053e-05, "loss": 0.6585, "step": 107500 }, { "epoch": 1.19, "learning_rate": 3.0160443794844538e-05, "loss": 0.6991, "step": 107505 }, { "epoch": 1.19, "learning_rate": 3.0159521067706026e-05, "loss": 0.616, "step": 107510 }, { "epoch": 1.19, "learning_rate": 3.0158598340567517e-05, "loss": 0.7285, "step": 107515 }, { "epoch": 1.19, "learning_rate": 3.0157675613429005e-05, "loss": 0.6917, "step": 107520 }, { "epoch": 1.19, "learning_rate": 3.015675288629049e-05, "loss": 0.6525, "step": 107525 }, { "epoch": 1.19, "learning_rate": 3.0155830159151977e-05, "loss": 0.6612, "step": 107530 }, { "epoch": 1.19, "learning_rate": 3.015490743201347e-05, "loss": 0.6135, "step": 107535 }, { "epoch": 1.19, "learning_rate": 3.0153984704874956e-05, "loss": 0.6782, "step": 107540 }, { "epoch": 1.19, "learning_rate": 3.015306197773644e-05, "loss": 0.6323, "step": 107545 }, { "epoch": 1.19, "learning_rate": 3.015213925059793e-05, "loss": 0.6839, "step": 107550 }, { "epoch": 1.19, "learning_rate": 3.0151216523459413e-05, "loss": 0.7071, "step": 107555 }, { "epoch": 1.19, "learning_rate": 3.0150293796320904e-05, "loss": 0.6272, "step": 107560 }, { "epoch": 1.19, "learning_rate": 3.0149371069182392e-05, "loss": 0.614, "step": 107565 }, { "epoch": 1.19, "learning_rate": 3.0148448342043877e-05, "loss": 0.6493, "step": 107570 }, { "epoch": 1.19, "learning_rate": 3.0147525614905364e-05, "loss": 0.6639, "step": 107575 }, { "epoch": 1.19, "learning_rate": 3.0146602887766856e-05, "loss": 0.6386, "step": 107580 }, { "epoch": 1.19, "learning_rate": 3.0145680160628344e-05, "loss": 0.6772, "step": 107585 }, { "epoch": 1.19, "learning_rate": 3.0144757433489828e-05, "loss": 0.6914, "step": 107590 }, { "epoch": 1.19, "learning_rate": 3.0143834706351316e-05, "loss": 0.6909, "step": 107595 }, { "epoch": 1.19, "learning_rate": 3.0142911979212807e-05, "loss": 0.6779, "step": 107600 }, { "epoch": 1.19, "learning_rate": 3.014198925207429e-05, "loss": 0.6133, "step": 107605 }, { "epoch": 1.19, "learning_rate": 3.014106652493578e-05, "loss": 0.6521, "step": 107610 }, { "epoch": 1.19, "learning_rate": 3.0140143797797267e-05, "loss": 0.6378, "step": 107615 }, { "epoch": 1.19, "learning_rate": 3.013922107065876e-05, "loss": 0.6717, "step": 107620 }, { "epoch": 1.19, "learning_rate": 3.0138298343520243e-05, "loss": 0.699, "step": 107625 }, { "epoch": 1.19, "learning_rate": 3.013737561638173e-05, "loss": 0.6676, "step": 107630 }, { "epoch": 1.19, "learning_rate": 3.0136452889243215e-05, "loss": 0.6754, "step": 107635 }, { "epoch": 1.19, "learning_rate": 3.0135530162104703e-05, "loss": 0.6718, "step": 107640 }, { "epoch": 1.19, "learning_rate": 3.0134607434966194e-05, "loss": 0.7015, "step": 107645 }, { "epoch": 1.19, "learning_rate": 3.0133684707827682e-05, "loss": 0.6602, "step": 107650 }, { "epoch": 1.19, "learning_rate": 3.0132761980689167e-05, "loss": 0.6671, "step": 107655 }, { "epoch": 1.19, "learning_rate": 3.0131839253550654e-05, "loss": 0.6421, "step": 107660 }, { "epoch": 1.19, "learning_rate": 3.0130916526412146e-05, "loss": 0.6485, "step": 107665 }, { "epoch": 1.19, "learning_rate": 3.012999379927363e-05, "loss": 0.7017, "step": 107670 }, { "epoch": 1.19, "learning_rate": 3.0129071072135118e-05, "loss": 0.6723, "step": 107675 }, { "epoch": 1.19, "learning_rate": 3.0128148344996602e-05, "loss": 0.6475, "step": 107680 }, { "epoch": 1.19, "learning_rate": 3.0127225617858097e-05, "loss": 0.6941, "step": 107685 }, { "epoch": 1.19, "learning_rate": 3.012630289071958e-05, "loss": 0.6762, "step": 107690 }, { "epoch": 1.19, "learning_rate": 3.012538016358107e-05, "loss": 0.641, "step": 107695 }, { "epoch": 1.19, "learning_rate": 3.0124457436442554e-05, "loss": 0.6296, "step": 107700 }, { "epoch": 1.19, "learning_rate": 3.0123534709304042e-05, "loss": 0.6425, "step": 107705 }, { "epoch": 1.19, "learning_rate": 3.0122611982165533e-05, "loss": 0.6013, "step": 107710 }, { "epoch": 1.19, "learning_rate": 3.0121689255027017e-05, "loss": 0.6613, "step": 107715 }, { "epoch": 1.19, "learning_rate": 3.0120766527888505e-05, "loss": 0.6812, "step": 107720 }, { "epoch": 1.19, "learning_rate": 3.0119843800749993e-05, "loss": 0.6375, "step": 107725 }, { "epoch": 1.19, "learning_rate": 3.0118921073611484e-05, "loss": 0.6489, "step": 107730 }, { "epoch": 1.19, "learning_rate": 3.011799834647297e-05, "loss": 0.7142, "step": 107735 }, { "epoch": 1.19, "learning_rate": 3.0117075619334457e-05, "loss": 0.6935, "step": 107740 }, { "epoch": 1.19, "learning_rate": 3.011615289219594e-05, "loss": 0.685, "step": 107745 }, { "epoch": 1.19, "learning_rate": 3.0115230165057436e-05, "loss": 0.6773, "step": 107750 }, { "epoch": 1.19, "learning_rate": 3.011430743791892e-05, "loss": 0.7133, "step": 107755 }, { "epoch": 1.19, "learning_rate": 3.0113384710780408e-05, "loss": 0.6946, "step": 107760 }, { "epoch": 1.19, "learning_rate": 3.0112461983641893e-05, "loss": 0.6796, "step": 107765 }, { "epoch": 1.19, "learning_rate": 3.0111539256503384e-05, "loss": 0.653, "step": 107770 }, { "epoch": 1.19, "learning_rate": 3.011061652936487e-05, "loss": 0.6434, "step": 107775 }, { "epoch": 1.19, "learning_rate": 3.0109693802226356e-05, "loss": 0.6138, "step": 107780 }, { "epoch": 1.19, "learning_rate": 3.0108771075087844e-05, "loss": 0.6973, "step": 107785 }, { "epoch": 1.19, "learning_rate": 3.010784834794933e-05, "loss": 0.6226, "step": 107790 }, { "epoch": 1.19, "learning_rate": 3.0106925620810823e-05, "loss": 0.6761, "step": 107795 }, { "epoch": 1.19, "learning_rate": 3.0106002893672307e-05, "loss": 0.6256, "step": 107800 }, { "epoch": 1.19, "learning_rate": 3.0105080166533795e-05, "loss": 0.6574, "step": 107805 }, { "epoch": 1.19, "learning_rate": 3.010415743939528e-05, "loss": 0.6182, "step": 107810 }, { "epoch": 1.19, "learning_rate": 3.010323471225677e-05, "loss": 0.6408, "step": 107815 }, { "epoch": 1.19, "learning_rate": 3.010231198511826e-05, "loss": 0.7057, "step": 107820 }, { "epoch": 1.19, "learning_rate": 3.0101389257979747e-05, "loss": 0.6484, "step": 107825 }, { "epoch": 1.19, "learning_rate": 3.010046653084123e-05, "loss": 0.7028, "step": 107830 }, { "epoch": 1.19, "learning_rate": 3.0099543803702722e-05, "loss": 0.7385, "step": 107835 }, { "epoch": 1.19, "learning_rate": 3.009862107656421e-05, "loss": 0.6843, "step": 107840 }, { "epoch": 1.19, "learning_rate": 3.0097698349425695e-05, "loss": 0.6338, "step": 107845 }, { "epoch": 1.19, "learning_rate": 3.0096775622287183e-05, "loss": 0.6829, "step": 107850 }, { "epoch": 1.19, "learning_rate": 3.0095852895148674e-05, "loss": 0.6786, "step": 107855 }, { "epoch": 1.19, "learning_rate": 3.009493016801016e-05, "loss": 0.642, "step": 107860 }, { "epoch": 1.19, "learning_rate": 3.0094007440871646e-05, "loss": 0.6829, "step": 107865 }, { "epoch": 1.19, "learning_rate": 3.0093084713733134e-05, "loss": 0.6922, "step": 107870 }, { "epoch": 1.19, "learning_rate": 3.009216198659462e-05, "loss": 0.6406, "step": 107875 }, { "epoch": 1.19, "learning_rate": 3.009123925945611e-05, "loss": 0.6549, "step": 107880 }, { "epoch": 1.19, "learning_rate": 3.0090316532317598e-05, "loss": 0.6436, "step": 107885 }, { "epoch": 1.19, "learning_rate": 3.0089393805179082e-05, "loss": 0.6534, "step": 107890 }, { "epoch": 1.19, "learning_rate": 3.008847107804057e-05, "loss": 0.6814, "step": 107895 }, { "epoch": 1.19, "learning_rate": 3.008754835090206e-05, "loss": 0.6313, "step": 107900 }, { "epoch": 1.19, "learning_rate": 3.008662562376355e-05, "loss": 0.6165, "step": 107905 }, { "epoch": 1.19, "learning_rate": 3.0085702896625033e-05, "loss": 0.7274, "step": 107910 }, { "epoch": 1.19, "learning_rate": 3.008478016948652e-05, "loss": 0.652, "step": 107915 }, { "epoch": 1.19, "learning_rate": 3.0083857442348012e-05, "loss": 0.6389, "step": 107920 }, { "epoch": 1.2, "learning_rate": 3.00829347152095e-05, "loss": 0.6185, "step": 107925 }, { "epoch": 1.2, "learning_rate": 3.0082011988070985e-05, "loss": 0.6644, "step": 107930 }, { "epoch": 1.2, "learning_rate": 3.0081089260932473e-05, "loss": 0.6612, "step": 107935 }, { "epoch": 1.2, "learning_rate": 3.0080166533793957e-05, "loss": 0.66, "step": 107940 }, { "epoch": 1.2, "learning_rate": 3.0079243806655448e-05, "loss": 0.6801, "step": 107945 }, { "epoch": 1.2, "learning_rate": 3.0078321079516936e-05, "loss": 0.6957, "step": 107950 }, { "epoch": 1.2, "learning_rate": 3.007739835237842e-05, "loss": 0.6159, "step": 107955 }, { "epoch": 1.2, "learning_rate": 3.007647562523991e-05, "loss": 0.6046, "step": 107960 }, { "epoch": 1.2, "learning_rate": 3.00755528981014e-05, "loss": 0.7096, "step": 107965 }, { "epoch": 1.2, "learning_rate": 3.0074630170962888e-05, "loss": 0.6439, "step": 107970 }, { "epoch": 1.2, "learning_rate": 3.0073707443824372e-05, "loss": 0.6343, "step": 107975 }, { "epoch": 1.2, "learning_rate": 3.007278471668586e-05, "loss": 0.6186, "step": 107980 }, { "epoch": 1.2, "learning_rate": 3.007186198954735e-05, "loss": 0.7141, "step": 107985 }, { "epoch": 1.2, "learning_rate": 3.0070939262408836e-05, "loss": 0.6973, "step": 107990 }, { "epoch": 1.2, "learning_rate": 3.0070016535270323e-05, "loss": 0.701, "step": 107995 }, { "epoch": 1.2, "learning_rate": 3.006909380813181e-05, "loss": 0.6976, "step": 108000 }, { "epoch": 1.2, "eval_loss": 0.6479074358940125, "eval_runtime": 70.0077, "eval_samples_per_second": 28.568, "eval_steps_per_second": 14.284, "step": 108000 }, { "epoch": 1.2, "learning_rate": 3.0068171080993302e-05, "loss": 0.6668, "step": 108005 }, { "epoch": 1.2, "learning_rate": 3.0067248353854787e-05, "loss": 0.6087, "step": 108010 }, { "epoch": 1.2, "learning_rate": 3.0066325626716275e-05, "loss": 0.6503, "step": 108015 }, { "epoch": 1.2, "learning_rate": 3.006540289957776e-05, "loss": 0.6698, "step": 108020 }, { "epoch": 1.2, "learning_rate": 3.0064480172439247e-05, "loss": 0.6997, "step": 108025 }, { "epoch": 1.2, "learning_rate": 3.006355744530074e-05, "loss": 0.6552, "step": 108030 }, { "epoch": 1.2, "learning_rate": 3.0062634718162226e-05, "loss": 0.6372, "step": 108035 }, { "epoch": 1.2, "learning_rate": 3.006171199102371e-05, "loss": 0.7469, "step": 108040 }, { "epoch": 1.2, "learning_rate": 3.00607892638852e-05, "loss": 0.7052, "step": 108045 }, { "epoch": 1.2, "learning_rate": 3.005986653674669e-05, "loss": 0.6881, "step": 108050 }, { "epoch": 1.2, "learning_rate": 3.0058943809608174e-05, "loss": 0.6911, "step": 108055 }, { "epoch": 1.2, "learning_rate": 3.0058021082469662e-05, "loss": 0.6602, "step": 108060 }, { "epoch": 1.2, "learning_rate": 3.0057098355331147e-05, "loss": 0.6223, "step": 108065 }, { "epoch": 1.2, "learning_rate": 3.005617562819264e-05, "loss": 0.6386, "step": 108070 }, { "epoch": 1.2, "learning_rate": 3.0055252901054126e-05, "loss": 0.6461, "step": 108075 }, { "epoch": 1.2, "learning_rate": 3.0054330173915613e-05, "loss": 0.6586, "step": 108080 }, { "epoch": 1.2, "learning_rate": 3.0053407446777098e-05, "loss": 0.6398, "step": 108085 }, { "epoch": 1.2, "learning_rate": 3.0052484719638586e-05, "loss": 0.6931, "step": 108090 }, { "epoch": 1.2, "learning_rate": 3.0051561992500077e-05, "loss": 0.6593, "step": 108095 }, { "epoch": 1.2, "learning_rate": 3.0050639265361565e-05, "loss": 0.6632, "step": 108100 }, { "epoch": 1.2, "learning_rate": 3.004971653822305e-05, "loss": 0.5982, "step": 108105 }, { "epoch": 1.2, "learning_rate": 3.0048793811084537e-05, "loss": 0.6403, "step": 108110 }, { "epoch": 1.2, "learning_rate": 3.004787108394603e-05, "loss": 0.695, "step": 108115 }, { "epoch": 1.2, "learning_rate": 3.0046948356807513e-05, "loss": 0.6463, "step": 108120 }, { "epoch": 1.2, "learning_rate": 3.0046025629669e-05, "loss": 0.6676, "step": 108125 }, { "epoch": 1.2, "learning_rate": 3.0045102902530485e-05, "loss": 0.6582, "step": 108130 }, { "epoch": 1.2, "learning_rate": 3.004418017539198e-05, "loss": 0.6326, "step": 108135 }, { "epoch": 1.2, "learning_rate": 3.0043257448253464e-05, "loss": 0.6143, "step": 108140 }, { "epoch": 1.2, "learning_rate": 3.0042334721114952e-05, "loss": 0.6501, "step": 108145 }, { "epoch": 1.2, "learning_rate": 3.0041411993976437e-05, "loss": 0.7173, "step": 108150 }, { "epoch": 1.2, "learning_rate": 3.0040489266837928e-05, "loss": 0.6049, "step": 108155 }, { "epoch": 1.2, "learning_rate": 3.0039566539699416e-05, "loss": 0.6785, "step": 108160 }, { "epoch": 1.2, "learning_rate": 3.00386438125609e-05, "loss": 0.6575, "step": 108165 }, { "epoch": 1.2, "learning_rate": 3.0037721085422388e-05, "loss": 0.6178, "step": 108170 }, { "epoch": 1.2, "learning_rate": 3.0036798358283876e-05, "loss": 0.7062, "step": 108175 }, { "epoch": 1.2, "learning_rate": 3.0035875631145367e-05, "loss": 0.7175, "step": 108180 }, { "epoch": 1.2, "learning_rate": 3.003495290400685e-05, "loss": 0.6509, "step": 108185 }, { "epoch": 1.2, "learning_rate": 3.003403017686834e-05, "loss": 0.6915, "step": 108190 }, { "epoch": 1.2, "learning_rate": 3.0033107449729824e-05, "loss": 0.6356, "step": 108195 }, { "epoch": 1.2, "learning_rate": 3.0032184722591315e-05, "loss": 0.6823, "step": 108200 }, { "epoch": 1.2, "learning_rate": 3.0031261995452803e-05, "loss": 0.6801, "step": 108205 }, { "epoch": 1.2, "learning_rate": 3.003033926831429e-05, "loss": 0.6582, "step": 108210 }, { "epoch": 1.2, "learning_rate": 3.0029416541175775e-05, "loss": 0.7194, "step": 108215 }, { "epoch": 1.2, "learning_rate": 3.0028493814037266e-05, "loss": 0.6513, "step": 108220 }, { "epoch": 1.2, "learning_rate": 3.0027571086898754e-05, "loss": 0.6583, "step": 108225 }, { "epoch": 1.2, "learning_rate": 3.002664835976024e-05, "loss": 0.6715, "step": 108230 }, { "epoch": 1.2, "learning_rate": 3.0025725632621727e-05, "loss": 0.6893, "step": 108235 }, { "epoch": 1.2, "learning_rate": 3.002480290548321e-05, "loss": 0.6894, "step": 108240 }, { "epoch": 1.2, "learning_rate": 3.0023880178344706e-05, "loss": 0.6405, "step": 108245 }, { "epoch": 1.2, "learning_rate": 3.002295745120619e-05, "loss": 0.6677, "step": 108250 }, { "epoch": 1.2, "learning_rate": 3.0022034724067678e-05, "loss": 0.5966, "step": 108255 }, { "epoch": 1.2, "learning_rate": 3.0021111996929162e-05, "loss": 0.662, "step": 108260 }, { "epoch": 1.2, "learning_rate": 3.0020189269790654e-05, "loss": 0.7395, "step": 108265 }, { "epoch": 1.2, "learning_rate": 3.001926654265214e-05, "loss": 0.6465, "step": 108270 }, { "epoch": 1.2, "learning_rate": 3.0018343815513626e-05, "loss": 0.6482, "step": 108275 }, { "epoch": 1.2, "learning_rate": 3.0017421088375114e-05, "loss": 0.6653, "step": 108280 }, { "epoch": 1.2, "learning_rate": 3.0016498361236605e-05, "loss": 0.6881, "step": 108285 }, { "epoch": 1.2, "learning_rate": 3.0015575634098093e-05, "loss": 0.6862, "step": 108290 }, { "epoch": 1.2, "learning_rate": 3.0014652906959577e-05, "loss": 0.641, "step": 108295 }, { "epoch": 1.2, "learning_rate": 3.0013730179821065e-05, "loss": 0.6554, "step": 108300 }, { "epoch": 1.2, "learning_rate": 3.0012807452682556e-05, "loss": 0.5944, "step": 108305 }, { "epoch": 1.2, "learning_rate": 3.0011884725544044e-05, "loss": 0.7146, "step": 108310 }, { "epoch": 1.2, "learning_rate": 3.001096199840553e-05, "loss": 0.6193, "step": 108315 }, { "epoch": 1.2, "learning_rate": 3.0010039271267017e-05, "loss": 0.714, "step": 108320 }, { "epoch": 1.2, "learning_rate": 3.00091165441285e-05, "loss": 0.6251, "step": 108325 }, { "epoch": 1.2, "learning_rate": 3.0008193816989992e-05, "loss": 0.6828, "step": 108330 }, { "epoch": 1.2, "learning_rate": 3.000727108985148e-05, "loss": 0.6679, "step": 108335 }, { "epoch": 1.2, "learning_rate": 3.0006348362712965e-05, "loss": 0.621, "step": 108340 }, { "epoch": 1.2, "learning_rate": 3.0005425635574452e-05, "loss": 0.6873, "step": 108345 }, { "epoch": 1.2, "learning_rate": 3.0004502908435944e-05, "loss": 0.6785, "step": 108350 }, { "epoch": 1.2, "learning_rate": 3.000358018129743e-05, "loss": 0.684, "step": 108355 }, { "epoch": 1.2, "learning_rate": 3.0002657454158916e-05, "loss": 0.6868, "step": 108360 }, { "epoch": 1.2, "learning_rate": 3.0001734727020404e-05, "loss": 0.6752, "step": 108365 }, { "epoch": 1.2, "learning_rate": 3.0000811999881895e-05, "loss": 0.6871, "step": 108370 }, { "epoch": 1.2, "learning_rate": 2.999988927274338e-05, "loss": 0.6851, "step": 108375 }, { "epoch": 1.2, "learning_rate": 2.9998966545604867e-05, "loss": 0.6323, "step": 108380 }, { "epoch": 1.2, "learning_rate": 2.9998043818466355e-05, "loss": 0.6655, "step": 108385 }, { "epoch": 1.2, "learning_rate": 2.999712109132784e-05, "loss": 0.6512, "step": 108390 }, { "epoch": 1.2, "learning_rate": 2.999619836418933e-05, "loss": 0.6672, "step": 108395 }, { "epoch": 1.2, "learning_rate": 2.999527563705082e-05, "loss": 0.6258, "step": 108400 }, { "epoch": 1.2, "learning_rate": 2.9994352909912303e-05, "loss": 0.6953, "step": 108405 }, { "epoch": 1.2, "learning_rate": 2.999343018277379e-05, "loss": 0.6241, "step": 108410 }, { "epoch": 1.2, "learning_rate": 2.9992507455635282e-05, "loss": 0.72, "step": 108415 }, { "epoch": 1.2, "learning_rate": 2.999158472849677e-05, "loss": 0.6337, "step": 108420 }, { "epoch": 1.2, "learning_rate": 2.9990662001358255e-05, "loss": 0.6782, "step": 108425 }, { "epoch": 1.2, "learning_rate": 2.9989739274219743e-05, "loss": 0.7269, "step": 108430 }, { "epoch": 1.2, "learning_rate": 2.9988816547081234e-05, "loss": 0.669, "step": 108435 }, { "epoch": 1.2, "learning_rate": 2.9987893819942718e-05, "loss": 0.6856, "step": 108440 }, { "epoch": 1.2, "learning_rate": 2.9986971092804206e-05, "loss": 0.6448, "step": 108445 }, { "epoch": 1.2, "learning_rate": 2.998604836566569e-05, "loss": 0.6334, "step": 108450 }, { "epoch": 1.2, "learning_rate": 2.9985125638527185e-05, "loss": 0.6024, "step": 108455 }, { "epoch": 1.2, "learning_rate": 2.998420291138867e-05, "loss": 0.6421, "step": 108460 }, { "epoch": 1.2, "learning_rate": 2.9983280184250157e-05, "loss": 0.6645, "step": 108465 }, { "epoch": 1.2, "learning_rate": 2.9982357457111642e-05, "loss": 0.705, "step": 108470 }, { "epoch": 1.2, "learning_rate": 2.998143472997313e-05, "loss": 0.6311, "step": 108475 }, { "epoch": 1.2, "learning_rate": 2.998051200283462e-05, "loss": 0.6999, "step": 108480 }, { "epoch": 1.2, "learning_rate": 2.997958927569611e-05, "loss": 0.6979, "step": 108485 }, { "epoch": 1.2, "learning_rate": 2.9978666548557593e-05, "loss": 0.689, "step": 108490 }, { "epoch": 1.2, "learning_rate": 2.997774382141908e-05, "loss": 0.6393, "step": 108495 }, { "epoch": 1.2, "learning_rate": 2.9976821094280572e-05, "loss": 0.6904, "step": 108500 }, { "epoch": 1.2, "learning_rate": 2.9975898367142057e-05, "loss": 0.6896, "step": 108505 }, { "epoch": 1.2, "learning_rate": 2.9974975640003545e-05, "loss": 0.665, "step": 108510 }, { "epoch": 1.2, "learning_rate": 2.997405291286503e-05, "loss": 0.7116, "step": 108515 }, { "epoch": 1.2, "learning_rate": 2.9973130185726524e-05, "loss": 0.6288, "step": 108520 }, { "epoch": 1.2, "learning_rate": 2.9972207458588008e-05, "loss": 0.686, "step": 108525 }, { "epoch": 1.2, "learning_rate": 2.9971284731449496e-05, "loss": 0.6888, "step": 108530 }, { "epoch": 1.2, "learning_rate": 2.997036200431098e-05, "loss": 0.6205, "step": 108535 }, { "epoch": 1.2, "learning_rate": 2.996943927717247e-05, "loss": 0.6845, "step": 108540 }, { "epoch": 1.2, "learning_rate": 2.996851655003396e-05, "loss": 0.6741, "step": 108545 }, { "epoch": 1.2, "learning_rate": 2.9967593822895444e-05, "loss": 0.6508, "step": 108550 }, { "epoch": 1.2, "learning_rate": 2.9966671095756932e-05, "loss": 0.6368, "step": 108555 }, { "epoch": 1.2, "learning_rate": 2.996574836861842e-05, "loss": 0.663, "step": 108560 }, { "epoch": 1.2, "learning_rate": 2.996482564147991e-05, "loss": 0.6254, "step": 108565 }, { "epoch": 1.2, "learning_rate": 2.9963902914341396e-05, "loss": 0.6684, "step": 108570 }, { "epoch": 1.2, "learning_rate": 2.9962980187202883e-05, "loss": 0.6796, "step": 108575 }, { "epoch": 1.2, "learning_rate": 2.9962057460064368e-05, "loss": 0.6832, "step": 108580 }, { "epoch": 1.2, "learning_rate": 2.996113473292586e-05, "loss": 0.6684, "step": 108585 }, { "epoch": 1.2, "learning_rate": 2.9960212005787347e-05, "loss": 0.6679, "step": 108590 }, { "epoch": 1.2, "learning_rate": 2.9959289278648835e-05, "loss": 0.6378, "step": 108595 }, { "epoch": 1.2, "learning_rate": 2.995836655151032e-05, "loss": 0.7051, "step": 108600 }, { "epoch": 1.2, "learning_rate": 2.995744382437181e-05, "loss": 0.6559, "step": 108605 }, { "epoch": 1.2, "learning_rate": 2.9956521097233298e-05, "loss": 0.637, "step": 108610 }, { "epoch": 1.2, "learning_rate": 2.9955598370094783e-05, "loss": 0.7044, "step": 108615 }, { "epoch": 1.2, "learning_rate": 2.995467564295627e-05, "loss": 0.6434, "step": 108620 }, { "epoch": 1.2, "learning_rate": 2.9953752915817755e-05, "loss": 0.6289, "step": 108625 }, { "epoch": 1.2, "learning_rate": 2.995283018867925e-05, "loss": 0.6793, "step": 108630 }, { "epoch": 1.2, "learning_rate": 2.9951907461540734e-05, "loss": 0.639, "step": 108635 }, { "epoch": 1.2, "learning_rate": 2.9950984734402222e-05, "loss": 0.6047, "step": 108640 }, { "epoch": 1.2, "learning_rate": 2.9950062007263706e-05, "loss": 0.6414, "step": 108645 }, { "epoch": 1.2, "learning_rate": 2.9949139280125198e-05, "loss": 0.6696, "step": 108650 }, { "epoch": 1.2, "learning_rate": 2.9948216552986686e-05, "loss": 0.7146, "step": 108655 }, { "epoch": 1.2, "learning_rate": 2.994729382584817e-05, "loss": 0.6991, "step": 108660 }, { "epoch": 1.2, "learning_rate": 2.9946371098709658e-05, "loss": 0.636, "step": 108665 }, { "epoch": 1.2, "learning_rate": 2.994544837157115e-05, "loss": 0.6878, "step": 108670 }, { "epoch": 1.2, "learning_rate": 2.9944525644432637e-05, "loss": 0.7052, "step": 108675 }, { "epoch": 1.2, "learning_rate": 2.994360291729412e-05, "loss": 0.7456, "step": 108680 }, { "epoch": 1.2, "learning_rate": 2.994268019015561e-05, "loss": 0.6755, "step": 108685 }, { "epoch": 1.2, "learning_rate": 2.99417574630171e-05, "loss": 0.6343, "step": 108690 }, { "epoch": 1.2, "learning_rate": 2.994083473587859e-05, "loss": 0.6409, "step": 108695 }, { "epoch": 1.2, "learning_rate": 2.9939912008740073e-05, "loss": 0.7057, "step": 108700 }, { "epoch": 1.2, "learning_rate": 2.993898928160156e-05, "loss": 0.6497, "step": 108705 }, { "epoch": 1.2, "learning_rate": 2.9938066554463045e-05, "loss": 0.6638, "step": 108710 }, { "epoch": 1.2, "learning_rate": 2.9937143827324536e-05, "loss": 0.6287, "step": 108715 }, { "epoch": 1.2, "learning_rate": 2.9936221100186024e-05, "loss": 0.7187, "step": 108720 }, { "epoch": 1.2, "learning_rate": 2.993529837304751e-05, "loss": 0.6153, "step": 108725 }, { "epoch": 1.2, "learning_rate": 2.9934375645908997e-05, "loss": 0.6864, "step": 108730 }, { "epoch": 1.2, "learning_rate": 2.9933452918770488e-05, "loss": 0.6592, "step": 108735 }, { "epoch": 1.2, "learning_rate": 2.9932530191631976e-05, "loss": 0.7039, "step": 108740 }, { "epoch": 1.2, "learning_rate": 2.993160746449346e-05, "loss": 0.6692, "step": 108745 }, { "epoch": 1.2, "learning_rate": 2.9930684737354948e-05, "loss": 0.6766, "step": 108750 }, { "epoch": 1.2, "learning_rate": 2.992976201021644e-05, "loss": 0.7096, "step": 108755 }, { "epoch": 1.2, "learning_rate": 2.9928839283077924e-05, "loss": 0.6544, "step": 108760 }, { "epoch": 1.2, "learning_rate": 2.992791655593941e-05, "loss": 0.6875, "step": 108765 }, { "epoch": 1.2, "learning_rate": 2.99269938288009e-05, "loss": 0.6467, "step": 108770 }, { "epoch": 1.2, "learning_rate": 2.9926071101662384e-05, "loss": 0.6578, "step": 108775 }, { "epoch": 1.2, "learning_rate": 2.9925148374523875e-05, "loss": 0.6755, "step": 108780 }, { "epoch": 1.2, "learning_rate": 2.9924225647385363e-05, "loss": 0.6832, "step": 108785 }, { "epoch": 1.2, "learning_rate": 2.9923302920246847e-05, "loss": 0.6382, "step": 108790 }, { "epoch": 1.2, "learning_rate": 2.9922380193108335e-05, "loss": 0.6284, "step": 108795 }, { "epoch": 1.2, "learning_rate": 2.9921457465969826e-05, "loss": 0.6729, "step": 108800 }, { "epoch": 1.2, "learning_rate": 2.9920534738831314e-05, "loss": 0.6903, "step": 108805 }, { "epoch": 1.2, "learning_rate": 2.99196120116928e-05, "loss": 0.6432, "step": 108810 }, { "epoch": 1.2, "learning_rate": 2.9918689284554287e-05, "loss": 0.6776, "step": 108815 }, { "epoch": 1.2, "learning_rate": 2.9917766557415778e-05, "loss": 0.6335, "step": 108820 }, { "epoch": 1.2, "learning_rate": 2.9916843830277262e-05, "loss": 0.6409, "step": 108825 }, { "epoch": 1.21, "learning_rate": 2.991592110313875e-05, "loss": 0.7099, "step": 108830 }, { "epoch": 1.21, "learning_rate": 2.9914998376000235e-05, "loss": 0.6367, "step": 108835 }, { "epoch": 1.21, "learning_rate": 2.991407564886173e-05, "loss": 0.6174, "step": 108840 }, { "epoch": 1.21, "learning_rate": 2.9913152921723214e-05, "loss": 0.6655, "step": 108845 }, { "epoch": 1.21, "learning_rate": 2.99122301945847e-05, "loss": 0.6394, "step": 108850 }, { "epoch": 1.21, "learning_rate": 2.9911307467446186e-05, "loss": 0.6764, "step": 108855 }, { "epoch": 1.21, "learning_rate": 2.9910384740307674e-05, "loss": 0.678, "step": 108860 }, { "epoch": 1.21, "learning_rate": 2.9909462013169165e-05, "loss": 0.6874, "step": 108865 }, { "epoch": 1.21, "learning_rate": 2.9908539286030653e-05, "loss": 0.597, "step": 108870 }, { "epoch": 1.21, "learning_rate": 2.9907616558892137e-05, "loss": 0.6784, "step": 108875 }, { "epoch": 1.21, "learning_rate": 2.9906693831753625e-05, "loss": 0.6361, "step": 108880 }, { "epoch": 1.21, "learning_rate": 2.9905771104615116e-05, "loss": 0.7224, "step": 108885 }, { "epoch": 1.21, "learning_rate": 2.99048483774766e-05, "loss": 0.6876, "step": 108890 }, { "epoch": 1.21, "learning_rate": 2.990392565033809e-05, "loss": 0.6798, "step": 108895 }, { "epoch": 1.21, "learning_rate": 2.9903002923199573e-05, "loss": 0.6665, "step": 108900 }, { "epoch": 1.21, "learning_rate": 2.9902080196061068e-05, "loss": 0.6903, "step": 108905 }, { "epoch": 1.21, "learning_rate": 2.9901157468922552e-05, "loss": 0.6858, "step": 108910 }, { "epoch": 1.21, "learning_rate": 2.990023474178404e-05, "loss": 0.6969, "step": 108915 }, { "epoch": 1.21, "learning_rate": 2.9899312014645525e-05, "loss": 0.6308, "step": 108920 }, { "epoch": 1.21, "learning_rate": 2.9898389287507012e-05, "loss": 0.6353, "step": 108925 }, { "epoch": 1.21, "learning_rate": 2.9897466560368504e-05, "loss": 0.6794, "step": 108930 }, { "epoch": 1.21, "learning_rate": 2.9896543833229988e-05, "loss": 0.7435, "step": 108935 }, { "epoch": 1.21, "learning_rate": 2.9895621106091476e-05, "loss": 0.6708, "step": 108940 }, { "epoch": 1.21, "learning_rate": 2.9894698378952964e-05, "loss": 0.7069, "step": 108945 }, { "epoch": 1.21, "learning_rate": 2.9893775651814455e-05, "loss": 0.6624, "step": 108950 }, { "epoch": 1.21, "learning_rate": 2.989285292467594e-05, "loss": 0.6492, "step": 108955 }, { "epoch": 1.21, "learning_rate": 2.9891930197537427e-05, "loss": 0.6309, "step": 108960 }, { "epoch": 1.21, "learning_rate": 2.9891007470398912e-05, "loss": 0.7056, "step": 108965 }, { "epoch": 1.21, "learning_rate": 2.9890084743260403e-05, "loss": 0.6658, "step": 108970 }, { "epoch": 1.21, "learning_rate": 2.988916201612189e-05, "loss": 0.6908, "step": 108975 }, { "epoch": 1.21, "learning_rate": 2.988823928898338e-05, "loss": 0.6547, "step": 108980 }, { "epoch": 1.21, "learning_rate": 2.9887316561844863e-05, "loss": 0.6562, "step": 108985 }, { "epoch": 1.21, "learning_rate": 2.9886393834706354e-05, "loss": 0.646, "step": 108990 }, { "epoch": 1.21, "learning_rate": 2.9885471107567842e-05, "loss": 0.6276, "step": 108995 }, { "epoch": 1.21, "learning_rate": 2.9884548380429327e-05, "loss": 0.7494, "step": 109000 }, { "epoch": 1.21, "eval_loss": 0.6345028281211853, "eval_runtime": 69.2691, "eval_samples_per_second": 28.873, "eval_steps_per_second": 14.436, "step": 109000 }, { "epoch": 1.21, "learning_rate": 2.9883625653290815e-05, "loss": 0.6687, "step": 109005 }, { "epoch": 1.21, "learning_rate": 2.98827029261523e-05, "loss": 0.6261, "step": 109010 }, { "epoch": 1.21, "learning_rate": 2.9881780199013794e-05, "loss": 0.6237, "step": 109015 }, { "epoch": 1.21, "learning_rate": 2.9880857471875278e-05, "loss": 0.7029, "step": 109020 }, { "epoch": 1.21, "learning_rate": 2.9879934744736766e-05, "loss": 0.635, "step": 109025 }, { "epoch": 1.21, "learning_rate": 2.987901201759825e-05, "loss": 0.7057, "step": 109030 }, { "epoch": 1.21, "learning_rate": 2.9878089290459742e-05, "loss": 0.7155, "step": 109035 }, { "epoch": 1.21, "learning_rate": 2.987716656332123e-05, "loss": 0.6734, "step": 109040 }, { "epoch": 1.21, "learning_rate": 2.9876243836182714e-05, "loss": 0.6579, "step": 109045 }, { "epoch": 1.21, "learning_rate": 2.9875321109044202e-05, "loss": 0.7564, "step": 109050 }, { "epoch": 1.21, "learning_rate": 2.9874398381905693e-05, "loss": 0.7069, "step": 109055 }, { "epoch": 1.21, "learning_rate": 2.987347565476718e-05, "loss": 0.6653, "step": 109060 }, { "epoch": 1.21, "learning_rate": 2.9872552927628665e-05, "loss": 0.6246, "step": 109065 }, { "epoch": 1.21, "learning_rate": 2.9871630200490153e-05, "loss": 0.7026, "step": 109070 }, { "epoch": 1.21, "learning_rate": 2.9870707473351638e-05, "loss": 0.6835, "step": 109075 }, { "epoch": 1.21, "learning_rate": 2.9869784746213132e-05, "loss": 0.6966, "step": 109080 }, { "epoch": 1.21, "learning_rate": 2.9868862019074617e-05, "loss": 0.646, "step": 109085 }, { "epoch": 1.21, "learning_rate": 2.9867939291936105e-05, "loss": 0.6238, "step": 109090 }, { "epoch": 1.21, "learning_rate": 2.986701656479759e-05, "loss": 0.666, "step": 109095 }, { "epoch": 1.21, "learning_rate": 2.986609383765908e-05, "loss": 0.6759, "step": 109100 }, { "epoch": 1.21, "learning_rate": 2.9865171110520568e-05, "loss": 0.6792, "step": 109105 }, { "epoch": 1.21, "learning_rate": 2.9864248383382053e-05, "loss": 0.67, "step": 109110 }, { "epoch": 1.21, "learning_rate": 2.986332565624354e-05, "loss": 0.6635, "step": 109115 }, { "epoch": 1.21, "learning_rate": 2.9862402929105032e-05, "loss": 0.69, "step": 109120 }, { "epoch": 1.21, "learning_rate": 2.986148020196652e-05, "loss": 0.7001, "step": 109125 }, { "epoch": 1.21, "learning_rate": 2.9860557474828004e-05, "loss": 0.6806, "step": 109130 }, { "epoch": 1.21, "learning_rate": 2.9859634747689492e-05, "loss": 0.6631, "step": 109135 }, { "epoch": 1.21, "learning_rate": 2.9858712020550983e-05, "loss": 0.6747, "step": 109140 }, { "epoch": 1.21, "learning_rate": 2.9857789293412468e-05, "loss": 0.6895, "step": 109145 }, { "epoch": 1.21, "learning_rate": 2.9856866566273955e-05, "loss": 0.6095, "step": 109150 }, { "epoch": 1.21, "learning_rate": 2.9855943839135443e-05, "loss": 0.6076, "step": 109155 }, { "epoch": 1.21, "learning_rate": 2.9855021111996928e-05, "loss": 0.6598, "step": 109160 }, { "epoch": 1.21, "learning_rate": 2.985409838485842e-05, "loss": 0.667, "step": 109165 }, { "epoch": 1.21, "learning_rate": 2.9853175657719907e-05, "loss": 0.6419, "step": 109170 }, { "epoch": 1.21, "learning_rate": 2.985225293058139e-05, "loss": 0.6753, "step": 109175 }, { "epoch": 1.21, "learning_rate": 2.985133020344288e-05, "loss": 0.6017, "step": 109180 }, { "epoch": 1.21, "learning_rate": 2.985040747630437e-05, "loss": 0.7046, "step": 109185 }, { "epoch": 1.21, "learning_rate": 2.9849484749165858e-05, "loss": 0.6407, "step": 109190 }, { "epoch": 1.21, "learning_rate": 2.9848562022027343e-05, "loss": 0.6576, "step": 109195 }, { "epoch": 1.21, "learning_rate": 2.984763929488883e-05, "loss": 0.6649, "step": 109200 }, { "epoch": 1.21, "learning_rate": 2.9846716567750322e-05, "loss": 0.6733, "step": 109205 }, { "epoch": 1.21, "learning_rate": 2.9845793840611806e-05, "loss": 0.6416, "step": 109210 }, { "epoch": 1.21, "learning_rate": 2.9844871113473294e-05, "loss": 0.6874, "step": 109215 }, { "epoch": 1.21, "learning_rate": 2.984394838633478e-05, "loss": 0.6634, "step": 109220 }, { "epoch": 1.21, "learning_rate": 2.9843025659196266e-05, "loss": 0.7111, "step": 109225 }, { "epoch": 1.21, "learning_rate": 2.9842102932057758e-05, "loss": 0.6461, "step": 109230 }, { "epoch": 1.21, "learning_rate": 2.9841180204919245e-05, "loss": 0.6599, "step": 109235 }, { "epoch": 1.21, "learning_rate": 2.984025747778073e-05, "loss": 0.6536, "step": 109240 }, { "epoch": 1.21, "learning_rate": 2.9839334750642218e-05, "loss": 0.6458, "step": 109245 }, { "epoch": 1.21, "learning_rate": 2.983841202350371e-05, "loss": 0.683, "step": 109250 }, { "epoch": 1.21, "learning_rate": 2.9837489296365197e-05, "loss": 0.7083, "step": 109255 }, { "epoch": 1.21, "learning_rate": 2.983656656922668e-05, "loss": 0.6714, "step": 109260 }, { "epoch": 1.21, "learning_rate": 2.983564384208817e-05, "loss": 0.664, "step": 109265 }, { "epoch": 1.21, "learning_rate": 2.983472111494966e-05, "loss": 0.6449, "step": 109270 }, { "epoch": 1.21, "learning_rate": 2.9833798387811145e-05, "loss": 0.6733, "step": 109275 }, { "epoch": 1.21, "learning_rate": 2.9832875660672633e-05, "loss": 0.6461, "step": 109280 }, { "epoch": 1.21, "learning_rate": 2.9831952933534117e-05, "loss": 0.6488, "step": 109285 }, { "epoch": 1.21, "learning_rate": 2.9831030206395612e-05, "loss": 0.7199, "step": 109290 }, { "epoch": 1.21, "learning_rate": 2.9830107479257096e-05, "loss": 0.6624, "step": 109295 }, { "epoch": 1.21, "learning_rate": 2.9829184752118584e-05, "loss": 0.6908, "step": 109300 }, { "epoch": 1.21, "learning_rate": 2.982826202498007e-05, "loss": 0.6648, "step": 109305 }, { "epoch": 1.21, "learning_rate": 2.9827339297841556e-05, "loss": 0.6779, "step": 109310 }, { "epoch": 1.21, "learning_rate": 2.9826416570703048e-05, "loss": 0.6437, "step": 109315 }, { "epoch": 1.21, "learning_rate": 2.9825493843564532e-05, "loss": 0.6557, "step": 109320 }, { "epoch": 1.21, "learning_rate": 2.982457111642602e-05, "loss": 0.6168, "step": 109325 }, { "epoch": 1.21, "learning_rate": 2.9823648389287508e-05, "loss": 0.6613, "step": 109330 }, { "epoch": 1.21, "learning_rate": 2.9822725662149e-05, "loss": 0.7065, "step": 109335 }, { "epoch": 1.21, "learning_rate": 2.9821802935010484e-05, "loss": 0.7312, "step": 109340 }, { "epoch": 1.21, "learning_rate": 2.982088020787197e-05, "loss": 0.7039, "step": 109345 }, { "epoch": 1.21, "learning_rate": 2.9819957480733456e-05, "loss": 0.6819, "step": 109350 }, { "epoch": 1.21, "learning_rate": 2.981903475359495e-05, "loss": 0.6976, "step": 109355 }, { "epoch": 1.21, "learning_rate": 2.9818112026456435e-05, "loss": 0.7237, "step": 109360 }, { "epoch": 1.21, "learning_rate": 2.9817189299317923e-05, "loss": 0.6376, "step": 109365 }, { "epoch": 1.21, "learning_rate": 2.9816266572179407e-05, "loss": 0.7227, "step": 109370 }, { "epoch": 1.21, "learning_rate": 2.9815343845040895e-05, "loss": 0.6425, "step": 109375 }, { "epoch": 1.21, "learning_rate": 2.9814421117902386e-05, "loss": 0.6711, "step": 109380 }, { "epoch": 1.21, "learning_rate": 2.981349839076387e-05, "loss": 0.6559, "step": 109385 }, { "epoch": 1.21, "learning_rate": 2.981257566362536e-05, "loss": 0.6348, "step": 109390 }, { "epoch": 1.21, "learning_rate": 2.9811652936486843e-05, "loss": 0.6495, "step": 109395 }, { "epoch": 1.21, "learning_rate": 2.9810730209348338e-05, "loss": 0.6818, "step": 109400 }, { "epoch": 1.21, "learning_rate": 2.9809807482209822e-05, "loss": 0.6602, "step": 109405 }, { "epoch": 1.21, "learning_rate": 2.980888475507131e-05, "loss": 0.6017, "step": 109410 }, { "epoch": 1.21, "learning_rate": 2.9807962027932794e-05, "loss": 0.6573, "step": 109415 }, { "epoch": 1.21, "learning_rate": 2.9807039300794286e-05, "loss": 0.5909, "step": 109420 }, { "epoch": 1.21, "learning_rate": 2.9806116573655774e-05, "loss": 0.6428, "step": 109425 }, { "epoch": 1.21, "learning_rate": 2.980519384651726e-05, "loss": 0.6565, "step": 109430 }, { "epoch": 1.21, "learning_rate": 2.9804271119378746e-05, "loss": 0.6411, "step": 109435 }, { "epoch": 1.21, "learning_rate": 2.9803348392240237e-05, "loss": 0.7136, "step": 109440 }, { "epoch": 1.21, "learning_rate": 2.9802425665101725e-05, "loss": 0.6773, "step": 109445 }, { "epoch": 1.21, "learning_rate": 2.980150293796321e-05, "loss": 0.6861, "step": 109450 }, { "epoch": 1.21, "learning_rate": 2.9800580210824697e-05, "loss": 0.691, "step": 109455 }, { "epoch": 1.21, "learning_rate": 2.9799657483686182e-05, "loss": 0.6452, "step": 109460 }, { "epoch": 1.21, "learning_rate": 2.9798734756547676e-05, "loss": 0.6778, "step": 109465 }, { "epoch": 1.21, "learning_rate": 2.979781202940916e-05, "loss": 0.6509, "step": 109470 }, { "epoch": 1.21, "learning_rate": 2.979688930227065e-05, "loss": 0.7303, "step": 109475 }, { "epoch": 1.21, "learning_rate": 2.9795966575132133e-05, "loss": 0.686, "step": 109480 }, { "epoch": 1.21, "learning_rate": 2.9795043847993624e-05, "loss": 0.7092, "step": 109485 }, { "epoch": 1.21, "learning_rate": 2.9794121120855112e-05, "loss": 0.6823, "step": 109490 }, { "epoch": 1.21, "learning_rate": 2.9793198393716597e-05, "loss": 0.6403, "step": 109495 }, { "epoch": 1.21, "learning_rate": 2.9792275666578085e-05, "loss": 0.7092, "step": 109500 }, { "epoch": 1.21, "learning_rate": 2.9791352939439576e-05, "loss": 0.6555, "step": 109505 }, { "epoch": 1.21, "learning_rate": 2.9790430212301064e-05, "loss": 0.6644, "step": 109510 }, { "epoch": 1.21, "learning_rate": 2.9789507485162548e-05, "loss": 0.6293, "step": 109515 }, { "epoch": 1.21, "learning_rate": 2.9788584758024036e-05, "loss": 0.6163, "step": 109520 }, { "epoch": 1.21, "learning_rate": 2.9787662030885527e-05, "loss": 0.6501, "step": 109525 }, { "epoch": 1.21, "learning_rate": 2.978673930374701e-05, "loss": 0.6839, "step": 109530 }, { "epoch": 1.21, "learning_rate": 2.97858165766085e-05, "loss": 0.647, "step": 109535 }, { "epoch": 1.21, "learning_rate": 2.9784893849469987e-05, "loss": 0.6734, "step": 109540 }, { "epoch": 1.21, "learning_rate": 2.9783971122331472e-05, "loss": 0.6073, "step": 109545 }, { "epoch": 1.21, "learning_rate": 2.9783048395192963e-05, "loss": 0.6618, "step": 109550 }, { "epoch": 1.21, "learning_rate": 2.978212566805445e-05, "loss": 0.6649, "step": 109555 }, { "epoch": 1.21, "learning_rate": 2.9781202940915935e-05, "loss": 0.7001, "step": 109560 }, { "epoch": 1.21, "learning_rate": 2.9780280213777423e-05, "loss": 0.7098, "step": 109565 }, { "epoch": 1.21, "learning_rate": 2.9779357486638914e-05, "loss": 0.7159, "step": 109570 }, { "epoch": 1.21, "learning_rate": 2.9778434759500402e-05, "loss": 0.683, "step": 109575 }, { "epoch": 1.21, "learning_rate": 2.9777512032361887e-05, "loss": 0.6166, "step": 109580 }, { "epoch": 1.21, "learning_rate": 2.9776589305223375e-05, "loss": 0.6891, "step": 109585 }, { "epoch": 1.21, "learning_rate": 2.9775666578084866e-05, "loss": 0.6312, "step": 109590 }, { "epoch": 1.21, "learning_rate": 2.977474385094635e-05, "loss": 0.6861, "step": 109595 }, { "epoch": 1.21, "learning_rate": 2.9773821123807838e-05, "loss": 0.6676, "step": 109600 }, { "epoch": 1.21, "learning_rate": 2.9772898396669323e-05, "loss": 0.6965, "step": 109605 }, { "epoch": 1.21, "learning_rate": 2.977197566953081e-05, "loss": 0.6664, "step": 109610 }, { "epoch": 1.21, "learning_rate": 2.97710529423923e-05, "loss": 0.6381, "step": 109615 }, { "epoch": 1.21, "learning_rate": 2.977013021525379e-05, "loss": 0.6568, "step": 109620 }, { "epoch": 1.21, "learning_rate": 2.9769207488115274e-05, "loss": 0.7158, "step": 109625 }, { "epoch": 1.21, "learning_rate": 2.9768284760976762e-05, "loss": 0.6378, "step": 109630 }, { "epoch": 1.21, "learning_rate": 2.9767362033838253e-05, "loss": 0.6435, "step": 109635 }, { "epoch": 1.21, "learning_rate": 2.976643930669974e-05, "loss": 0.6914, "step": 109640 }, { "epoch": 1.21, "learning_rate": 2.9765516579561225e-05, "loss": 0.6432, "step": 109645 }, { "epoch": 1.21, "learning_rate": 2.9764593852422713e-05, "loss": 0.6726, "step": 109650 }, { "epoch": 1.21, "learning_rate": 2.9763671125284204e-05, "loss": 0.7108, "step": 109655 }, { "epoch": 1.21, "learning_rate": 2.976274839814569e-05, "loss": 0.6563, "step": 109660 }, { "epoch": 1.21, "learning_rate": 2.9761825671007177e-05, "loss": 0.6721, "step": 109665 }, { "epoch": 1.21, "learning_rate": 2.976090294386866e-05, "loss": 0.7096, "step": 109670 }, { "epoch": 1.21, "learning_rate": 2.9759980216730156e-05, "loss": 0.6847, "step": 109675 }, { "epoch": 1.21, "learning_rate": 2.975905748959164e-05, "loss": 0.6369, "step": 109680 }, { "epoch": 1.21, "learning_rate": 2.9758134762453128e-05, "loss": 0.6227, "step": 109685 }, { "epoch": 1.21, "learning_rate": 2.9757212035314613e-05, "loss": 0.6446, "step": 109690 }, { "epoch": 1.21, "learning_rate": 2.97562893081761e-05, "loss": 0.697, "step": 109695 }, { "epoch": 1.21, "learning_rate": 2.9755366581037592e-05, "loss": 0.6805, "step": 109700 }, { "epoch": 1.21, "learning_rate": 2.9754443853899076e-05, "loss": 0.6921, "step": 109705 }, { "epoch": 1.21, "learning_rate": 2.9753521126760564e-05, "loss": 0.6381, "step": 109710 }, { "epoch": 1.21, "learning_rate": 2.9752598399622052e-05, "loss": 0.6866, "step": 109715 }, { "epoch": 1.21, "learning_rate": 2.9751675672483543e-05, "loss": 0.7092, "step": 109720 }, { "epoch": 1.21, "learning_rate": 2.9750752945345028e-05, "loss": 0.653, "step": 109725 }, { "epoch": 1.22, "learning_rate": 2.9749830218206515e-05, "loss": 0.6715, "step": 109730 }, { "epoch": 1.22, "learning_rate": 2.9748907491068e-05, "loss": 0.662, "step": 109735 }, { "epoch": 1.22, "learning_rate": 2.9747984763929494e-05, "loss": 0.6435, "step": 109740 }, { "epoch": 1.22, "learning_rate": 2.974706203679098e-05, "loss": 0.6771, "step": 109745 }, { "epoch": 1.22, "learning_rate": 2.9746139309652467e-05, "loss": 0.6499, "step": 109750 }, { "epoch": 1.22, "learning_rate": 2.974521658251395e-05, "loss": 0.6693, "step": 109755 }, { "epoch": 1.22, "learning_rate": 2.974429385537544e-05, "loss": 0.6826, "step": 109760 }, { "epoch": 1.22, "learning_rate": 2.974337112823693e-05, "loss": 0.6628, "step": 109765 }, { "epoch": 1.22, "learning_rate": 2.9742448401098415e-05, "loss": 0.5828, "step": 109770 }, { "epoch": 1.22, "learning_rate": 2.9741525673959903e-05, "loss": 0.6976, "step": 109775 }, { "epoch": 1.22, "learning_rate": 2.9740602946821387e-05, "loss": 0.6652, "step": 109780 }, { "epoch": 1.22, "learning_rate": 2.9739680219682882e-05, "loss": 0.6244, "step": 109785 }, { "epoch": 1.22, "learning_rate": 2.9738757492544366e-05, "loss": 0.6741, "step": 109790 }, { "epoch": 1.22, "learning_rate": 2.9737834765405854e-05, "loss": 0.6668, "step": 109795 }, { "epoch": 1.22, "learning_rate": 2.973691203826734e-05, "loss": 0.6718, "step": 109800 }, { "epoch": 1.22, "learning_rate": 2.973598931112883e-05, "loss": 0.6531, "step": 109805 }, { "epoch": 1.22, "learning_rate": 2.9735066583990318e-05, "loss": 0.684, "step": 109810 }, { "epoch": 1.22, "learning_rate": 2.9734143856851805e-05, "loss": 0.6048, "step": 109815 }, { "epoch": 1.22, "learning_rate": 2.973322112971329e-05, "loss": 0.6934, "step": 109820 }, { "epoch": 1.22, "learning_rate": 2.973229840257478e-05, "loss": 0.6174, "step": 109825 }, { "epoch": 1.22, "learning_rate": 2.973137567543627e-05, "loss": 0.6624, "step": 109830 }, { "epoch": 1.22, "learning_rate": 2.9730452948297753e-05, "loss": 0.6989, "step": 109835 }, { "epoch": 1.22, "learning_rate": 2.972953022115924e-05, "loss": 0.6331, "step": 109840 }, { "epoch": 1.22, "learning_rate": 2.9728607494020726e-05, "loss": 0.7047, "step": 109845 }, { "epoch": 1.22, "learning_rate": 2.972768476688222e-05, "loss": 0.6736, "step": 109850 }, { "epoch": 1.22, "learning_rate": 2.9726762039743705e-05, "loss": 0.5943, "step": 109855 }, { "epoch": 1.22, "learning_rate": 2.9725839312605193e-05, "loss": 0.6526, "step": 109860 }, { "epoch": 1.22, "learning_rate": 2.9724916585466677e-05, "loss": 0.7176, "step": 109865 }, { "epoch": 1.22, "learning_rate": 2.972399385832817e-05, "loss": 0.6687, "step": 109870 }, { "epoch": 1.22, "learning_rate": 2.9723071131189656e-05, "loss": 0.657, "step": 109875 }, { "epoch": 1.22, "learning_rate": 2.972214840405114e-05, "loss": 0.6628, "step": 109880 }, { "epoch": 1.22, "learning_rate": 2.972122567691263e-05, "loss": 0.657, "step": 109885 }, { "epoch": 1.22, "learning_rate": 2.972030294977412e-05, "loss": 0.6546, "step": 109890 }, { "epoch": 1.22, "learning_rate": 2.9719380222635608e-05, "loss": 0.6301, "step": 109895 }, { "epoch": 1.22, "learning_rate": 2.9718457495497092e-05, "loss": 0.6774, "step": 109900 }, { "epoch": 1.22, "learning_rate": 2.971753476835858e-05, "loss": 0.6923, "step": 109905 }, { "epoch": 1.22, "learning_rate": 2.9716612041220064e-05, "loss": 0.6693, "step": 109910 }, { "epoch": 1.22, "learning_rate": 2.9715689314081556e-05, "loss": 0.6489, "step": 109915 }, { "epoch": 1.22, "learning_rate": 2.9714766586943043e-05, "loss": 0.6792, "step": 109920 }, { "epoch": 1.22, "learning_rate": 2.971384385980453e-05, "loss": 0.6016, "step": 109925 }, { "epoch": 1.22, "learning_rate": 2.9712921132666016e-05, "loss": 0.6892, "step": 109930 }, { "epoch": 1.22, "learning_rate": 2.9711998405527507e-05, "loss": 0.6803, "step": 109935 }, { "epoch": 1.22, "learning_rate": 2.9711075678388995e-05, "loss": 0.6494, "step": 109940 }, { "epoch": 1.22, "learning_rate": 2.971015295125048e-05, "loss": 0.6561, "step": 109945 }, { "epoch": 1.22, "learning_rate": 2.9709230224111967e-05, "loss": 0.616, "step": 109950 }, { "epoch": 1.22, "learning_rate": 2.970830749697346e-05, "loss": 0.6633, "step": 109955 }, { "epoch": 1.22, "learning_rate": 2.9707384769834946e-05, "loss": 0.6362, "step": 109960 }, { "epoch": 1.22, "learning_rate": 2.970646204269643e-05, "loss": 0.6831, "step": 109965 }, { "epoch": 1.22, "learning_rate": 2.970553931555792e-05, "loss": 0.6634, "step": 109970 }, { "epoch": 1.22, "learning_rate": 2.970461658841941e-05, "loss": 0.6126, "step": 109975 }, { "epoch": 1.22, "learning_rate": 2.9703693861280894e-05, "loss": 0.6621, "step": 109980 }, { "epoch": 1.22, "learning_rate": 2.9702771134142382e-05, "loss": 0.7041, "step": 109985 }, { "epoch": 1.22, "learning_rate": 2.9701848407003867e-05, "loss": 0.6486, "step": 109990 }, { "epoch": 1.22, "learning_rate": 2.9700925679865354e-05, "loss": 0.6524, "step": 109995 }, { "epoch": 1.22, "learning_rate": 2.9700002952726846e-05, "loss": 0.691, "step": 110000 }, { "epoch": 1.22, "eval_loss": 0.6322246193885803, "eval_runtime": 69.3515, "eval_samples_per_second": 28.839, "eval_steps_per_second": 14.419, "step": 110000 }, { "epoch": 1.22, "learning_rate": 2.9699080225588334e-05, "loss": 0.6661, "step": 110005 }, { "epoch": 1.22, "learning_rate": 2.9698157498449818e-05, "loss": 0.6636, "step": 110010 }, { "epoch": 1.22, "learning_rate": 2.9697234771311306e-05, "loss": 0.6736, "step": 110015 }, { "epoch": 1.22, "learning_rate": 2.9696312044172797e-05, "loss": 0.625, "step": 110020 }, { "epoch": 1.22, "learning_rate": 2.9695389317034285e-05, "loss": 0.6391, "step": 110025 }, { "epoch": 1.22, "learning_rate": 2.969446658989577e-05, "loss": 0.6954, "step": 110030 }, { "epoch": 1.22, "learning_rate": 2.9693543862757257e-05, "loss": 0.6034, "step": 110035 }, { "epoch": 1.22, "learning_rate": 2.969262113561875e-05, "loss": 0.7341, "step": 110040 }, { "epoch": 1.22, "learning_rate": 2.9691698408480233e-05, "loss": 0.6388, "step": 110045 }, { "epoch": 1.22, "learning_rate": 2.969077568134172e-05, "loss": 0.687, "step": 110050 }, { "epoch": 1.22, "learning_rate": 2.9689852954203205e-05, "loss": 0.67, "step": 110055 }, { "epoch": 1.22, "learning_rate": 2.9688930227064693e-05, "loss": 0.6648, "step": 110060 }, { "epoch": 1.22, "learning_rate": 2.9688007499926184e-05, "loss": 0.6314, "step": 110065 }, { "epoch": 1.22, "learning_rate": 2.9687084772787672e-05, "loss": 0.7092, "step": 110070 }, { "epoch": 1.22, "learning_rate": 2.9686162045649157e-05, "loss": 0.7092, "step": 110075 }, { "epoch": 1.22, "learning_rate": 2.9685239318510644e-05, "loss": 0.6205, "step": 110080 }, { "epoch": 1.22, "learning_rate": 2.9684316591372136e-05, "loss": 0.7128, "step": 110085 }, { "epoch": 1.22, "learning_rate": 2.968339386423362e-05, "loss": 0.6314, "step": 110090 }, { "epoch": 1.22, "learning_rate": 2.9682471137095108e-05, "loss": 0.6705, "step": 110095 }, { "epoch": 1.22, "learning_rate": 2.9681548409956596e-05, "loss": 0.6821, "step": 110100 }, { "epoch": 1.22, "learning_rate": 2.9680625682818087e-05, "loss": 0.6908, "step": 110105 }, { "epoch": 1.22, "learning_rate": 2.967970295567957e-05, "loss": 0.7216, "step": 110110 }, { "epoch": 1.22, "learning_rate": 2.967878022854106e-05, "loss": 0.75, "step": 110115 }, { "epoch": 1.22, "learning_rate": 2.9677857501402544e-05, "loss": 0.6873, "step": 110120 }, { "epoch": 1.22, "learning_rate": 2.967693477426404e-05, "loss": 0.7005, "step": 110125 }, { "epoch": 1.22, "learning_rate": 2.9676012047125523e-05, "loss": 0.6985, "step": 110130 }, { "epoch": 1.22, "learning_rate": 2.967508931998701e-05, "loss": 0.645, "step": 110135 }, { "epoch": 1.22, "learning_rate": 2.9674166592848495e-05, "loss": 0.7089, "step": 110140 }, { "epoch": 1.22, "learning_rate": 2.9673243865709983e-05, "loss": 0.7401, "step": 110145 }, { "epoch": 1.22, "learning_rate": 2.9672321138571474e-05, "loss": 0.7157, "step": 110150 }, { "epoch": 1.22, "learning_rate": 2.967139841143296e-05, "loss": 0.6197, "step": 110155 }, { "epoch": 1.22, "learning_rate": 2.9670475684294447e-05, "loss": 0.6374, "step": 110160 }, { "epoch": 1.22, "learning_rate": 2.966955295715593e-05, "loss": 0.6592, "step": 110165 }, { "epoch": 1.22, "learning_rate": 2.9668630230017426e-05, "loss": 0.7007, "step": 110170 }, { "epoch": 1.22, "learning_rate": 2.966770750287891e-05, "loss": 0.7196, "step": 110175 }, { "epoch": 1.22, "learning_rate": 2.9666784775740398e-05, "loss": 0.6918, "step": 110180 }, { "epoch": 1.22, "learning_rate": 2.9665862048601883e-05, "loss": 0.6229, "step": 110185 }, { "epoch": 1.22, "learning_rate": 2.9664939321463374e-05, "loss": 0.6186, "step": 110190 }, { "epoch": 1.22, "learning_rate": 2.966401659432486e-05, "loss": 0.6199, "step": 110195 }, { "epoch": 1.22, "learning_rate": 2.966309386718635e-05, "loss": 0.7347, "step": 110200 }, { "epoch": 1.22, "learning_rate": 2.9662171140047834e-05, "loss": 0.6988, "step": 110205 }, { "epoch": 1.22, "learning_rate": 2.9661248412909322e-05, "loss": 0.6308, "step": 110210 }, { "epoch": 1.22, "learning_rate": 2.9660325685770813e-05, "loss": 0.6723, "step": 110215 }, { "epoch": 1.22, "learning_rate": 2.9659402958632297e-05, "loss": 0.6581, "step": 110220 }, { "epoch": 1.22, "learning_rate": 2.9658480231493785e-05, "loss": 0.6232, "step": 110225 }, { "epoch": 1.22, "learning_rate": 2.965755750435527e-05, "loss": 0.714, "step": 110230 }, { "epoch": 1.22, "learning_rate": 2.9656634777216764e-05, "loss": 0.6336, "step": 110235 }, { "epoch": 1.22, "learning_rate": 2.965571205007825e-05, "loss": 0.7232, "step": 110240 }, { "epoch": 1.22, "learning_rate": 2.9654789322939737e-05, "loss": 0.638, "step": 110245 }, { "epoch": 1.22, "learning_rate": 2.965386659580122e-05, "loss": 0.7003, "step": 110250 }, { "epoch": 1.22, "learning_rate": 2.9652943868662712e-05, "loss": 0.6621, "step": 110255 }, { "epoch": 1.22, "learning_rate": 2.96520211415242e-05, "loss": 0.668, "step": 110260 }, { "epoch": 1.22, "learning_rate": 2.9651098414385685e-05, "loss": 0.7349, "step": 110265 }, { "epoch": 1.22, "learning_rate": 2.9650175687247173e-05, "loss": 0.6318, "step": 110270 }, { "epoch": 1.22, "learning_rate": 2.9649252960108664e-05, "loss": 0.681, "step": 110275 }, { "epoch": 1.22, "learning_rate": 2.964833023297015e-05, "loss": 0.7144, "step": 110280 }, { "epoch": 1.22, "learning_rate": 2.9647407505831636e-05, "loss": 0.6327, "step": 110285 }, { "epoch": 1.22, "learning_rate": 2.9646484778693124e-05, "loss": 0.715, "step": 110290 }, { "epoch": 1.22, "learning_rate": 2.964556205155461e-05, "loss": 0.6529, "step": 110295 }, { "epoch": 1.22, "learning_rate": 2.9644639324416103e-05, "loss": 0.6706, "step": 110300 }, { "epoch": 1.22, "learning_rate": 2.9643716597277588e-05, "loss": 0.6484, "step": 110305 }, { "epoch": 1.22, "learning_rate": 2.9642793870139075e-05, "loss": 0.6243, "step": 110310 }, { "epoch": 1.22, "learning_rate": 2.964187114300056e-05, "loss": 0.6597, "step": 110315 }, { "epoch": 1.22, "learning_rate": 2.964094841586205e-05, "loss": 0.6817, "step": 110320 }, { "epoch": 1.22, "learning_rate": 2.964002568872354e-05, "loss": 0.7398, "step": 110325 }, { "epoch": 1.22, "learning_rate": 2.9639102961585023e-05, "loss": 0.6878, "step": 110330 }, { "epoch": 1.22, "learning_rate": 2.963818023444651e-05, "loss": 0.5832, "step": 110335 }, { "epoch": 1.22, "learning_rate": 2.9637257507308002e-05, "loss": 0.6501, "step": 110340 }, { "epoch": 1.22, "learning_rate": 2.963633478016949e-05, "loss": 0.639, "step": 110345 }, { "epoch": 1.22, "learning_rate": 2.9635412053030975e-05, "loss": 0.6691, "step": 110350 }, { "epoch": 1.22, "learning_rate": 2.9634489325892463e-05, "loss": 0.6532, "step": 110355 }, { "epoch": 1.22, "learning_rate": 2.9633566598753947e-05, "loss": 0.6315, "step": 110360 }, { "epoch": 1.22, "learning_rate": 2.963264387161544e-05, "loss": 0.686, "step": 110365 }, { "epoch": 1.22, "learning_rate": 2.9631721144476926e-05, "loss": 0.6503, "step": 110370 }, { "epoch": 1.22, "learning_rate": 2.9630798417338414e-05, "loss": 0.6679, "step": 110375 }, { "epoch": 1.22, "learning_rate": 2.96298756901999e-05, "loss": 0.6743, "step": 110380 }, { "epoch": 1.22, "learning_rate": 2.962895296306139e-05, "loss": 0.6718, "step": 110385 }, { "epoch": 1.22, "learning_rate": 2.9628030235922878e-05, "loss": 0.6426, "step": 110390 }, { "epoch": 1.22, "learning_rate": 2.9627107508784362e-05, "loss": 0.6599, "step": 110395 }, { "epoch": 1.22, "learning_rate": 2.962618478164585e-05, "loss": 0.6579, "step": 110400 }, { "epoch": 1.22, "learning_rate": 2.962526205450734e-05, "loss": 0.6475, "step": 110405 }, { "epoch": 1.22, "learning_rate": 2.962433932736883e-05, "loss": 0.681, "step": 110410 }, { "epoch": 1.22, "learning_rate": 2.9623416600230313e-05, "loss": 0.6588, "step": 110415 }, { "epoch": 1.22, "learning_rate": 2.96224938730918e-05, "loss": 0.6373, "step": 110420 }, { "epoch": 1.22, "learning_rate": 2.9621571145953292e-05, "loss": 0.6033, "step": 110425 }, { "epoch": 1.22, "learning_rate": 2.9620648418814777e-05, "loss": 0.6589, "step": 110430 }, { "epoch": 1.22, "learning_rate": 2.9619725691676265e-05, "loss": 0.7408, "step": 110435 }, { "epoch": 1.22, "learning_rate": 2.961880296453775e-05, "loss": 0.67, "step": 110440 }, { "epoch": 1.22, "learning_rate": 2.9617880237399237e-05, "loss": 0.6526, "step": 110445 }, { "epoch": 1.22, "learning_rate": 2.961695751026073e-05, "loss": 0.7418, "step": 110450 }, { "epoch": 1.22, "learning_rate": 2.9616034783122216e-05, "loss": 0.6209, "step": 110455 }, { "epoch": 1.22, "learning_rate": 2.96151120559837e-05, "loss": 0.6369, "step": 110460 }, { "epoch": 1.22, "learning_rate": 2.961418932884519e-05, "loss": 0.691, "step": 110465 }, { "epoch": 1.22, "learning_rate": 2.961326660170668e-05, "loss": 0.7308, "step": 110470 }, { "epoch": 1.22, "learning_rate": 2.9612343874568164e-05, "loss": 0.6072, "step": 110475 }, { "epoch": 1.22, "learning_rate": 2.9611421147429652e-05, "loss": 0.6577, "step": 110480 }, { "epoch": 1.22, "learning_rate": 2.961049842029114e-05, "loss": 0.6617, "step": 110485 }, { "epoch": 1.22, "learning_rate": 2.960957569315263e-05, "loss": 0.6781, "step": 110490 }, { "epoch": 1.22, "learning_rate": 2.9608652966014116e-05, "loss": 0.7005, "step": 110495 }, { "epoch": 1.22, "learning_rate": 2.9607730238875603e-05, "loss": 0.6824, "step": 110500 }, { "epoch": 1.22, "learning_rate": 2.9606807511737088e-05, "loss": 0.6087, "step": 110505 }, { "epoch": 1.22, "learning_rate": 2.9605884784598583e-05, "loss": 0.6218, "step": 110510 }, { "epoch": 1.22, "learning_rate": 2.9604962057460067e-05, "loss": 0.729, "step": 110515 }, { "epoch": 1.22, "learning_rate": 2.9604039330321555e-05, "loss": 0.6152, "step": 110520 }, { "epoch": 1.22, "learning_rate": 2.960311660318304e-05, "loss": 0.663, "step": 110525 }, { "epoch": 1.22, "learning_rate": 2.9602193876044527e-05, "loss": 0.6908, "step": 110530 }, { "epoch": 1.22, "learning_rate": 2.960127114890602e-05, "loss": 0.6626, "step": 110535 }, { "epoch": 1.22, "learning_rate": 2.9600348421767503e-05, "loss": 0.6525, "step": 110540 }, { "epoch": 1.22, "learning_rate": 2.959942569462899e-05, "loss": 0.6549, "step": 110545 }, { "epoch": 1.22, "learning_rate": 2.9598502967490475e-05, "loss": 0.6528, "step": 110550 }, { "epoch": 1.22, "learning_rate": 2.959758024035197e-05, "loss": 0.6877, "step": 110555 }, { "epoch": 1.22, "learning_rate": 2.9596657513213454e-05, "loss": 0.6852, "step": 110560 }, { "epoch": 1.22, "learning_rate": 2.9595734786074942e-05, "loss": 0.713, "step": 110565 }, { "epoch": 1.22, "learning_rate": 2.9594812058936427e-05, "loss": 0.6058, "step": 110570 }, { "epoch": 1.22, "learning_rate": 2.9593889331797918e-05, "loss": 0.6311, "step": 110575 }, { "epoch": 1.22, "learning_rate": 2.9592966604659406e-05, "loss": 0.6629, "step": 110580 }, { "epoch": 1.22, "learning_rate": 2.9592043877520893e-05, "loss": 0.6626, "step": 110585 }, { "epoch": 1.22, "learning_rate": 2.9591121150382378e-05, "loss": 0.6441, "step": 110590 }, { "epoch": 1.22, "learning_rate": 2.9590198423243866e-05, "loss": 0.6781, "step": 110595 }, { "epoch": 1.22, "learning_rate": 2.9589275696105357e-05, "loss": 0.6122, "step": 110600 }, { "epoch": 1.22, "learning_rate": 2.958835296896684e-05, "loss": 0.6553, "step": 110605 }, { "epoch": 1.22, "learning_rate": 2.958743024182833e-05, "loss": 0.706, "step": 110610 }, { "epoch": 1.22, "learning_rate": 2.9586507514689814e-05, "loss": 0.6392, "step": 110615 }, { "epoch": 1.22, "learning_rate": 2.958558478755131e-05, "loss": 0.6357, "step": 110620 }, { "epoch": 1.22, "learning_rate": 2.9584662060412793e-05, "loss": 0.638, "step": 110625 }, { "epoch": 1.22, "learning_rate": 2.958373933327428e-05, "loss": 0.649, "step": 110630 }, { "epoch": 1.23, "learning_rate": 2.9582816606135765e-05, "loss": 0.6244, "step": 110635 }, { "epoch": 1.23, "learning_rate": 2.9581893878997256e-05, "loss": 0.6636, "step": 110640 }, { "epoch": 1.23, "learning_rate": 2.9580971151858744e-05, "loss": 0.7038, "step": 110645 }, { "epoch": 1.23, "learning_rate": 2.958004842472023e-05, "loss": 0.6697, "step": 110650 }, { "epoch": 1.23, "learning_rate": 2.9579125697581717e-05, "loss": 0.6545, "step": 110655 }, { "epoch": 1.23, "learning_rate": 2.9578202970443208e-05, "loss": 0.6474, "step": 110660 }, { "epoch": 1.23, "learning_rate": 2.9577280243304696e-05, "loss": 0.6313, "step": 110665 }, { "epoch": 1.23, "learning_rate": 2.957635751616618e-05, "loss": 0.6761, "step": 110670 }, { "epoch": 1.23, "learning_rate": 2.9575434789027668e-05, "loss": 0.6566, "step": 110675 }, { "epoch": 1.23, "learning_rate": 2.9574512061889152e-05, "loss": 0.7038, "step": 110680 }, { "epoch": 1.23, "learning_rate": 2.9573589334750647e-05, "loss": 0.6109, "step": 110685 }, { "epoch": 1.23, "learning_rate": 2.957266660761213e-05, "loss": 0.6301, "step": 110690 }, { "epoch": 1.23, "learning_rate": 2.957174388047362e-05, "loss": 0.6422, "step": 110695 }, { "epoch": 1.23, "learning_rate": 2.9570821153335104e-05, "loss": 0.6558, "step": 110700 }, { "epoch": 1.23, "learning_rate": 2.9569898426196595e-05, "loss": 0.6581, "step": 110705 }, { "epoch": 1.23, "learning_rate": 2.9568975699058083e-05, "loss": 0.6696, "step": 110710 }, { "epoch": 1.23, "learning_rate": 2.9568052971919567e-05, "loss": 0.6519, "step": 110715 }, { "epoch": 1.23, "learning_rate": 2.9567130244781055e-05, "loss": 0.6171, "step": 110720 }, { "epoch": 1.23, "learning_rate": 2.9566207517642546e-05, "loss": 0.6302, "step": 110725 }, { "epoch": 1.23, "learning_rate": 2.9565284790504034e-05, "loss": 0.6715, "step": 110730 }, { "epoch": 1.23, "learning_rate": 2.956436206336552e-05, "loss": 0.661, "step": 110735 }, { "epoch": 1.23, "learning_rate": 2.9563439336227007e-05, "loss": 0.6643, "step": 110740 }, { "epoch": 1.23, "learning_rate": 2.956251660908849e-05, "loss": 0.6589, "step": 110745 }, { "epoch": 1.23, "learning_rate": 2.9561593881949982e-05, "loss": 0.6957, "step": 110750 }, { "epoch": 1.23, "learning_rate": 2.956067115481147e-05, "loss": 0.6985, "step": 110755 }, { "epoch": 1.23, "learning_rate": 2.9559748427672958e-05, "loss": 0.6419, "step": 110760 }, { "epoch": 1.23, "learning_rate": 2.9558825700534442e-05, "loss": 0.6027, "step": 110765 }, { "epoch": 1.23, "learning_rate": 2.9557902973395934e-05, "loss": 0.6942, "step": 110770 }, { "epoch": 1.23, "learning_rate": 2.955698024625742e-05, "loss": 0.643, "step": 110775 }, { "epoch": 1.23, "learning_rate": 2.9556057519118906e-05, "loss": 0.687, "step": 110780 }, { "epoch": 1.23, "learning_rate": 2.9555134791980394e-05, "loss": 0.6691, "step": 110785 }, { "epoch": 1.23, "learning_rate": 2.9554212064841885e-05, "loss": 0.6462, "step": 110790 }, { "epoch": 1.23, "learning_rate": 2.9553289337703373e-05, "loss": 0.6384, "step": 110795 }, { "epoch": 1.23, "learning_rate": 2.9552366610564857e-05, "loss": 0.6924, "step": 110800 }, { "epoch": 1.23, "learning_rate": 2.9551443883426345e-05, "loss": 0.6676, "step": 110805 }, { "epoch": 1.23, "learning_rate": 2.9550521156287837e-05, "loss": 0.6669, "step": 110810 }, { "epoch": 1.23, "learning_rate": 2.954959842914932e-05, "loss": 0.6788, "step": 110815 }, { "epoch": 1.23, "learning_rate": 2.954867570201081e-05, "loss": 0.6765, "step": 110820 }, { "epoch": 1.23, "learning_rate": 2.9547752974872293e-05, "loss": 0.6829, "step": 110825 }, { "epoch": 1.23, "learning_rate": 2.954683024773378e-05, "loss": 0.6278, "step": 110830 }, { "epoch": 1.23, "learning_rate": 2.9545907520595272e-05, "loss": 0.6391, "step": 110835 }, { "epoch": 1.23, "learning_rate": 2.954498479345676e-05, "loss": 0.7416, "step": 110840 }, { "epoch": 1.23, "learning_rate": 2.9544062066318245e-05, "loss": 0.652, "step": 110845 }, { "epoch": 1.23, "learning_rate": 2.9543139339179733e-05, "loss": 0.6509, "step": 110850 }, { "epoch": 1.23, "learning_rate": 2.9542216612041224e-05, "loss": 0.6547, "step": 110855 }, { "epoch": 1.23, "learning_rate": 2.9541293884902708e-05, "loss": 0.6458, "step": 110860 }, { "epoch": 1.23, "learning_rate": 2.9540371157764196e-05, "loss": 0.66, "step": 110865 }, { "epoch": 1.23, "learning_rate": 2.9539448430625684e-05, "loss": 0.5982, "step": 110870 }, { "epoch": 1.23, "learning_rate": 2.9538525703487175e-05, "loss": 0.7228, "step": 110875 }, { "epoch": 1.23, "learning_rate": 2.953760297634866e-05, "loss": 0.6835, "step": 110880 }, { "epoch": 1.23, "learning_rate": 2.9536680249210147e-05, "loss": 0.6612, "step": 110885 }, { "epoch": 1.23, "learning_rate": 2.9535757522071632e-05, "loss": 0.6482, "step": 110890 }, { "epoch": 1.23, "learning_rate": 2.953483479493312e-05, "loss": 0.6941, "step": 110895 }, { "epoch": 1.23, "learning_rate": 2.953391206779461e-05, "loss": 0.6156, "step": 110900 }, { "epoch": 1.23, "learning_rate": 2.95329893406561e-05, "loss": 0.6224, "step": 110905 }, { "epoch": 1.23, "learning_rate": 2.9532066613517583e-05, "loss": 0.6444, "step": 110910 }, { "epoch": 1.23, "learning_rate": 2.953114388637907e-05, "loss": 0.6574, "step": 110915 }, { "epoch": 1.23, "learning_rate": 2.9530221159240562e-05, "loss": 0.6862, "step": 110920 }, { "epoch": 1.23, "learning_rate": 2.9529298432102047e-05, "loss": 0.6733, "step": 110925 }, { "epoch": 1.23, "learning_rate": 2.9528375704963535e-05, "loss": 0.711, "step": 110930 }, { "epoch": 1.23, "learning_rate": 2.952745297782502e-05, "loss": 0.6966, "step": 110935 }, { "epoch": 1.23, "learning_rate": 2.9526530250686514e-05, "loss": 0.6668, "step": 110940 }, { "epoch": 1.23, "learning_rate": 2.9525607523547998e-05, "loss": 0.6883, "step": 110945 }, { "epoch": 1.23, "learning_rate": 2.9524684796409486e-05, "loss": 0.7096, "step": 110950 }, { "epoch": 1.23, "learning_rate": 2.952376206927097e-05, "loss": 0.6294, "step": 110955 }, { "epoch": 1.23, "learning_rate": 2.9522839342132462e-05, "loss": 0.6687, "step": 110960 }, { "epoch": 1.23, "learning_rate": 2.952191661499395e-05, "loss": 0.7019, "step": 110965 }, { "epoch": 1.23, "learning_rate": 2.9520993887855438e-05, "loss": 0.6852, "step": 110970 }, { "epoch": 1.23, "learning_rate": 2.9520071160716922e-05, "loss": 0.6582, "step": 110975 }, { "epoch": 1.23, "learning_rate": 2.951914843357841e-05, "loss": 0.7348, "step": 110980 }, { "epoch": 1.23, "learning_rate": 2.95182257064399e-05, "loss": 0.6635, "step": 110985 }, { "epoch": 1.23, "learning_rate": 2.9517302979301386e-05, "loss": 0.6261, "step": 110990 }, { "epoch": 1.23, "learning_rate": 2.9516380252162873e-05, "loss": 0.6808, "step": 110995 }, { "epoch": 1.23, "learning_rate": 2.9515457525024358e-05, "loss": 0.6568, "step": 111000 }, { "epoch": 1.23, "eval_loss": 0.6265407204627991, "eval_runtime": 69.6661, "eval_samples_per_second": 28.708, "eval_steps_per_second": 14.354, "step": 111000 }, { "epoch": 1.23, "learning_rate": 2.9514534797885852e-05, "loss": 0.6539, "step": 111005 }, { "epoch": 1.23, "learning_rate": 2.9513612070747337e-05, "loss": 0.6512, "step": 111010 }, { "epoch": 1.23, "learning_rate": 2.9512689343608825e-05, "loss": 0.7045, "step": 111015 }, { "epoch": 1.23, "learning_rate": 2.951176661647031e-05, "loss": 0.6911, "step": 111020 }, { "epoch": 1.23, "learning_rate": 2.95108438893318e-05, "loss": 0.6477, "step": 111025 }, { "epoch": 1.23, "learning_rate": 2.950992116219329e-05, "loss": 0.6236, "step": 111030 }, { "epoch": 1.23, "learning_rate": 2.9508998435054773e-05, "loss": 0.6321, "step": 111035 }, { "epoch": 1.23, "learning_rate": 2.950807570791626e-05, "loss": 0.6111, "step": 111040 }, { "epoch": 1.23, "learning_rate": 2.950715298077775e-05, "loss": 0.7014, "step": 111045 }, { "epoch": 1.23, "learning_rate": 2.950623025363924e-05, "loss": 0.6546, "step": 111050 }, { "epoch": 1.23, "learning_rate": 2.9505307526500724e-05, "loss": 0.6544, "step": 111055 }, { "epoch": 1.23, "learning_rate": 2.9504384799362212e-05, "loss": 0.715, "step": 111060 }, { "epoch": 1.23, "learning_rate": 2.9503462072223696e-05, "loss": 0.6635, "step": 111065 }, { "epoch": 1.23, "learning_rate": 2.950253934508519e-05, "loss": 0.6911, "step": 111070 }, { "epoch": 1.23, "learning_rate": 2.9501616617946676e-05, "loss": 0.6554, "step": 111075 }, { "epoch": 1.23, "learning_rate": 2.9500693890808163e-05, "loss": 0.7093, "step": 111080 }, { "epoch": 1.23, "learning_rate": 2.9499771163669648e-05, "loss": 0.6202, "step": 111085 }, { "epoch": 1.23, "learning_rate": 2.949884843653114e-05, "loss": 0.6505, "step": 111090 }, { "epoch": 1.23, "learning_rate": 2.9497925709392627e-05, "loss": 0.6139, "step": 111095 }, { "epoch": 1.23, "learning_rate": 2.949700298225411e-05, "loss": 0.6443, "step": 111100 }, { "epoch": 1.23, "learning_rate": 2.94960802551156e-05, "loss": 0.6785, "step": 111105 }, { "epoch": 1.23, "learning_rate": 2.949515752797709e-05, "loss": 0.71, "step": 111110 }, { "epoch": 1.23, "learning_rate": 2.949423480083858e-05, "loss": 0.7079, "step": 111115 }, { "epoch": 1.23, "learning_rate": 2.9493312073700063e-05, "loss": 0.6876, "step": 111120 }, { "epoch": 1.23, "learning_rate": 2.949238934656155e-05, "loss": 0.6852, "step": 111125 }, { "epoch": 1.23, "learning_rate": 2.9491466619423035e-05, "loss": 0.6658, "step": 111130 }, { "epoch": 1.23, "learning_rate": 2.9490543892284526e-05, "loss": 0.6555, "step": 111135 }, { "epoch": 1.23, "learning_rate": 2.9489621165146014e-05, "loss": 0.73, "step": 111140 }, { "epoch": 1.23, "learning_rate": 2.9488698438007502e-05, "loss": 0.6328, "step": 111145 }, { "epoch": 1.23, "learning_rate": 2.9487775710868987e-05, "loss": 0.6528, "step": 111150 }, { "epoch": 1.23, "learning_rate": 2.9486852983730478e-05, "loss": 0.6189, "step": 111155 }, { "epoch": 1.23, "learning_rate": 2.9485930256591966e-05, "loss": 0.6456, "step": 111160 }, { "epoch": 1.23, "learning_rate": 2.948500752945345e-05, "loss": 0.7062, "step": 111165 }, { "epoch": 1.23, "learning_rate": 2.9484084802314938e-05, "loss": 0.6262, "step": 111170 }, { "epoch": 1.23, "learning_rate": 2.948316207517643e-05, "loss": 0.6168, "step": 111175 }, { "epoch": 1.23, "learning_rate": 2.9482239348037917e-05, "loss": 0.6241, "step": 111180 }, { "epoch": 1.23, "learning_rate": 2.94813166208994e-05, "loss": 0.7007, "step": 111185 }, { "epoch": 1.23, "learning_rate": 2.948039389376089e-05, "loss": 0.6696, "step": 111190 }, { "epoch": 1.23, "learning_rate": 2.9479471166622374e-05, "loss": 0.6107, "step": 111195 }, { "epoch": 1.23, "learning_rate": 2.9478548439483865e-05, "loss": 0.7079, "step": 111200 }, { "epoch": 1.23, "learning_rate": 2.9477625712345353e-05, "loss": 0.6758, "step": 111205 }, { "epoch": 1.23, "learning_rate": 2.9476702985206837e-05, "loss": 0.6863, "step": 111210 }, { "epoch": 1.23, "learning_rate": 2.9475780258068325e-05, "loss": 0.6668, "step": 111215 }, { "epoch": 1.23, "learning_rate": 2.9474857530929816e-05, "loss": 0.6576, "step": 111220 }, { "epoch": 1.23, "learning_rate": 2.9473934803791304e-05, "loss": 0.6937, "step": 111225 }, { "epoch": 1.23, "learning_rate": 2.947301207665279e-05, "loss": 0.6252, "step": 111230 }, { "epoch": 1.23, "learning_rate": 2.9472089349514277e-05, "loss": 0.6586, "step": 111235 }, { "epoch": 1.23, "learning_rate": 2.9471166622375768e-05, "loss": 0.6698, "step": 111240 }, { "epoch": 1.23, "learning_rate": 2.9470243895237252e-05, "loss": 0.6517, "step": 111245 }, { "epoch": 1.23, "learning_rate": 2.946932116809874e-05, "loss": 0.6123, "step": 111250 }, { "epoch": 1.23, "learning_rate": 2.9468398440960228e-05, "loss": 0.671, "step": 111255 }, { "epoch": 1.23, "learning_rate": 2.946747571382172e-05, "loss": 0.6816, "step": 111260 }, { "epoch": 1.23, "learning_rate": 2.9466552986683204e-05, "loss": 0.6449, "step": 111265 }, { "epoch": 1.23, "learning_rate": 2.946563025954469e-05, "loss": 0.6866, "step": 111270 }, { "epoch": 1.23, "learning_rate": 2.9464707532406176e-05, "loss": 0.6521, "step": 111275 }, { "epoch": 1.23, "learning_rate": 2.9463784805267664e-05, "loss": 0.6127, "step": 111280 }, { "epoch": 1.23, "learning_rate": 2.9462862078129155e-05, "loss": 0.6606, "step": 111285 }, { "epoch": 1.23, "learning_rate": 2.9461939350990643e-05, "loss": 0.7244, "step": 111290 }, { "epoch": 1.23, "learning_rate": 2.9461016623852127e-05, "loss": 0.6682, "step": 111295 }, { "epoch": 1.23, "learning_rate": 2.9460093896713615e-05, "loss": 0.7291, "step": 111300 }, { "epoch": 1.23, "learning_rate": 2.9459171169575106e-05, "loss": 0.6609, "step": 111305 }, { "epoch": 1.23, "learning_rate": 2.945824844243659e-05, "loss": 0.6172, "step": 111310 }, { "epoch": 1.23, "learning_rate": 2.945732571529808e-05, "loss": 0.6649, "step": 111315 }, { "epoch": 1.23, "learning_rate": 2.9456402988159563e-05, "loss": 0.7782, "step": 111320 }, { "epoch": 1.23, "learning_rate": 2.9455480261021058e-05, "loss": 0.6784, "step": 111325 }, { "epoch": 1.23, "learning_rate": 2.9454557533882542e-05, "loss": 0.7039, "step": 111330 }, { "epoch": 1.23, "learning_rate": 2.945363480674403e-05, "loss": 0.7134, "step": 111335 }, { "epoch": 1.23, "learning_rate": 2.9452712079605515e-05, "loss": 0.6549, "step": 111340 }, { "epoch": 1.23, "learning_rate": 2.9451789352467006e-05, "loss": 0.6751, "step": 111345 }, { "epoch": 1.23, "learning_rate": 2.9450866625328494e-05, "loss": 0.6835, "step": 111350 }, { "epoch": 1.23, "learning_rate": 2.944994389818998e-05, "loss": 0.6232, "step": 111355 }, { "epoch": 1.23, "learning_rate": 2.9449021171051466e-05, "loss": 0.6676, "step": 111360 }, { "epoch": 1.23, "learning_rate": 2.9448098443912954e-05, "loss": 0.7483, "step": 111365 }, { "epoch": 1.23, "learning_rate": 2.9447175716774445e-05, "loss": 0.6397, "step": 111370 }, { "epoch": 1.23, "learning_rate": 2.944625298963593e-05, "loss": 0.6281, "step": 111375 }, { "epoch": 1.23, "learning_rate": 2.9445330262497417e-05, "loss": 0.6471, "step": 111380 }, { "epoch": 1.23, "learning_rate": 2.9444407535358902e-05, "loss": 0.668, "step": 111385 }, { "epoch": 1.23, "learning_rate": 2.9443484808220396e-05, "loss": 0.6336, "step": 111390 }, { "epoch": 1.23, "learning_rate": 2.944256208108188e-05, "loss": 0.6272, "step": 111395 }, { "epoch": 1.23, "learning_rate": 2.944163935394337e-05, "loss": 0.6018, "step": 111400 }, { "epoch": 1.23, "learning_rate": 2.9440716626804853e-05, "loss": 0.6503, "step": 111405 }, { "epoch": 1.23, "learning_rate": 2.9439793899666344e-05, "loss": 0.65, "step": 111410 }, { "epoch": 1.23, "learning_rate": 2.9438871172527832e-05, "loss": 0.6513, "step": 111415 }, { "epoch": 1.23, "learning_rate": 2.9437948445389317e-05, "loss": 0.6698, "step": 111420 }, { "epoch": 1.23, "learning_rate": 2.9437025718250805e-05, "loss": 0.6851, "step": 111425 }, { "epoch": 1.23, "learning_rate": 2.9436102991112292e-05, "loss": 0.6557, "step": 111430 }, { "epoch": 1.23, "learning_rate": 2.9435180263973784e-05, "loss": 0.6752, "step": 111435 }, { "epoch": 1.23, "learning_rate": 2.9434257536835268e-05, "loss": 0.6428, "step": 111440 }, { "epoch": 1.23, "learning_rate": 2.9433334809696756e-05, "loss": 0.6313, "step": 111445 }, { "epoch": 1.23, "learning_rate": 2.943241208255824e-05, "loss": 0.6266, "step": 111450 }, { "epoch": 1.23, "learning_rate": 2.9431489355419735e-05, "loss": 0.6946, "step": 111455 }, { "epoch": 1.23, "learning_rate": 2.943056662828122e-05, "loss": 0.6774, "step": 111460 }, { "epoch": 1.23, "learning_rate": 2.9429643901142707e-05, "loss": 0.7355, "step": 111465 }, { "epoch": 1.23, "learning_rate": 2.9428721174004192e-05, "loss": 0.6663, "step": 111470 }, { "epoch": 1.23, "learning_rate": 2.9427798446865683e-05, "loss": 0.6152, "step": 111475 }, { "epoch": 1.23, "learning_rate": 2.942687571972717e-05, "loss": 0.6221, "step": 111480 }, { "epoch": 1.23, "learning_rate": 2.9425952992588655e-05, "loss": 0.6402, "step": 111485 }, { "epoch": 1.23, "learning_rate": 2.9425030265450143e-05, "loss": 0.6518, "step": 111490 }, { "epoch": 1.23, "learning_rate": 2.9424107538311635e-05, "loss": 0.6115, "step": 111495 }, { "epoch": 1.23, "learning_rate": 2.9423184811173122e-05, "loss": 0.6364, "step": 111500 }, { "epoch": 1.23, "learning_rate": 2.9422262084034607e-05, "loss": 0.6163, "step": 111505 }, { "epoch": 1.23, "learning_rate": 2.9421339356896095e-05, "loss": 0.6538, "step": 111510 }, { "epoch": 1.23, "learning_rate": 2.942041662975758e-05, "loss": 0.7308, "step": 111515 }, { "epoch": 1.23, "learning_rate": 2.941949390261907e-05, "loss": 0.6819, "step": 111520 }, { "epoch": 1.23, "learning_rate": 2.9418571175480558e-05, "loss": 0.6363, "step": 111525 }, { "epoch": 1.23, "learning_rate": 2.9417648448342046e-05, "loss": 0.722, "step": 111530 }, { "epoch": 1.23, "learning_rate": 2.941672572120353e-05, "loss": 0.668, "step": 111535 }, { "epoch": 1.24, "learning_rate": 2.9415802994065022e-05, "loss": 0.6363, "step": 111540 }, { "epoch": 1.24, "learning_rate": 2.941488026692651e-05, "loss": 0.6582, "step": 111545 }, { "epoch": 1.24, "learning_rate": 2.9413957539787994e-05, "loss": 0.6757, "step": 111550 }, { "epoch": 1.24, "learning_rate": 2.9413034812649482e-05, "loss": 0.6512, "step": 111555 }, { "epoch": 1.24, "learning_rate": 2.9412112085510973e-05, "loss": 0.6206, "step": 111560 }, { "epoch": 1.24, "learning_rate": 2.941118935837246e-05, "loss": 0.6598, "step": 111565 }, { "epoch": 1.24, "learning_rate": 2.9410266631233945e-05, "loss": 0.6006, "step": 111570 }, { "epoch": 1.24, "learning_rate": 2.9409343904095433e-05, "loss": 0.6455, "step": 111575 }, { "epoch": 1.24, "learning_rate": 2.9408421176956918e-05, "loss": 0.6416, "step": 111580 }, { "epoch": 1.24, "learning_rate": 2.940749844981841e-05, "loss": 0.6479, "step": 111585 }, { "epoch": 1.24, "learning_rate": 2.9406575722679897e-05, "loss": 0.6537, "step": 111590 }, { "epoch": 1.24, "learning_rate": 2.940565299554138e-05, "loss": 0.6636, "step": 111595 }, { "epoch": 1.24, "learning_rate": 2.940473026840287e-05, "loss": 0.6853, "step": 111600 }, { "epoch": 1.24, "learning_rate": 2.940380754126436e-05, "loss": 0.6117, "step": 111605 }, { "epoch": 1.24, "learning_rate": 2.9402884814125848e-05, "loss": 0.6787, "step": 111610 }, { "epoch": 1.24, "learning_rate": 2.9401962086987333e-05, "loss": 0.6843, "step": 111615 }, { "epoch": 1.24, "learning_rate": 2.940103935984882e-05, "loss": 0.6577, "step": 111620 }, { "epoch": 1.24, "learning_rate": 2.9400116632710312e-05, "loss": 0.673, "step": 111625 }, { "epoch": 1.24, "learning_rate": 2.93991939055718e-05, "loss": 0.6682, "step": 111630 }, { "epoch": 1.24, "learning_rate": 2.9398271178433284e-05, "loss": 0.7036, "step": 111635 }, { "epoch": 1.24, "learning_rate": 2.9397348451294772e-05, "loss": 0.6768, "step": 111640 }, { "epoch": 1.24, "learning_rate": 2.9396425724156263e-05, "loss": 0.6358, "step": 111645 }, { "epoch": 1.24, "learning_rate": 2.9395502997017748e-05, "loss": 0.6989, "step": 111650 }, { "epoch": 1.24, "learning_rate": 2.9394580269879236e-05, "loss": 0.636, "step": 111655 }, { "epoch": 1.24, "learning_rate": 2.939365754274072e-05, "loss": 0.6408, "step": 111660 }, { "epoch": 1.24, "learning_rate": 2.9392734815602208e-05, "loss": 0.6384, "step": 111665 }, { "epoch": 1.24, "learning_rate": 2.93918120884637e-05, "loss": 0.7176, "step": 111670 }, { "epoch": 1.24, "learning_rate": 2.9390889361325187e-05, "loss": 0.6404, "step": 111675 }, { "epoch": 1.24, "learning_rate": 2.938996663418667e-05, "loss": 0.6301, "step": 111680 }, { "epoch": 1.24, "learning_rate": 2.938904390704816e-05, "loss": 0.6882, "step": 111685 }, { "epoch": 1.24, "learning_rate": 2.938812117990965e-05, "loss": 0.6328, "step": 111690 }, { "epoch": 1.24, "learning_rate": 2.9387198452771135e-05, "loss": 0.6772, "step": 111695 }, { "epoch": 1.24, "learning_rate": 2.9386275725632623e-05, "loss": 0.6909, "step": 111700 }, { "epoch": 1.24, "learning_rate": 2.938535299849411e-05, "loss": 0.7086, "step": 111705 }, { "epoch": 1.24, "learning_rate": 2.9384430271355602e-05, "loss": 0.7507, "step": 111710 }, { "epoch": 1.24, "learning_rate": 2.9383507544217086e-05, "loss": 0.6644, "step": 111715 }, { "epoch": 1.24, "learning_rate": 2.9382584817078574e-05, "loss": 0.6582, "step": 111720 }, { "epoch": 1.24, "learning_rate": 2.938166208994006e-05, "loss": 0.7122, "step": 111725 }, { "epoch": 1.24, "learning_rate": 2.9380739362801546e-05, "loss": 0.6298, "step": 111730 }, { "epoch": 1.24, "learning_rate": 2.9379816635663038e-05, "loss": 0.6648, "step": 111735 }, { "epoch": 1.24, "learning_rate": 2.9378893908524526e-05, "loss": 0.6561, "step": 111740 }, { "epoch": 1.24, "learning_rate": 2.937797118138601e-05, "loss": 0.6116, "step": 111745 }, { "epoch": 1.24, "learning_rate": 2.9377048454247498e-05, "loss": 0.6154, "step": 111750 }, { "epoch": 1.24, "learning_rate": 2.937612572710899e-05, "loss": 0.627, "step": 111755 }, { "epoch": 1.24, "learning_rate": 2.9375202999970474e-05, "loss": 0.5834, "step": 111760 }, { "epoch": 1.24, "learning_rate": 2.937428027283196e-05, "loss": 0.6468, "step": 111765 }, { "epoch": 1.24, "learning_rate": 2.9373357545693446e-05, "loss": 0.625, "step": 111770 }, { "epoch": 1.24, "learning_rate": 2.937243481855494e-05, "loss": 0.6656, "step": 111775 }, { "epoch": 1.24, "learning_rate": 2.9371512091416425e-05, "loss": 0.651, "step": 111780 }, { "epoch": 1.24, "learning_rate": 2.9370589364277913e-05, "loss": 0.7388, "step": 111785 }, { "epoch": 1.24, "learning_rate": 2.9369666637139397e-05, "loss": 0.6284, "step": 111790 }, { "epoch": 1.24, "learning_rate": 2.936874391000089e-05, "loss": 0.6849, "step": 111795 }, { "epoch": 1.24, "learning_rate": 2.9367821182862376e-05, "loss": 0.6653, "step": 111800 }, { "epoch": 1.24, "learning_rate": 2.936689845572386e-05, "loss": 0.6561, "step": 111805 }, { "epoch": 1.24, "learning_rate": 2.936597572858535e-05, "loss": 0.6697, "step": 111810 }, { "epoch": 1.24, "learning_rate": 2.9365053001446837e-05, "loss": 0.662, "step": 111815 }, { "epoch": 1.24, "learning_rate": 2.9364130274308328e-05, "loss": 0.6888, "step": 111820 }, { "epoch": 1.24, "learning_rate": 2.9363207547169812e-05, "loss": 0.7237, "step": 111825 }, { "epoch": 1.24, "learning_rate": 2.93622848200313e-05, "loss": 0.6562, "step": 111830 }, { "epoch": 1.24, "learning_rate": 2.9361362092892785e-05, "loss": 0.642, "step": 111835 }, { "epoch": 1.24, "learning_rate": 2.936043936575428e-05, "loss": 0.6922, "step": 111840 }, { "epoch": 1.24, "learning_rate": 2.9359516638615764e-05, "loss": 0.742, "step": 111845 }, { "epoch": 1.24, "learning_rate": 2.935859391147725e-05, "loss": 0.6543, "step": 111850 }, { "epoch": 1.24, "learning_rate": 2.9357671184338736e-05, "loss": 0.6489, "step": 111855 }, { "epoch": 1.24, "learning_rate": 2.9356748457200227e-05, "loss": 0.6317, "step": 111860 }, { "epoch": 1.24, "learning_rate": 2.9355825730061715e-05, "loss": 0.7168, "step": 111865 }, { "epoch": 1.24, "learning_rate": 2.93549030029232e-05, "loss": 0.6246, "step": 111870 }, { "epoch": 1.24, "learning_rate": 2.9353980275784687e-05, "loss": 0.6328, "step": 111875 }, { "epoch": 1.24, "learning_rate": 2.9353057548646172e-05, "loss": 0.6403, "step": 111880 }, { "epoch": 1.24, "learning_rate": 2.9352134821507666e-05, "loss": 0.6981, "step": 111885 }, { "epoch": 1.24, "learning_rate": 2.935121209436915e-05, "loss": 0.6514, "step": 111890 }, { "epoch": 1.24, "learning_rate": 2.935028936723064e-05, "loss": 0.6259, "step": 111895 }, { "epoch": 1.24, "learning_rate": 2.9349366640092123e-05, "loss": 0.7114, "step": 111900 }, { "epoch": 1.24, "learning_rate": 2.9348443912953614e-05, "loss": 0.7079, "step": 111905 }, { "epoch": 1.24, "learning_rate": 2.9347521185815102e-05, "loss": 0.6828, "step": 111910 }, { "epoch": 1.24, "learning_rate": 2.934659845867659e-05, "loss": 0.6878, "step": 111915 }, { "epoch": 1.24, "learning_rate": 2.9345675731538075e-05, "loss": 0.6396, "step": 111920 }, { "epoch": 1.24, "learning_rate": 2.9344753004399566e-05, "loss": 0.6272, "step": 111925 }, { "epoch": 1.24, "learning_rate": 2.9343830277261054e-05, "loss": 0.6754, "step": 111930 }, { "epoch": 1.24, "learning_rate": 2.9342907550122538e-05, "loss": 0.6933, "step": 111935 }, { "epoch": 1.24, "learning_rate": 2.9341984822984026e-05, "loss": 0.6564, "step": 111940 }, { "epoch": 1.24, "learning_rate": 2.9341062095845517e-05, "loss": 0.6519, "step": 111945 }, { "epoch": 1.24, "learning_rate": 2.9340139368707005e-05, "loss": 0.6758, "step": 111950 }, { "epoch": 1.24, "learning_rate": 2.933921664156849e-05, "loss": 0.6237, "step": 111955 }, { "epoch": 1.24, "learning_rate": 2.9338293914429977e-05, "loss": 0.6558, "step": 111960 }, { "epoch": 1.24, "learning_rate": 2.9337371187291462e-05, "loss": 0.6093, "step": 111965 }, { "epoch": 1.24, "learning_rate": 2.9336448460152953e-05, "loss": 0.6501, "step": 111970 }, { "epoch": 1.24, "learning_rate": 2.933552573301444e-05, "loss": 0.6567, "step": 111975 }, { "epoch": 1.24, "learning_rate": 2.9334603005875925e-05, "loss": 0.6695, "step": 111980 }, { "epoch": 1.24, "learning_rate": 2.9333680278737413e-05, "loss": 0.6847, "step": 111985 }, { "epoch": 1.24, "learning_rate": 2.9332757551598904e-05, "loss": 0.6248, "step": 111990 }, { "epoch": 1.24, "learning_rate": 2.9331834824460392e-05, "loss": 0.6956, "step": 111995 }, { "epoch": 1.24, "learning_rate": 2.9330912097321877e-05, "loss": 0.705, "step": 112000 }, { "epoch": 1.24, "eval_loss": 0.6280691623687744, "eval_runtime": 69.7558, "eval_samples_per_second": 28.671, "eval_steps_per_second": 14.336, "step": 112000 }, { "epoch": 1.24, "learning_rate": 2.9329989370183365e-05, "loss": 0.679, "step": 112005 }, { "epoch": 1.24, "learning_rate": 2.9329066643044856e-05, "loss": 0.6818, "step": 112010 }, { "epoch": 1.24, "learning_rate": 2.9328143915906344e-05, "loss": 0.6736, "step": 112015 }, { "epoch": 1.24, "learning_rate": 2.9327221188767828e-05, "loss": 0.6855, "step": 112020 }, { "epoch": 1.24, "learning_rate": 2.9326298461629316e-05, "loss": 0.6319, "step": 112025 }, { "epoch": 1.24, "learning_rate": 2.93253757344908e-05, "loss": 0.6596, "step": 112030 }, { "epoch": 1.24, "learning_rate": 2.932445300735229e-05, "loss": 0.6063, "step": 112035 }, { "epoch": 1.24, "learning_rate": 2.932353028021378e-05, "loss": 0.6544, "step": 112040 }, { "epoch": 1.24, "learning_rate": 2.9322607553075264e-05, "loss": 0.6754, "step": 112045 }, { "epoch": 1.24, "learning_rate": 2.9321684825936752e-05, "loss": 0.6828, "step": 112050 }, { "epoch": 1.24, "learning_rate": 2.9320762098798243e-05, "loss": 0.7087, "step": 112055 }, { "epoch": 1.24, "learning_rate": 2.931983937165973e-05, "loss": 0.6746, "step": 112060 }, { "epoch": 1.24, "learning_rate": 2.9318916644521215e-05, "loss": 0.6039, "step": 112065 }, { "epoch": 1.24, "learning_rate": 2.9317993917382703e-05, "loss": 0.5809, "step": 112070 }, { "epoch": 1.24, "learning_rate": 2.9317071190244194e-05, "loss": 0.6383, "step": 112075 }, { "epoch": 1.24, "learning_rate": 2.931614846310568e-05, "loss": 0.6828, "step": 112080 }, { "epoch": 1.24, "learning_rate": 2.9315225735967167e-05, "loss": 0.6931, "step": 112085 }, { "epoch": 1.24, "learning_rate": 2.9314303008828655e-05, "loss": 0.6357, "step": 112090 }, { "epoch": 1.24, "learning_rate": 2.9313380281690146e-05, "loss": 0.6816, "step": 112095 }, { "epoch": 1.24, "learning_rate": 2.931245755455163e-05, "loss": 0.6882, "step": 112100 }, { "epoch": 1.24, "learning_rate": 2.9311534827413118e-05, "loss": 0.6509, "step": 112105 }, { "epoch": 1.24, "learning_rate": 2.9310612100274603e-05, "loss": 0.645, "step": 112110 }, { "epoch": 1.24, "learning_rate": 2.930968937313609e-05, "loss": 0.6817, "step": 112115 }, { "epoch": 1.24, "learning_rate": 2.9308766645997582e-05, "loss": 0.6645, "step": 112120 }, { "epoch": 1.24, "learning_rate": 2.930784391885907e-05, "loss": 0.6411, "step": 112125 }, { "epoch": 1.24, "learning_rate": 2.9306921191720554e-05, "loss": 0.6846, "step": 112130 }, { "epoch": 1.24, "learning_rate": 2.9305998464582042e-05, "loss": 0.5909, "step": 112135 }, { "epoch": 1.24, "learning_rate": 2.9305075737443533e-05, "loss": 0.695, "step": 112140 }, { "epoch": 1.24, "learning_rate": 2.9304153010305018e-05, "loss": 0.6845, "step": 112145 }, { "epoch": 1.24, "learning_rate": 2.9303230283166505e-05, "loss": 0.6541, "step": 112150 }, { "epoch": 1.24, "learning_rate": 2.930230755602799e-05, "loss": 0.7028, "step": 112155 }, { "epoch": 1.24, "learning_rate": 2.9301384828889485e-05, "loss": 0.6155, "step": 112160 }, { "epoch": 1.24, "learning_rate": 2.930046210175097e-05, "loss": 0.6447, "step": 112165 }, { "epoch": 1.24, "learning_rate": 2.9299539374612457e-05, "loss": 0.6859, "step": 112170 }, { "epoch": 1.24, "learning_rate": 2.929861664747394e-05, "loss": 0.6367, "step": 112175 }, { "epoch": 1.24, "learning_rate": 2.9297693920335433e-05, "loss": 0.638, "step": 112180 }, { "epoch": 1.24, "learning_rate": 2.929677119319692e-05, "loss": 0.6406, "step": 112185 }, { "epoch": 1.24, "learning_rate": 2.9295848466058405e-05, "loss": 0.5742, "step": 112190 }, { "epoch": 1.24, "learning_rate": 2.9294925738919893e-05, "loss": 0.6254, "step": 112195 }, { "epoch": 1.24, "learning_rate": 2.929400301178138e-05, "loss": 0.6342, "step": 112200 }, { "epoch": 1.24, "learning_rate": 2.9293080284642872e-05, "loss": 0.6155, "step": 112205 }, { "epoch": 1.24, "learning_rate": 2.9292157557504356e-05, "loss": 0.7348, "step": 112210 }, { "epoch": 1.24, "learning_rate": 2.9291234830365844e-05, "loss": 0.7383, "step": 112215 }, { "epoch": 1.24, "learning_rate": 2.929031210322733e-05, "loss": 0.6297, "step": 112220 }, { "epoch": 1.24, "learning_rate": 2.9289389376088823e-05, "loss": 0.6778, "step": 112225 }, { "epoch": 1.24, "learning_rate": 2.9288466648950308e-05, "loss": 0.6601, "step": 112230 }, { "epoch": 1.24, "learning_rate": 2.9287543921811795e-05, "loss": 0.6003, "step": 112235 }, { "epoch": 1.24, "learning_rate": 2.928662119467328e-05, "loss": 0.6164, "step": 112240 }, { "epoch": 1.24, "learning_rate": 2.928569846753477e-05, "loss": 0.7187, "step": 112245 }, { "epoch": 1.24, "learning_rate": 2.928477574039626e-05, "loss": 0.6663, "step": 112250 }, { "epoch": 1.24, "learning_rate": 2.9283853013257743e-05, "loss": 0.687, "step": 112255 }, { "epoch": 1.24, "learning_rate": 2.928293028611923e-05, "loss": 0.6518, "step": 112260 }, { "epoch": 1.24, "learning_rate": 2.9282007558980716e-05, "loss": 0.6199, "step": 112265 }, { "epoch": 1.24, "learning_rate": 2.928108483184221e-05, "loss": 0.7353, "step": 112270 }, { "epoch": 1.24, "learning_rate": 2.9280162104703695e-05, "loss": 0.7312, "step": 112275 }, { "epoch": 1.24, "learning_rate": 2.9279239377565183e-05, "loss": 0.7408, "step": 112280 }, { "epoch": 1.24, "learning_rate": 2.9278316650426667e-05, "loss": 0.6551, "step": 112285 }, { "epoch": 1.24, "learning_rate": 2.927739392328816e-05, "loss": 0.6843, "step": 112290 }, { "epoch": 1.24, "learning_rate": 2.9276471196149646e-05, "loss": 0.602, "step": 112295 }, { "epoch": 1.24, "learning_rate": 2.9275548469011134e-05, "loss": 0.6649, "step": 112300 }, { "epoch": 1.24, "learning_rate": 2.927462574187262e-05, "loss": 0.6625, "step": 112305 }, { "epoch": 1.24, "learning_rate": 2.927370301473411e-05, "loss": 0.6726, "step": 112310 }, { "epoch": 1.24, "learning_rate": 2.9272780287595598e-05, "loss": 0.6482, "step": 112315 }, { "epoch": 1.24, "learning_rate": 2.9271857560457082e-05, "loss": 0.7041, "step": 112320 }, { "epoch": 1.24, "learning_rate": 2.927093483331857e-05, "loss": 0.6542, "step": 112325 }, { "epoch": 1.24, "learning_rate": 2.927001210618006e-05, "loss": 0.6868, "step": 112330 }, { "epoch": 1.24, "learning_rate": 2.926908937904155e-05, "loss": 0.6315, "step": 112335 }, { "epoch": 1.24, "learning_rate": 2.9268166651903034e-05, "loss": 0.6923, "step": 112340 }, { "epoch": 1.24, "learning_rate": 2.926724392476452e-05, "loss": 0.6551, "step": 112345 }, { "epoch": 1.24, "learning_rate": 2.9266321197626006e-05, "loss": 0.6433, "step": 112350 }, { "epoch": 1.24, "learning_rate": 2.9265398470487497e-05, "loss": 0.6827, "step": 112355 }, { "epoch": 1.24, "learning_rate": 2.9264475743348985e-05, "loss": 0.7369, "step": 112360 }, { "epoch": 1.24, "learning_rate": 2.926355301621047e-05, "loss": 0.5736, "step": 112365 }, { "epoch": 1.24, "learning_rate": 2.9262630289071957e-05, "loss": 0.6419, "step": 112370 }, { "epoch": 1.24, "learning_rate": 2.926170756193345e-05, "loss": 0.6973, "step": 112375 }, { "epoch": 1.24, "learning_rate": 2.9260784834794936e-05, "loss": 0.6243, "step": 112380 }, { "epoch": 1.24, "learning_rate": 2.925986210765642e-05, "loss": 0.6874, "step": 112385 }, { "epoch": 1.24, "learning_rate": 2.925893938051791e-05, "loss": 0.6582, "step": 112390 }, { "epoch": 1.24, "learning_rate": 2.92580166533794e-05, "loss": 0.6484, "step": 112395 }, { "epoch": 1.24, "learning_rate": 2.9257093926240888e-05, "loss": 0.6399, "step": 112400 }, { "epoch": 1.24, "learning_rate": 2.9256171199102372e-05, "loss": 0.7172, "step": 112405 }, { "epoch": 1.24, "learning_rate": 2.925524847196386e-05, "loss": 0.7036, "step": 112410 }, { "epoch": 1.24, "learning_rate": 2.9254325744825344e-05, "loss": 0.666, "step": 112415 }, { "epoch": 1.24, "learning_rate": 2.9253403017686836e-05, "loss": 0.6568, "step": 112420 }, { "epoch": 1.24, "learning_rate": 2.9252480290548324e-05, "loss": 0.6264, "step": 112425 }, { "epoch": 1.24, "learning_rate": 2.9251557563409808e-05, "loss": 0.6838, "step": 112430 }, { "epoch": 1.24, "learning_rate": 2.9250634836271296e-05, "loss": 0.6477, "step": 112435 }, { "epoch": 1.25, "learning_rate": 2.9249712109132787e-05, "loss": 0.7083, "step": 112440 }, { "epoch": 1.25, "learning_rate": 2.9248789381994275e-05, "loss": 0.649, "step": 112445 }, { "epoch": 1.25, "learning_rate": 2.924786665485576e-05, "loss": 0.6499, "step": 112450 }, { "epoch": 1.25, "learning_rate": 2.9246943927717247e-05, "loss": 0.6284, "step": 112455 }, { "epoch": 1.25, "learning_rate": 2.924602120057874e-05, "loss": 0.6817, "step": 112460 }, { "epoch": 1.25, "learning_rate": 2.9245098473440223e-05, "loss": 0.6774, "step": 112465 }, { "epoch": 1.25, "learning_rate": 2.924417574630171e-05, "loss": 0.644, "step": 112470 }, { "epoch": 1.25, "learning_rate": 2.92432530191632e-05, "loss": 0.6544, "step": 112475 }, { "epoch": 1.25, "learning_rate": 2.924233029202469e-05, "loss": 0.6511, "step": 112480 }, { "epoch": 1.25, "learning_rate": 2.9241407564886174e-05, "loss": 0.6619, "step": 112485 }, { "epoch": 1.25, "learning_rate": 2.9240484837747662e-05, "loss": 0.6213, "step": 112490 }, { "epoch": 1.25, "learning_rate": 2.9239562110609147e-05, "loss": 0.6772, "step": 112495 }, { "epoch": 1.25, "learning_rate": 2.9238639383470635e-05, "loss": 0.6194, "step": 112500 }, { "epoch": 1.25, "learning_rate": 2.9237716656332126e-05, "loss": 0.6789, "step": 112505 }, { "epoch": 1.25, "learning_rate": 2.9236793929193614e-05, "loss": 0.7005, "step": 112510 }, { "epoch": 1.25, "learning_rate": 2.9235871202055098e-05, "loss": 0.6714, "step": 112515 }, { "epoch": 1.25, "learning_rate": 2.9234948474916586e-05, "loss": 0.7131, "step": 112520 }, { "epoch": 1.25, "learning_rate": 2.9234025747778077e-05, "loss": 0.6862, "step": 112525 }, { "epoch": 1.25, "learning_rate": 2.923310302063956e-05, "loss": 0.6004, "step": 112530 }, { "epoch": 1.25, "learning_rate": 2.923218029350105e-05, "loss": 0.7046, "step": 112535 }, { "epoch": 1.25, "learning_rate": 2.9231257566362534e-05, "loss": 0.6862, "step": 112540 }, { "epoch": 1.25, "learning_rate": 2.923033483922403e-05, "loss": 0.636, "step": 112545 }, { "epoch": 1.25, "learning_rate": 2.9229412112085513e-05, "loss": 0.6758, "step": 112550 }, { "epoch": 1.25, "learning_rate": 2.9228489384947e-05, "loss": 0.6582, "step": 112555 }, { "epoch": 1.25, "learning_rate": 2.9227566657808485e-05, "loss": 0.6706, "step": 112560 }, { "epoch": 1.25, "learning_rate": 2.9226643930669973e-05, "loss": 0.6563, "step": 112565 }, { "epoch": 1.25, "learning_rate": 2.9225721203531464e-05, "loss": 0.637, "step": 112570 }, { "epoch": 1.25, "learning_rate": 2.9224798476392952e-05, "loss": 0.7113, "step": 112575 }, { "epoch": 1.25, "learning_rate": 2.9223875749254437e-05, "loss": 0.6431, "step": 112580 }, { "epoch": 1.25, "learning_rate": 2.9222953022115925e-05, "loss": 0.667, "step": 112585 }, { "epoch": 1.25, "learning_rate": 2.9222030294977416e-05, "loss": 0.6326, "step": 112590 }, { "epoch": 1.25, "learning_rate": 2.92211075678389e-05, "loss": 0.6706, "step": 112595 }, { "epoch": 1.25, "learning_rate": 2.9220184840700388e-05, "loss": 0.6307, "step": 112600 }, { "epoch": 1.25, "learning_rate": 2.9219262113561873e-05, "loss": 0.6885, "step": 112605 }, { "epoch": 1.25, "learning_rate": 2.9218339386423367e-05, "loss": 0.5912, "step": 112610 }, { "epoch": 1.25, "learning_rate": 2.921741665928485e-05, "loss": 0.6701, "step": 112615 }, { "epoch": 1.25, "learning_rate": 2.921649393214634e-05, "loss": 0.6448, "step": 112620 }, { "epoch": 1.25, "learning_rate": 2.9215571205007824e-05, "loss": 0.6362, "step": 112625 }, { "epoch": 1.25, "learning_rate": 2.9214648477869315e-05, "loss": 0.6621, "step": 112630 }, { "epoch": 1.25, "learning_rate": 2.9213725750730803e-05, "loss": 0.6777, "step": 112635 }, { "epoch": 1.25, "learning_rate": 2.9212803023592287e-05, "loss": 0.7083, "step": 112640 }, { "epoch": 1.25, "learning_rate": 2.9211880296453775e-05, "loss": 0.6322, "step": 112645 }, { "epoch": 1.25, "learning_rate": 2.9210957569315263e-05, "loss": 0.6931, "step": 112650 }, { "epoch": 1.25, "learning_rate": 2.9210034842176754e-05, "loss": 0.6545, "step": 112655 }, { "epoch": 1.25, "learning_rate": 2.920911211503824e-05, "loss": 0.6294, "step": 112660 }, { "epoch": 1.25, "learning_rate": 2.9208189387899727e-05, "loss": 0.6686, "step": 112665 }, { "epoch": 1.25, "learning_rate": 2.920726666076121e-05, "loss": 0.6883, "step": 112670 }, { "epoch": 1.25, "learning_rate": 2.9206343933622702e-05, "loss": 0.7054, "step": 112675 }, { "epoch": 1.25, "learning_rate": 2.920542120648419e-05, "loss": 0.7036, "step": 112680 }, { "epoch": 1.25, "learning_rate": 2.9204498479345678e-05, "loss": 0.626, "step": 112685 }, { "epoch": 1.25, "learning_rate": 2.9203575752207163e-05, "loss": 0.6303, "step": 112690 }, { "epoch": 1.25, "learning_rate": 2.9202653025068654e-05, "loss": 0.6697, "step": 112695 }, { "epoch": 1.25, "learning_rate": 2.920173029793014e-05, "loss": 0.661, "step": 112700 }, { "epoch": 1.25, "learning_rate": 2.9200807570791626e-05, "loss": 0.6731, "step": 112705 }, { "epoch": 1.25, "learning_rate": 2.9199884843653114e-05, "loss": 0.656, "step": 112710 }, { "epoch": 1.25, "learning_rate": 2.91989621165146e-05, "loss": 0.7054, "step": 112715 }, { "epoch": 1.25, "learning_rate": 2.9198039389376093e-05, "loss": 0.6544, "step": 112720 }, { "epoch": 1.25, "learning_rate": 2.9197116662237578e-05, "loss": 0.6663, "step": 112725 }, { "epoch": 1.25, "learning_rate": 2.9196193935099065e-05, "loss": 0.6363, "step": 112730 }, { "epoch": 1.25, "learning_rate": 2.919527120796055e-05, "loss": 0.6424, "step": 112735 }, { "epoch": 1.25, "learning_rate": 2.919434848082204e-05, "loss": 0.6538, "step": 112740 }, { "epoch": 1.25, "learning_rate": 2.919342575368353e-05, "loss": 0.7099, "step": 112745 }, { "epoch": 1.25, "learning_rate": 2.9192503026545013e-05, "loss": 0.7623, "step": 112750 }, { "epoch": 1.25, "learning_rate": 2.91915802994065e-05, "loss": 0.5958, "step": 112755 }, { "epoch": 1.25, "learning_rate": 2.9190657572267992e-05, "loss": 0.6145, "step": 112760 }, { "epoch": 1.25, "learning_rate": 2.918973484512948e-05, "loss": 0.6087, "step": 112765 }, { "epoch": 1.25, "learning_rate": 2.9188812117990965e-05, "loss": 0.6184, "step": 112770 }, { "epoch": 1.25, "learning_rate": 2.9187889390852453e-05, "loss": 0.6735, "step": 112775 }, { "epoch": 1.25, "learning_rate": 2.9186966663713944e-05, "loss": 0.6216, "step": 112780 }, { "epoch": 1.25, "learning_rate": 2.9186043936575432e-05, "loss": 0.7017, "step": 112785 }, { "epoch": 1.25, "learning_rate": 2.9185121209436916e-05, "loss": 0.6138, "step": 112790 }, { "epoch": 1.25, "learning_rate": 2.9184198482298404e-05, "loss": 0.6678, "step": 112795 }, { "epoch": 1.25, "learning_rate": 2.918327575515989e-05, "loss": 0.6548, "step": 112800 }, { "epoch": 1.25, "learning_rate": 2.918235302802138e-05, "loss": 0.67, "step": 112805 }, { "epoch": 1.25, "learning_rate": 2.9181430300882868e-05, "loss": 0.7073, "step": 112810 }, { "epoch": 1.25, "learning_rate": 2.9180507573744352e-05, "loss": 0.687, "step": 112815 }, { "epoch": 1.25, "learning_rate": 2.917958484660584e-05, "loss": 0.6764, "step": 112820 }, { "epoch": 1.25, "learning_rate": 2.917866211946733e-05, "loss": 0.6445, "step": 112825 }, { "epoch": 1.25, "learning_rate": 2.917773939232882e-05, "loss": 0.6216, "step": 112830 }, { "epoch": 1.25, "learning_rate": 2.9176816665190303e-05, "loss": 0.6245, "step": 112835 }, { "epoch": 1.25, "learning_rate": 2.917589393805179e-05, "loss": 0.7057, "step": 112840 }, { "epoch": 1.25, "learning_rate": 2.9174971210913283e-05, "loss": 0.6818, "step": 112845 }, { "epoch": 1.25, "learning_rate": 2.9174048483774767e-05, "loss": 0.6391, "step": 112850 }, { "epoch": 1.25, "learning_rate": 2.9173125756636255e-05, "loss": 0.7014, "step": 112855 }, { "epoch": 1.25, "learning_rate": 2.9172203029497743e-05, "loss": 0.7314, "step": 112860 }, { "epoch": 1.25, "learning_rate": 2.9171280302359227e-05, "loss": 0.6792, "step": 112865 }, { "epoch": 1.25, "learning_rate": 2.917035757522072e-05, "loss": 0.6641, "step": 112870 }, { "epoch": 1.25, "learning_rate": 2.9169434848082206e-05, "loss": 0.6394, "step": 112875 }, { "epoch": 1.25, "learning_rate": 2.916851212094369e-05, "loss": 0.6912, "step": 112880 }, { "epoch": 1.25, "learning_rate": 2.916758939380518e-05, "loss": 0.6461, "step": 112885 }, { "epoch": 1.25, "learning_rate": 2.916666666666667e-05, "loss": 0.6305, "step": 112890 }, { "epoch": 1.25, "learning_rate": 2.9165743939528158e-05, "loss": 0.6389, "step": 112895 }, { "epoch": 1.25, "learning_rate": 2.9164821212389642e-05, "loss": 0.6675, "step": 112900 }, { "epoch": 1.25, "learning_rate": 2.916389848525113e-05, "loss": 0.6395, "step": 112905 }, { "epoch": 1.25, "learning_rate": 2.916297575811262e-05, "loss": 0.6778, "step": 112910 }, { "epoch": 1.25, "learning_rate": 2.9162053030974106e-05, "loss": 0.5985, "step": 112915 }, { "epoch": 1.25, "learning_rate": 2.9161130303835593e-05, "loss": 0.681, "step": 112920 }, { "epoch": 1.25, "learning_rate": 2.9160207576697078e-05, "loss": 0.6696, "step": 112925 }, { "epoch": 1.25, "learning_rate": 2.9159284849558573e-05, "loss": 0.6479, "step": 112930 }, { "epoch": 1.25, "learning_rate": 2.9158362122420057e-05, "loss": 0.5778, "step": 112935 }, { "epoch": 1.25, "learning_rate": 2.9157439395281545e-05, "loss": 0.6692, "step": 112940 }, { "epoch": 1.25, "learning_rate": 2.915651666814303e-05, "loss": 0.6507, "step": 112945 }, { "epoch": 1.25, "learning_rate": 2.9155593941004517e-05, "loss": 0.6066, "step": 112950 }, { "epoch": 1.25, "learning_rate": 2.915467121386601e-05, "loss": 0.6495, "step": 112955 }, { "epoch": 1.25, "learning_rate": 2.9153748486727496e-05, "loss": 0.6625, "step": 112960 }, { "epoch": 1.25, "learning_rate": 2.915282575958898e-05, "loss": 0.6466, "step": 112965 }, { "epoch": 1.25, "learning_rate": 2.915190303245047e-05, "loss": 0.6455, "step": 112970 }, { "epoch": 1.25, "learning_rate": 2.915098030531196e-05, "loss": 0.6357, "step": 112975 }, { "epoch": 1.25, "learning_rate": 2.9150057578173444e-05, "loss": 0.6417, "step": 112980 }, { "epoch": 1.25, "learning_rate": 2.9149134851034932e-05, "loss": 0.6639, "step": 112985 }, { "epoch": 1.25, "learning_rate": 2.9148212123896417e-05, "loss": 0.6424, "step": 112990 }, { "epoch": 1.25, "learning_rate": 2.914728939675791e-05, "loss": 0.6642, "step": 112995 }, { "epoch": 1.25, "learning_rate": 2.9146366669619396e-05, "loss": 0.6307, "step": 113000 }, { "epoch": 1.25, "eval_loss": 0.6202114224433899, "eval_runtime": 70.0815, "eval_samples_per_second": 28.538, "eval_steps_per_second": 14.269, "step": 113000 }, { "epoch": 1.25, "learning_rate": 2.9145443942480884e-05, "loss": 0.665, "step": 113005 }, { "epoch": 1.25, "learning_rate": 2.9144521215342368e-05, "loss": 0.6577, "step": 113010 }, { "epoch": 1.25, "learning_rate": 2.914359848820386e-05, "loss": 0.6969, "step": 113015 }, { "epoch": 1.25, "learning_rate": 2.9142675761065347e-05, "loss": 0.7053, "step": 113020 }, { "epoch": 1.25, "learning_rate": 2.914175303392683e-05, "loss": 0.6964, "step": 113025 }, { "epoch": 1.25, "learning_rate": 2.914083030678832e-05, "loss": 0.6568, "step": 113030 }, { "epoch": 1.25, "learning_rate": 2.9139907579649807e-05, "loss": 0.6567, "step": 113035 }, { "epoch": 1.25, "learning_rate": 2.91389848525113e-05, "loss": 0.6071, "step": 113040 }, { "epoch": 1.25, "learning_rate": 2.9138062125372783e-05, "loss": 0.6311, "step": 113045 }, { "epoch": 1.25, "learning_rate": 2.913713939823427e-05, "loss": 0.6398, "step": 113050 }, { "epoch": 1.25, "learning_rate": 2.9136216671095755e-05, "loss": 0.6525, "step": 113055 }, { "epoch": 1.25, "learning_rate": 2.9135293943957246e-05, "loss": 0.6398, "step": 113060 }, { "epoch": 1.25, "learning_rate": 2.9134371216818734e-05, "loss": 0.6942, "step": 113065 }, { "epoch": 1.25, "learning_rate": 2.9133448489680222e-05, "loss": 0.6579, "step": 113070 }, { "epoch": 1.25, "learning_rate": 2.9132525762541707e-05, "loss": 0.6921, "step": 113075 }, { "epoch": 1.25, "learning_rate": 2.9131603035403198e-05, "loss": 0.6901, "step": 113080 }, { "epoch": 1.25, "learning_rate": 2.9130680308264686e-05, "loss": 0.6687, "step": 113085 }, { "epoch": 1.25, "learning_rate": 2.912975758112617e-05, "loss": 0.6531, "step": 113090 }, { "epoch": 1.25, "learning_rate": 2.9128834853987658e-05, "loss": 0.6328, "step": 113095 }, { "epoch": 1.25, "learning_rate": 2.9127912126849142e-05, "loss": 0.6603, "step": 113100 }, { "epoch": 1.25, "learning_rate": 2.9126989399710637e-05, "loss": 0.6648, "step": 113105 }, { "epoch": 1.25, "learning_rate": 2.912606667257212e-05, "loss": 0.6682, "step": 113110 }, { "epoch": 1.25, "learning_rate": 2.912514394543361e-05, "loss": 0.6685, "step": 113115 }, { "epoch": 1.25, "learning_rate": 2.9124221218295094e-05, "loss": 0.6645, "step": 113120 }, { "epoch": 1.25, "learning_rate": 2.9123298491156585e-05, "loss": 0.6712, "step": 113125 }, { "epoch": 1.25, "learning_rate": 2.9122375764018073e-05, "loss": 0.6575, "step": 113130 }, { "epoch": 1.25, "learning_rate": 2.9121453036879557e-05, "loss": 0.6215, "step": 113135 }, { "epoch": 1.25, "learning_rate": 2.9120530309741045e-05, "loss": 0.6473, "step": 113140 }, { "epoch": 1.25, "learning_rate": 2.9119607582602536e-05, "loss": 0.6337, "step": 113145 }, { "epoch": 1.25, "learning_rate": 2.9118684855464024e-05, "loss": 0.6604, "step": 113150 }, { "epoch": 1.25, "learning_rate": 2.911776212832551e-05, "loss": 0.69, "step": 113155 }, { "epoch": 1.25, "learning_rate": 2.9116839401186997e-05, "loss": 0.6849, "step": 113160 }, { "epoch": 1.25, "learning_rate": 2.9115916674048488e-05, "loss": 0.6461, "step": 113165 }, { "epoch": 1.25, "learning_rate": 2.9114993946909976e-05, "loss": 0.6295, "step": 113170 }, { "epoch": 1.25, "learning_rate": 2.911407121977146e-05, "loss": 0.6293, "step": 113175 }, { "epoch": 1.25, "learning_rate": 2.9113148492632948e-05, "loss": 0.6538, "step": 113180 }, { "epoch": 1.25, "learning_rate": 2.9112225765494433e-05, "loss": 0.717, "step": 113185 }, { "epoch": 1.25, "learning_rate": 2.9111303038355924e-05, "loss": 0.6287, "step": 113190 }, { "epoch": 1.25, "learning_rate": 2.911038031121741e-05, "loss": 0.6716, "step": 113195 }, { "epoch": 1.25, "learning_rate": 2.9109457584078896e-05, "loss": 0.6519, "step": 113200 }, { "epoch": 1.25, "learning_rate": 2.9108534856940384e-05, "loss": 0.6305, "step": 113205 }, { "epoch": 1.25, "learning_rate": 2.9107612129801875e-05, "loss": 0.6656, "step": 113210 }, { "epoch": 1.25, "learning_rate": 2.9106689402663363e-05, "loss": 0.6578, "step": 113215 }, { "epoch": 1.25, "learning_rate": 2.9105766675524847e-05, "loss": 0.6651, "step": 113220 }, { "epoch": 1.25, "learning_rate": 2.9104843948386335e-05, "loss": 0.693, "step": 113225 }, { "epoch": 1.25, "learning_rate": 2.9103921221247827e-05, "loss": 0.6166, "step": 113230 }, { "epoch": 1.25, "learning_rate": 2.910299849410931e-05, "loss": 0.658, "step": 113235 }, { "epoch": 1.25, "learning_rate": 2.91020757669708e-05, "loss": 0.6452, "step": 113240 }, { "epoch": 1.25, "learning_rate": 2.9101153039832287e-05, "loss": 0.6319, "step": 113245 }, { "epoch": 1.25, "learning_rate": 2.910023031269377e-05, "loss": 0.5995, "step": 113250 }, { "epoch": 1.25, "learning_rate": 2.9099307585555262e-05, "loss": 0.6889, "step": 113255 }, { "epoch": 1.25, "learning_rate": 2.909838485841675e-05, "loss": 0.6136, "step": 113260 }, { "epoch": 1.25, "learning_rate": 2.9097462131278235e-05, "loss": 0.6114, "step": 113265 }, { "epoch": 1.25, "learning_rate": 2.9096539404139723e-05, "loss": 0.6639, "step": 113270 }, { "epoch": 1.25, "learning_rate": 2.9095616677001214e-05, "loss": 0.7115, "step": 113275 }, { "epoch": 1.25, "learning_rate": 2.90946939498627e-05, "loss": 0.6801, "step": 113280 }, { "epoch": 1.25, "learning_rate": 2.9093771222724186e-05, "loss": 0.7108, "step": 113285 }, { "epoch": 1.25, "learning_rate": 2.9092848495585674e-05, "loss": 0.6499, "step": 113290 }, { "epoch": 1.25, "learning_rate": 2.9091925768447165e-05, "loss": 0.6144, "step": 113295 }, { "epoch": 1.25, "learning_rate": 2.909100304130865e-05, "loss": 0.67, "step": 113300 }, { "epoch": 1.25, "learning_rate": 2.9090080314170137e-05, "loss": 0.6556, "step": 113305 }, { "epoch": 1.25, "learning_rate": 2.9089157587031622e-05, "loss": 0.6479, "step": 113310 }, { "epoch": 1.25, "learning_rate": 2.9088234859893117e-05, "loss": 0.6487, "step": 113315 }, { "epoch": 1.25, "learning_rate": 2.90873121327546e-05, "loss": 0.6257, "step": 113320 }, { "epoch": 1.25, "learning_rate": 2.908638940561609e-05, "loss": 0.6973, "step": 113325 }, { "epoch": 1.25, "learning_rate": 2.9085466678477573e-05, "loss": 0.64, "step": 113330 }, { "epoch": 1.25, "learning_rate": 2.908454395133906e-05, "loss": 0.6523, "step": 113335 }, { "epoch": 1.25, "learning_rate": 2.9083621224200552e-05, "loss": 0.6987, "step": 113340 }, { "epoch": 1.26, "learning_rate": 2.908269849706204e-05, "loss": 0.6612, "step": 113345 }, { "epoch": 1.26, "learning_rate": 2.9081775769923525e-05, "loss": 0.6286, "step": 113350 }, { "epoch": 1.26, "learning_rate": 2.9080853042785013e-05, "loss": 0.6633, "step": 113355 }, { "epoch": 1.26, "learning_rate": 2.9079930315646504e-05, "loss": 0.6316, "step": 113360 }, { "epoch": 1.26, "learning_rate": 2.9079007588507988e-05, "loss": 0.6812, "step": 113365 }, { "epoch": 1.26, "learning_rate": 2.9078084861369476e-05, "loss": 0.6439, "step": 113370 }, { "epoch": 1.26, "learning_rate": 2.907716213423096e-05, "loss": 0.5912, "step": 113375 }, { "epoch": 1.26, "learning_rate": 2.9076239407092455e-05, "loss": 0.6512, "step": 113380 }, { "epoch": 1.26, "learning_rate": 2.907531667995394e-05, "loss": 0.6217, "step": 113385 }, { "epoch": 1.26, "learning_rate": 2.9074393952815428e-05, "loss": 0.633, "step": 113390 }, { "epoch": 1.26, "learning_rate": 2.9073471225676912e-05, "loss": 0.6875, "step": 113395 }, { "epoch": 1.26, "learning_rate": 2.90725484985384e-05, "loss": 0.6671, "step": 113400 }, { "epoch": 1.26, "learning_rate": 2.907162577139989e-05, "loss": 0.6793, "step": 113405 }, { "epoch": 1.26, "learning_rate": 2.9070703044261376e-05, "loss": 0.664, "step": 113410 }, { "epoch": 1.26, "learning_rate": 2.9069780317122863e-05, "loss": 0.6116, "step": 113415 }, { "epoch": 1.26, "learning_rate": 2.906885758998435e-05, "loss": 0.5925, "step": 113420 }, { "epoch": 1.26, "learning_rate": 2.9067934862845842e-05, "loss": 0.709, "step": 113425 }, { "epoch": 1.26, "learning_rate": 2.9067012135707327e-05, "loss": 0.6585, "step": 113430 }, { "epoch": 1.26, "learning_rate": 2.9066089408568815e-05, "loss": 0.6204, "step": 113435 }, { "epoch": 1.26, "learning_rate": 2.90651666814303e-05, "loss": 0.6549, "step": 113440 }, { "epoch": 1.26, "learning_rate": 2.906424395429179e-05, "loss": 0.6565, "step": 113445 }, { "epoch": 1.26, "learning_rate": 2.906332122715328e-05, "loss": 0.6251, "step": 113450 }, { "epoch": 1.26, "learning_rate": 2.9062398500014766e-05, "loss": 0.6454, "step": 113455 }, { "epoch": 1.26, "learning_rate": 2.906147577287625e-05, "loss": 0.6311, "step": 113460 }, { "epoch": 1.26, "learning_rate": 2.9060553045737742e-05, "loss": 0.6898, "step": 113465 }, { "epoch": 1.26, "learning_rate": 2.905963031859923e-05, "loss": 0.6718, "step": 113470 }, { "epoch": 1.26, "learning_rate": 2.9058707591460714e-05, "loss": 0.6289, "step": 113475 }, { "epoch": 1.26, "learning_rate": 2.9057784864322202e-05, "loss": 0.6588, "step": 113480 }, { "epoch": 1.26, "learning_rate": 2.9056862137183686e-05, "loss": 0.6572, "step": 113485 }, { "epoch": 1.26, "learning_rate": 2.905593941004518e-05, "loss": 0.7072, "step": 113490 }, { "epoch": 1.26, "learning_rate": 2.9055016682906666e-05, "loss": 0.6663, "step": 113495 }, { "epoch": 1.26, "learning_rate": 2.9054093955768153e-05, "loss": 0.6776, "step": 113500 }, { "epoch": 1.26, "learning_rate": 2.9053171228629638e-05, "loss": 0.6873, "step": 113505 }, { "epoch": 1.26, "learning_rate": 2.905224850149113e-05, "loss": 0.7423, "step": 113510 }, { "epoch": 1.26, "learning_rate": 2.9051325774352617e-05, "loss": 0.5962, "step": 113515 }, { "epoch": 1.26, "learning_rate": 2.90504030472141e-05, "loss": 0.6504, "step": 113520 }, { "epoch": 1.26, "learning_rate": 2.904948032007559e-05, "loss": 0.6799, "step": 113525 }, { "epoch": 1.26, "learning_rate": 2.904855759293708e-05, "loss": 0.6683, "step": 113530 }, { "epoch": 1.26, "learning_rate": 2.904763486579857e-05, "loss": 0.6169, "step": 113535 }, { "epoch": 1.26, "learning_rate": 2.9046712138660053e-05, "loss": 0.6288, "step": 113540 }, { "epoch": 1.26, "learning_rate": 2.904578941152154e-05, "loss": 0.7218, "step": 113545 }, { "epoch": 1.26, "learning_rate": 2.9044866684383025e-05, "loss": 0.6844, "step": 113550 }, { "epoch": 1.26, "learning_rate": 2.904394395724452e-05, "loss": 0.6283, "step": 113555 }, { "epoch": 1.26, "learning_rate": 2.9043021230106004e-05, "loss": 0.6936, "step": 113560 }, { "epoch": 1.26, "learning_rate": 2.9042098502967492e-05, "loss": 0.6726, "step": 113565 }, { "epoch": 1.26, "learning_rate": 2.9041175775828977e-05, "loss": 0.7345, "step": 113570 }, { "epoch": 1.26, "learning_rate": 2.9040253048690468e-05, "loss": 0.607, "step": 113575 }, { "epoch": 1.26, "learning_rate": 2.9039330321551956e-05, "loss": 0.6531, "step": 113580 }, { "epoch": 1.26, "learning_rate": 2.903840759441344e-05, "loss": 0.675, "step": 113585 }, { "epoch": 1.26, "learning_rate": 2.9037484867274928e-05, "loss": 0.6355, "step": 113590 }, { "epoch": 1.26, "learning_rate": 2.903656214013642e-05, "loss": 0.6595, "step": 113595 }, { "epoch": 1.26, "learning_rate": 2.9035639412997907e-05, "loss": 0.657, "step": 113600 }, { "epoch": 1.26, "learning_rate": 2.903471668585939e-05, "loss": 0.6038, "step": 113605 }, { "epoch": 1.26, "learning_rate": 2.903379395872088e-05, "loss": 0.6785, "step": 113610 }, { "epoch": 1.26, "learning_rate": 2.903287123158237e-05, "loss": 0.6484, "step": 113615 }, { "epoch": 1.26, "learning_rate": 2.9031948504443855e-05, "loss": 0.6699, "step": 113620 }, { "epoch": 1.26, "learning_rate": 2.9031025777305343e-05, "loss": 0.6919, "step": 113625 }, { "epoch": 1.26, "learning_rate": 2.903010305016683e-05, "loss": 0.6485, "step": 113630 }, { "epoch": 1.26, "learning_rate": 2.9029180323028315e-05, "loss": 0.6853, "step": 113635 }, { "epoch": 1.26, "learning_rate": 2.9028257595889806e-05, "loss": 0.6514, "step": 113640 }, { "epoch": 1.26, "learning_rate": 2.9027334868751294e-05, "loss": 0.6639, "step": 113645 }, { "epoch": 1.26, "learning_rate": 2.902641214161278e-05, "loss": 0.5956, "step": 113650 }, { "epoch": 1.26, "learning_rate": 2.9025489414474267e-05, "loss": 0.6463, "step": 113655 }, { "epoch": 1.26, "learning_rate": 2.9024566687335758e-05, "loss": 0.6919, "step": 113660 }, { "epoch": 1.26, "learning_rate": 2.9023643960197246e-05, "loss": 0.643, "step": 113665 }, { "epoch": 1.26, "learning_rate": 2.902272123305873e-05, "loss": 0.6886, "step": 113670 }, { "epoch": 1.26, "learning_rate": 2.9021798505920218e-05, "loss": 0.6455, "step": 113675 }, { "epoch": 1.26, "learning_rate": 2.902087577878171e-05, "loss": 0.6924, "step": 113680 }, { "epoch": 1.26, "learning_rate": 2.9019953051643194e-05, "loss": 0.6769, "step": 113685 }, { "epoch": 1.26, "learning_rate": 2.901903032450468e-05, "loss": 0.6074, "step": 113690 }, { "epoch": 1.26, "learning_rate": 2.9018107597366166e-05, "loss": 0.7204, "step": 113695 }, { "epoch": 1.26, "learning_rate": 2.9017184870227654e-05, "loss": 0.6584, "step": 113700 }, { "epoch": 1.26, "learning_rate": 2.9016262143089145e-05, "loss": 0.6527, "step": 113705 }, { "epoch": 1.26, "learning_rate": 2.9015339415950633e-05, "loss": 0.6496, "step": 113710 }, { "epoch": 1.26, "learning_rate": 2.9014416688812117e-05, "loss": 0.6943, "step": 113715 }, { "epoch": 1.26, "learning_rate": 2.9013493961673605e-05, "loss": 0.6336, "step": 113720 }, { "epoch": 1.26, "learning_rate": 2.9012571234535096e-05, "loss": 0.6725, "step": 113725 }, { "epoch": 1.26, "learning_rate": 2.9011648507396584e-05, "loss": 0.6687, "step": 113730 }, { "epoch": 1.26, "learning_rate": 2.901072578025807e-05, "loss": 0.644, "step": 113735 }, { "epoch": 1.26, "learning_rate": 2.9009803053119557e-05, "loss": 0.7294, "step": 113740 }, { "epoch": 1.26, "learning_rate": 2.9008880325981048e-05, "loss": 0.6499, "step": 113745 }, { "epoch": 1.26, "learning_rate": 2.9007957598842532e-05, "loss": 0.6658, "step": 113750 }, { "epoch": 1.26, "learning_rate": 2.900703487170402e-05, "loss": 0.6763, "step": 113755 }, { "epoch": 1.26, "learning_rate": 2.9006112144565505e-05, "loss": 0.6771, "step": 113760 }, { "epoch": 1.26, "learning_rate": 2.9005189417427e-05, "loss": 0.6706, "step": 113765 }, { "epoch": 1.26, "learning_rate": 2.9004266690288484e-05, "loss": 0.6849, "step": 113770 }, { "epoch": 1.26, "learning_rate": 2.900334396314997e-05, "loss": 0.6683, "step": 113775 }, { "epoch": 1.26, "learning_rate": 2.9002421236011456e-05, "loss": 0.6531, "step": 113780 }, { "epoch": 1.26, "learning_rate": 2.9001498508872944e-05, "loss": 0.64, "step": 113785 }, { "epoch": 1.26, "learning_rate": 2.9000575781734435e-05, "loss": 0.6715, "step": 113790 }, { "epoch": 1.26, "learning_rate": 2.899965305459592e-05, "loss": 0.648, "step": 113795 }, { "epoch": 1.26, "learning_rate": 2.8998730327457407e-05, "loss": 0.6709, "step": 113800 }, { "epoch": 1.26, "learning_rate": 2.8997807600318895e-05, "loss": 0.682, "step": 113805 }, { "epoch": 1.26, "learning_rate": 2.8996884873180386e-05, "loss": 0.6796, "step": 113810 }, { "epoch": 1.26, "learning_rate": 2.899596214604187e-05, "loss": 0.6568, "step": 113815 }, { "epoch": 1.26, "learning_rate": 2.899503941890336e-05, "loss": 0.6707, "step": 113820 }, { "epoch": 1.26, "learning_rate": 2.8994116691764843e-05, "loss": 0.6657, "step": 113825 }, { "epoch": 1.26, "learning_rate": 2.8993193964626338e-05, "loss": 0.6635, "step": 113830 }, { "epoch": 1.26, "learning_rate": 2.8992271237487822e-05, "loss": 0.6074, "step": 113835 }, { "epoch": 1.26, "learning_rate": 2.899134851034931e-05, "loss": 0.6531, "step": 113840 }, { "epoch": 1.26, "learning_rate": 2.8990425783210795e-05, "loss": 0.6706, "step": 113845 }, { "epoch": 1.26, "learning_rate": 2.8989503056072286e-05, "loss": 0.6651, "step": 113850 }, { "epoch": 1.26, "learning_rate": 2.8988580328933774e-05, "loss": 0.6364, "step": 113855 }, { "epoch": 1.26, "learning_rate": 2.8987657601795258e-05, "loss": 0.6559, "step": 113860 }, { "epoch": 1.26, "learning_rate": 2.8986734874656746e-05, "loss": 0.6838, "step": 113865 }, { "epoch": 1.26, "learning_rate": 2.898581214751823e-05, "loss": 0.6755, "step": 113870 }, { "epoch": 1.26, "learning_rate": 2.8984889420379725e-05, "loss": 0.6567, "step": 113875 }, { "epoch": 1.26, "learning_rate": 2.898396669324121e-05, "loss": 0.5727, "step": 113880 }, { "epoch": 1.26, "learning_rate": 2.8983043966102697e-05, "loss": 0.6278, "step": 113885 }, { "epoch": 1.26, "learning_rate": 2.8982121238964182e-05, "loss": 0.6031, "step": 113890 }, { "epoch": 1.26, "learning_rate": 2.8981198511825673e-05, "loss": 0.6041, "step": 113895 }, { "epoch": 1.26, "learning_rate": 2.898027578468716e-05, "loss": 0.6658, "step": 113900 }, { "epoch": 1.26, "learning_rate": 2.897935305754865e-05, "loss": 0.7169, "step": 113905 }, { "epoch": 1.26, "learning_rate": 2.8978430330410133e-05, "loss": 0.6713, "step": 113910 }, { "epoch": 1.26, "learning_rate": 2.8977507603271625e-05, "loss": 0.7157, "step": 113915 }, { "epoch": 1.26, "learning_rate": 2.8976584876133112e-05, "loss": 0.6645, "step": 113920 }, { "epoch": 1.26, "learning_rate": 2.8975662148994597e-05, "loss": 0.6777, "step": 113925 }, { "epoch": 1.26, "learning_rate": 2.8974739421856085e-05, "loss": 0.6931, "step": 113930 }, { "epoch": 1.26, "learning_rate": 2.897381669471757e-05, "loss": 0.7068, "step": 113935 }, { "epoch": 1.26, "learning_rate": 2.8972893967579064e-05, "loss": 0.6303, "step": 113940 }, { "epoch": 1.26, "learning_rate": 2.8971971240440548e-05, "loss": 0.6882, "step": 113945 }, { "epoch": 1.26, "learning_rate": 2.8971048513302036e-05, "loss": 0.6904, "step": 113950 }, { "epoch": 1.26, "learning_rate": 2.897012578616352e-05, "loss": 0.6464, "step": 113955 }, { "epoch": 1.26, "learning_rate": 2.8969203059025012e-05, "loss": 0.6698, "step": 113960 }, { "epoch": 1.26, "learning_rate": 2.89682803318865e-05, "loss": 0.6811, "step": 113965 }, { "epoch": 1.26, "learning_rate": 2.8967357604747984e-05, "loss": 0.6682, "step": 113970 }, { "epoch": 1.26, "learning_rate": 2.8966434877609472e-05, "loss": 0.6993, "step": 113975 }, { "epoch": 1.26, "learning_rate": 2.8965512150470963e-05, "loss": 0.6938, "step": 113980 }, { "epoch": 1.26, "learning_rate": 2.896458942333245e-05, "loss": 0.6589, "step": 113985 }, { "epoch": 1.26, "learning_rate": 2.8963666696193935e-05, "loss": 0.6511, "step": 113990 }, { "epoch": 1.26, "learning_rate": 2.8962743969055423e-05, "loss": 0.6779, "step": 113995 }, { "epoch": 1.26, "learning_rate": 2.8961821241916915e-05, "loss": 0.6828, "step": 114000 }, { "epoch": 1.26, "eval_loss": 0.6158350706100464, "eval_runtime": 69.6281, "eval_samples_per_second": 28.724, "eval_steps_per_second": 14.362, "step": 114000 }, { "epoch": 1.26, "learning_rate": 2.89608985147784e-05, "loss": 0.6443, "step": 114005 }, { "epoch": 1.26, "learning_rate": 2.8959975787639887e-05, "loss": 0.6468, "step": 114010 }, { "epoch": 1.26, "learning_rate": 2.8959053060501375e-05, "loss": 0.616, "step": 114015 }, { "epoch": 1.26, "learning_rate": 2.895813033336286e-05, "loss": 0.6619, "step": 114020 }, { "epoch": 1.26, "learning_rate": 2.895720760622435e-05, "loss": 0.698, "step": 114025 }, { "epoch": 1.26, "learning_rate": 2.8956284879085838e-05, "loss": 0.6058, "step": 114030 }, { "epoch": 1.26, "learning_rate": 2.8955362151947323e-05, "loss": 0.6531, "step": 114035 }, { "epoch": 1.26, "learning_rate": 2.895443942480881e-05, "loss": 0.6681, "step": 114040 }, { "epoch": 1.26, "learning_rate": 2.8953516697670302e-05, "loss": 0.5608, "step": 114045 }, { "epoch": 1.26, "learning_rate": 2.895259397053179e-05, "loss": 0.7287, "step": 114050 }, { "epoch": 1.26, "learning_rate": 2.8951671243393274e-05, "loss": 0.6629, "step": 114055 }, { "epoch": 1.26, "learning_rate": 2.8950748516254762e-05, "loss": 0.6949, "step": 114060 }, { "epoch": 1.26, "learning_rate": 2.8949825789116253e-05, "loss": 0.7224, "step": 114065 }, { "epoch": 1.26, "learning_rate": 2.8948903061977738e-05, "loss": 0.6177, "step": 114070 }, { "epoch": 1.26, "learning_rate": 2.8947980334839226e-05, "loss": 0.7408, "step": 114075 }, { "epoch": 1.26, "learning_rate": 2.894705760770071e-05, "loss": 0.6463, "step": 114080 }, { "epoch": 1.26, "learning_rate": 2.8946134880562198e-05, "loss": 0.6436, "step": 114085 }, { "epoch": 1.26, "learning_rate": 2.894521215342369e-05, "loss": 0.6448, "step": 114090 }, { "epoch": 1.26, "learning_rate": 2.8944289426285177e-05, "loss": 0.6791, "step": 114095 }, { "epoch": 1.26, "learning_rate": 2.894336669914666e-05, "loss": 0.6671, "step": 114100 }, { "epoch": 1.26, "learning_rate": 2.894244397200815e-05, "loss": 0.652, "step": 114105 }, { "epoch": 1.26, "learning_rate": 2.894152124486964e-05, "loss": 0.7042, "step": 114110 }, { "epoch": 1.26, "learning_rate": 2.894059851773113e-05, "loss": 0.6514, "step": 114115 }, { "epoch": 1.26, "learning_rate": 2.8939675790592613e-05, "loss": 0.6304, "step": 114120 }, { "epoch": 1.26, "learning_rate": 2.89387530634541e-05, "loss": 0.6261, "step": 114125 }, { "epoch": 1.26, "learning_rate": 2.8937830336315592e-05, "loss": 0.6885, "step": 114130 }, { "epoch": 1.26, "learning_rate": 2.8936907609177076e-05, "loss": 0.6092, "step": 114135 }, { "epoch": 1.26, "learning_rate": 2.8935984882038564e-05, "loss": 0.6282, "step": 114140 }, { "epoch": 1.26, "learning_rate": 2.893506215490005e-05, "loss": 0.6698, "step": 114145 }, { "epoch": 1.26, "learning_rate": 2.8934139427761543e-05, "loss": 0.6908, "step": 114150 }, { "epoch": 1.26, "learning_rate": 2.8933216700623028e-05, "loss": 0.6821, "step": 114155 }, { "epoch": 1.26, "learning_rate": 2.8932293973484516e-05, "loss": 0.6464, "step": 114160 }, { "epoch": 1.26, "learning_rate": 2.8931371246346e-05, "loss": 0.7091, "step": 114165 }, { "epoch": 1.26, "learning_rate": 2.8930448519207488e-05, "loss": 0.6695, "step": 114170 }, { "epoch": 1.26, "learning_rate": 2.892952579206898e-05, "loss": 0.6015, "step": 114175 }, { "epoch": 1.26, "learning_rate": 2.8928603064930464e-05, "loss": 0.6663, "step": 114180 }, { "epoch": 1.26, "learning_rate": 2.892768033779195e-05, "loss": 0.6758, "step": 114185 }, { "epoch": 1.26, "learning_rate": 2.892675761065344e-05, "loss": 0.6654, "step": 114190 }, { "epoch": 1.26, "learning_rate": 2.892583488351493e-05, "loss": 0.6891, "step": 114195 }, { "epoch": 1.26, "learning_rate": 2.8924912156376415e-05, "loss": 0.6712, "step": 114200 }, { "epoch": 1.26, "learning_rate": 2.8923989429237903e-05, "loss": 0.678, "step": 114205 }, { "epoch": 1.26, "learning_rate": 2.8923066702099387e-05, "loss": 0.6276, "step": 114210 }, { "epoch": 1.26, "learning_rate": 2.8922143974960882e-05, "loss": 0.6116, "step": 114215 }, { "epoch": 1.26, "learning_rate": 2.8921221247822366e-05, "loss": 0.6652, "step": 114220 }, { "epoch": 1.26, "learning_rate": 2.8920298520683854e-05, "loss": 0.6767, "step": 114225 }, { "epoch": 1.26, "learning_rate": 2.891937579354534e-05, "loss": 0.5973, "step": 114230 }, { "epoch": 1.26, "learning_rate": 2.8918453066406827e-05, "loss": 0.6723, "step": 114235 }, { "epoch": 1.26, "learning_rate": 2.8917530339268318e-05, "loss": 0.6602, "step": 114240 }, { "epoch": 1.27, "learning_rate": 2.8916607612129802e-05, "loss": 0.7095, "step": 114245 }, { "epoch": 1.27, "learning_rate": 2.891568488499129e-05, "loss": 0.6361, "step": 114250 }, { "epoch": 1.27, "learning_rate": 2.8914762157852775e-05, "loss": 0.7153, "step": 114255 }, { "epoch": 1.27, "learning_rate": 2.891383943071427e-05, "loss": 0.6906, "step": 114260 }, { "epoch": 1.27, "learning_rate": 2.8912916703575754e-05, "loss": 0.6959, "step": 114265 }, { "epoch": 1.27, "learning_rate": 2.891199397643724e-05, "loss": 0.7196, "step": 114270 }, { "epoch": 1.27, "learning_rate": 2.8911071249298726e-05, "loss": 0.6833, "step": 114275 }, { "epoch": 1.27, "learning_rate": 2.8910148522160217e-05, "loss": 0.6754, "step": 114280 }, { "epoch": 1.27, "learning_rate": 2.8909225795021705e-05, "loss": 0.6771, "step": 114285 }, { "epoch": 1.27, "learning_rate": 2.8908303067883193e-05, "loss": 0.6713, "step": 114290 }, { "epoch": 1.27, "learning_rate": 2.8907380340744677e-05, "loss": 0.6859, "step": 114295 }, { "epoch": 1.27, "learning_rate": 2.890645761360617e-05, "loss": 0.687, "step": 114300 }, { "epoch": 1.27, "learning_rate": 2.8905534886467656e-05, "loss": 0.5952, "step": 114305 }, { "epoch": 1.27, "learning_rate": 2.890461215932914e-05, "loss": 0.6417, "step": 114310 }, { "epoch": 1.27, "learning_rate": 2.890368943219063e-05, "loss": 0.6592, "step": 114315 }, { "epoch": 1.27, "learning_rate": 2.8902766705052113e-05, "loss": 0.6553, "step": 114320 }, { "epoch": 1.27, "learning_rate": 2.8901843977913608e-05, "loss": 0.6337, "step": 114325 }, { "epoch": 1.27, "learning_rate": 2.8900921250775092e-05, "loss": 0.6503, "step": 114330 }, { "epoch": 1.27, "learning_rate": 2.889999852363658e-05, "loss": 0.6265, "step": 114335 }, { "epoch": 1.27, "learning_rate": 2.8899075796498065e-05, "loss": 0.6204, "step": 114340 }, { "epoch": 1.27, "learning_rate": 2.8898153069359556e-05, "loss": 0.6925, "step": 114345 }, { "epoch": 1.27, "learning_rate": 2.8897230342221044e-05, "loss": 0.6491, "step": 114350 }, { "epoch": 1.27, "learning_rate": 2.8896307615082528e-05, "loss": 0.6628, "step": 114355 }, { "epoch": 1.27, "learning_rate": 2.8895384887944016e-05, "loss": 0.7514, "step": 114360 }, { "epoch": 1.27, "learning_rate": 2.8894462160805507e-05, "loss": 0.5913, "step": 114365 }, { "epoch": 1.27, "learning_rate": 2.8893539433666995e-05, "loss": 0.6606, "step": 114370 }, { "epoch": 1.27, "learning_rate": 2.889261670652848e-05, "loss": 0.6149, "step": 114375 }, { "epoch": 1.27, "learning_rate": 2.8891693979389967e-05, "loss": 0.6821, "step": 114380 }, { "epoch": 1.27, "learning_rate": 2.8890771252251452e-05, "loss": 0.6704, "step": 114385 }, { "epoch": 1.27, "learning_rate": 2.8889848525112943e-05, "loss": 0.6844, "step": 114390 }, { "epoch": 1.27, "learning_rate": 2.888892579797443e-05, "loss": 0.6677, "step": 114395 }, { "epoch": 1.27, "learning_rate": 2.888800307083592e-05, "loss": 0.6596, "step": 114400 }, { "epoch": 1.27, "learning_rate": 2.8887080343697403e-05, "loss": 0.6452, "step": 114405 }, { "epoch": 1.27, "learning_rate": 2.8886157616558894e-05, "loss": 0.6528, "step": 114410 }, { "epoch": 1.27, "learning_rate": 2.8885234889420382e-05, "loss": 0.6501, "step": 114415 }, { "epoch": 1.27, "learning_rate": 2.8884312162281867e-05, "loss": 0.6723, "step": 114420 }, { "epoch": 1.27, "learning_rate": 2.8883389435143355e-05, "loss": 0.6494, "step": 114425 }, { "epoch": 1.27, "learning_rate": 2.8882466708004846e-05, "loss": 0.6837, "step": 114430 }, { "epoch": 1.27, "learning_rate": 2.8881543980866334e-05, "loss": 0.656, "step": 114435 }, { "epoch": 1.27, "learning_rate": 2.8880621253727818e-05, "loss": 0.6867, "step": 114440 }, { "epoch": 1.27, "learning_rate": 2.8879698526589306e-05, "loss": 0.6949, "step": 114445 }, { "epoch": 1.27, "learning_rate": 2.8878775799450797e-05, "loss": 0.6835, "step": 114450 }, { "epoch": 1.27, "learning_rate": 2.887785307231228e-05, "loss": 0.6823, "step": 114455 }, { "epoch": 1.27, "learning_rate": 2.887693034517377e-05, "loss": 0.6782, "step": 114460 }, { "epoch": 1.27, "learning_rate": 2.8876007618035254e-05, "loss": 0.637, "step": 114465 }, { "epoch": 1.27, "learning_rate": 2.8875084890896742e-05, "loss": 0.6281, "step": 114470 }, { "epoch": 1.27, "learning_rate": 2.8874162163758233e-05, "loss": 0.706, "step": 114475 }, { "epoch": 1.27, "learning_rate": 2.887323943661972e-05, "loss": 0.654, "step": 114480 }, { "epoch": 1.27, "learning_rate": 2.8872316709481205e-05, "loss": 0.6459, "step": 114485 }, { "epoch": 1.27, "learning_rate": 2.8871393982342693e-05, "loss": 0.6392, "step": 114490 }, { "epoch": 1.27, "learning_rate": 2.8870471255204184e-05, "loss": 0.6413, "step": 114495 }, { "epoch": 1.27, "learning_rate": 2.8869548528065672e-05, "loss": 0.6855, "step": 114500 }, { "epoch": 1.27, "learning_rate": 2.8868625800927157e-05, "loss": 0.6625, "step": 114505 }, { "epoch": 1.27, "learning_rate": 2.8867703073788645e-05, "loss": 0.6343, "step": 114510 }, { "epoch": 1.27, "learning_rate": 2.8866780346650136e-05, "loss": 0.6362, "step": 114515 }, { "epoch": 1.27, "learning_rate": 2.886585761951162e-05, "loss": 0.7278, "step": 114520 }, { "epoch": 1.27, "learning_rate": 2.8864934892373108e-05, "loss": 0.6668, "step": 114525 }, { "epoch": 1.27, "learning_rate": 2.8864012165234593e-05, "loss": 0.6489, "step": 114530 }, { "epoch": 1.27, "learning_rate": 2.886308943809608e-05, "loss": 0.5703, "step": 114535 }, { "epoch": 1.27, "learning_rate": 2.8862166710957572e-05, "loss": 0.6602, "step": 114540 }, { "epoch": 1.27, "learning_rate": 2.886124398381906e-05, "loss": 0.6515, "step": 114545 }, { "epoch": 1.27, "learning_rate": 2.8860321256680544e-05, "loss": 0.6233, "step": 114550 }, { "epoch": 1.27, "learning_rate": 2.8859398529542032e-05, "loss": 0.661, "step": 114555 }, { "epoch": 1.27, "learning_rate": 2.8858475802403523e-05, "loss": 0.6354, "step": 114560 }, { "epoch": 1.27, "learning_rate": 2.8857553075265008e-05, "loss": 0.6856, "step": 114565 }, { "epoch": 1.27, "learning_rate": 2.8856630348126495e-05, "loss": 0.6976, "step": 114570 }, { "epoch": 1.27, "learning_rate": 2.8855707620987983e-05, "loss": 0.7214, "step": 114575 }, { "epoch": 1.27, "learning_rate": 2.8854784893849475e-05, "loss": 0.6299, "step": 114580 }, { "epoch": 1.27, "learning_rate": 2.885386216671096e-05, "loss": 0.6677, "step": 114585 }, { "epoch": 1.27, "learning_rate": 2.8852939439572447e-05, "loss": 0.6937, "step": 114590 }, { "epoch": 1.27, "learning_rate": 2.885201671243393e-05, "loss": 0.6829, "step": 114595 }, { "epoch": 1.27, "learning_rate": 2.8851093985295426e-05, "loss": 0.6657, "step": 114600 }, { "epoch": 1.27, "learning_rate": 2.885017125815691e-05, "loss": 0.6724, "step": 114605 }, { "epoch": 1.27, "learning_rate": 2.8849248531018398e-05, "loss": 0.6786, "step": 114610 }, { "epoch": 1.27, "learning_rate": 2.8848325803879883e-05, "loss": 0.6403, "step": 114615 }, { "epoch": 1.27, "learning_rate": 2.884740307674137e-05, "loss": 0.6771, "step": 114620 }, { "epoch": 1.27, "learning_rate": 2.8846480349602862e-05, "loss": 0.5938, "step": 114625 }, { "epoch": 1.27, "learning_rate": 2.8845557622464346e-05, "loss": 0.6807, "step": 114630 }, { "epoch": 1.27, "learning_rate": 2.8844634895325834e-05, "loss": 0.6421, "step": 114635 }, { "epoch": 1.27, "learning_rate": 2.884371216818732e-05, "loss": 0.6166, "step": 114640 }, { "epoch": 1.27, "learning_rate": 2.8842789441048813e-05, "loss": 0.6611, "step": 114645 }, { "epoch": 1.27, "learning_rate": 2.8841866713910298e-05, "loss": 0.6423, "step": 114650 }, { "epoch": 1.27, "learning_rate": 2.8840943986771785e-05, "loss": 0.6529, "step": 114655 }, { "epoch": 1.27, "learning_rate": 2.884002125963327e-05, "loss": 0.6079, "step": 114660 }, { "epoch": 1.27, "learning_rate": 2.883909853249476e-05, "loss": 0.6117, "step": 114665 }, { "epoch": 1.27, "learning_rate": 2.883817580535625e-05, "loss": 0.6557, "step": 114670 }, { "epoch": 1.27, "learning_rate": 2.8837253078217737e-05, "loss": 0.6726, "step": 114675 }, { "epoch": 1.27, "learning_rate": 2.883633035107922e-05, "loss": 0.6686, "step": 114680 }, { "epoch": 1.27, "learning_rate": 2.8835407623940713e-05, "loss": 0.631, "step": 114685 }, { "epoch": 1.27, "learning_rate": 2.88344848968022e-05, "loss": 0.6847, "step": 114690 }, { "epoch": 1.27, "learning_rate": 2.8833562169663685e-05, "loss": 0.6016, "step": 114695 }, { "epoch": 1.27, "learning_rate": 2.8832639442525173e-05, "loss": 0.6579, "step": 114700 }, { "epoch": 1.27, "learning_rate": 2.8831716715386657e-05, "loss": 0.6675, "step": 114705 }, { "epoch": 1.27, "learning_rate": 2.8830793988248152e-05, "loss": 0.6615, "step": 114710 }, { "epoch": 1.27, "learning_rate": 2.8829871261109636e-05, "loss": 0.6788, "step": 114715 }, { "epoch": 1.27, "learning_rate": 2.8828948533971124e-05, "loss": 0.6668, "step": 114720 }, { "epoch": 1.27, "learning_rate": 2.882802580683261e-05, "loss": 0.6259, "step": 114725 }, { "epoch": 1.27, "learning_rate": 2.88271030796941e-05, "loss": 0.6706, "step": 114730 }, { "epoch": 1.27, "learning_rate": 2.8826180352555588e-05, "loss": 0.6792, "step": 114735 }, { "epoch": 1.27, "learning_rate": 2.8825257625417072e-05, "loss": 0.6273, "step": 114740 }, { "epoch": 1.27, "learning_rate": 2.882433489827856e-05, "loss": 0.6496, "step": 114745 }, { "epoch": 1.27, "learning_rate": 2.882341217114005e-05, "loss": 0.6258, "step": 114750 }, { "epoch": 1.27, "learning_rate": 2.882248944400154e-05, "loss": 0.6573, "step": 114755 }, { "epoch": 1.27, "learning_rate": 2.8821566716863024e-05, "loss": 0.6192, "step": 114760 }, { "epoch": 1.27, "learning_rate": 2.882064398972451e-05, "loss": 0.6426, "step": 114765 }, { "epoch": 1.27, "learning_rate": 2.8819721262585996e-05, "loss": 0.6842, "step": 114770 }, { "epoch": 1.27, "learning_rate": 2.8818798535447487e-05, "loss": 0.6651, "step": 114775 }, { "epoch": 1.27, "learning_rate": 2.8817875808308975e-05, "loss": 0.6641, "step": 114780 }, { "epoch": 1.27, "learning_rate": 2.8816953081170463e-05, "loss": 0.6502, "step": 114785 }, { "epoch": 1.27, "learning_rate": 2.8816030354031947e-05, "loss": 0.6555, "step": 114790 }, { "epoch": 1.27, "learning_rate": 2.881510762689344e-05, "loss": 0.652, "step": 114795 }, { "epoch": 1.27, "learning_rate": 2.8814184899754926e-05, "loss": 0.6634, "step": 114800 }, { "epoch": 1.27, "learning_rate": 2.881326217261641e-05, "loss": 0.617, "step": 114805 }, { "epoch": 1.27, "learning_rate": 2.88123394454779e-05, "loss": 0.7136, "step": 114810 }, { "epoch": 1.27, "learning_rate": 2.881141671833939e-05, "loss": 0.636, "step": 114815 }, { "epoch": 1.27, "learning_rate": 2.8810493991200878e-05, "loss": 0.618, "step": 114820 }, { "epoch": 1.27, "learning_rate": 2.8809571264062362e-05, "loss": 0.6545, "step": 114825 }, { "epoch": 1.27, "learning_rate": 2.880864853692385e-05, "loss": 0.6122, "step": 114830 }, { "epoch": 1.27, "learning_rate": 2.880772580978534e-05, "loss": 0.5985, "step": 114835 }, { "epoch": 1.27, "learning_rate": 2.8806803082646826e-05, "loss": 0.6599, "step": 114840 }, { "epoch": 1.27, "learning_rate": 2.8805880355508314e-05, "loss": 0.6662, "step": 114845 }, { "epoch": 1.27, "learning_rate": 2.88049576283698e-05, "loss": 0.6478, "step": 114850 }, { "epoch": 1.27, "learning_rate": 2.8804034901231286e-05, "loss": 0.6984, "step": 114855 }, { "epoch": 1.27, "learning_rate": 2.8803112174092777e-05, "loss": 0.5994, "step": 114860 }, { "epoch": 1.27, "learning_rate": 2.8802189446954265e-05, "loss": 0.5956, "step": 114865 }, { "epoch": 1.27, "learning_rate": 2.880126671981575e-05, "loss": 0.7387, "step": 114870 }, { "epoch": 1.27, "learning_rate": 2.8800343992677237e-05, "loss": 0.6295, "step": 114875 }, { "epoch": 1.27, "learning_rate": 2.879942126553873e-05, "loss": 0.6717, "step": 114880 }, { "epoch": 1.27, "learning_rate": 2.8798498538400216e-05, "loss": 0.6518, "step": 114885 }, { "epoch": 1.27, "learning_rate": 2.87975758112617e-05, "loss": 0.6648, "step": 114890 }, { "epoch": 1.27, "learning_rate": 2.879665308412319e-05, "loss": 0.7062, "step": 114895 }, { "epoch": 1.27, "learning_rate": 2.879573035698468e-05, "loss": 0.6528, "step": 114900 }, { "epoch": 1.27, "learning_rate": 2.8794807629846164e-05, "loss": 0.668, "step": 114905 }, { "epoch": 1.27, "learning_rate": 2.8793884902707652e-05, "loss": 0.7128, "step": 114910 }, { "epoch": 1.27, "learning_rate": 2.8792962175569137e-05, "loss": 0.6772, "step": 114915 }, { "epoch": 1.27, "learning_rate": 2.8792039448430625e-05, "loss": 0.6831, "step": 114920 }, { "epoch": 1.27, "learning_rate": 2.8791116721292116e-05, "loss": 0.6421, "step": 114925 }, { "epoch": 1.27, "learning_rate": 2.8790193994153604e-05, "loss": 0.6593, "step": 114930 }, { "epoch": 1.27, "learning_rate": 2.8789271267015088e-05, "loss": 0.6121, "step": 114935 }, { "epoch": 1.27, "learning_rate": 2.8788348539876576e-05, "loss": 0.6215, "step": 114940 }, { "epoch": 1.27, "learning_rate": 2.8787425812738067e-05, "loss": 0.6558, "step": 114945 }, { "epoch": 1.27, "learning_rate": 2.878650308559955e-05, "loss": 0.6223, "step": 114950 }, { "epoch": 1.27, "learning_rate": 2.878558035846104e-05, "loss": 0.6813, "step": 114955 }, { "epoch": 1.27, "learning_rate": 2.8784657631322527e-05, "loss": 0.6177, "step": 114960 }, { "epoch": 1.27, "learning_rate": 2.878373490418402e-05, "loss": 0.6463, "step": 114965 }, { "epoch": 1.27, "learning_rate": 2.8782812177045503e-05, "loss": 0.6228, "step": 114970 }, { "epoch": 1.27, "learning_rate": 2.878188944990699e-05, "loss": 0.6124, "step": 114975 }, { "epoch": 1.27, "learning_rate": 2.8780966722768475e-05, "loss": 0.6456, "step": 114980 }, { "epoch": 1.27, "learning_rate": 2.878004399562997e-05, "loss": 0.6602, "step": 114985 }, { "epoch": 1.27, "learning_rate": 2.8779121268491454e-05, "loss": 0.6294, "step": 114990 }, { "epoch": 1.27, "learning_rate": 2.8778198541352942e-05, "loss": 0.6787, "step": 114995 }, { "epoch": 1.27, "learning_rate": 2.8777275814214427e-05, "loss": 0.6403, "step": 115000 }, { "epoch": 1.27, "eval_loss": 0.6494573950767517, "eval_runtime": 69.6509, "eval_samples_per_second": 28.715, "eval_steps_per_second": 14.357, "step": 115000 }, { "epoch": 1.27, "learning_rate": 2.8776353087075915e-05, "loss": 0.6936, "step": 115005 }, { "epoch": 1.27, "learning_rate": 2.8775430359937406e-05, "loss": 0.6524, "step": 115010 }, { "epoch": 1.27, "learning_rate": 2.877450763279889e-05, "loss": 0.6911, "step": 115015 }, { "epoch": 1.27, "learning_rate": 2.8773584905660378e-05, "loss": 0.6763, "step": 115020 }, { "epoch": 1.27, "learning_rate": 2.8772662178521863e-05, "loss": 0.6416, "step": 115025 }, { "epoch": 1.27, "learning_rate": 2.8771739451383357e-05, "loss": 0.6875, "step": 115030 }, { "epoch": 1.27, "learning_rate": 2.877081672424484e-05, "loss": 0.6715, "step": 115035 }, { "epoch": 1.27, "learning_rate": 2.876989399710633e-05, "loss": 0.6533, "step": 115040 }, { "epoch": 1.27, "learning_rate": 2.8768971269967814e-05, "loss": 0.6156, "step": 115045 }, { "epoch": 1.27, "learning_rate": 2.8768048542829305e-05, "loss": 0.6924, "step": 115050 }, { "epoch": 1.27, "learning_rate": 2.8767125815690793e-05, "loss": 0.6652, "step": 115055 }, { "epoch": 1.27, "learning_rate": 2.876620308855228e-05, "loss": 0.6775, "step": 115060 }, { "epoch": 1.27, "learning_rate": 2.8765280361413765e-05, "loss": 0.6549, "step": 115065 }, { "epoch": 1.27, "learning_rate": 2.8764357634275253e-05, "loss": 0.7321, "step": 115070 }, { "epoch": 1.27, "learning_rate": 2.8763434907136744e-05, "loss": 0.6107, "step": 115075 }, { "epoch": 1.27, "learning_rate": 2.876251217999823e-05, "loss": 0.6396, "step": 115080 }, { "epoch": 1.27, "learning_rate": 2.8761589452859717e-05, "loss": 0.6154, "step": 115085 }, { "epoch": 1.27, "learning_rate": 2.87606667257212e-05, "loss": 0.6052, "step": 115090 }, { "epoch": 1.27, "learning_rate": 2.8759743998582696e-05, "loss": 0.6594, "step": 115095 }, { "epoch": 1.27, "learning_rate": 2.875882127144418e-05, "loss": 0.6351, "step": 115100 }, { "epoch": 1.27, "learning_rate": 2.8757898544305668e-05, "loss": 0.6359, "step": 115105 }, { "epoch": 1.27, "learning_rate": 2.8756975817167153e-05, "loss": 0.6214, "step": 115110 }, { "epoch": 1.27, "learning_rate": 2.8756053090028644e-05, "loss": 0.7378, "step": 115115 }, { "epoch": 1.27, "learning_rate": 2.875513036289013e-05, "loss": 0.6688, "step": 115120 }, { "epoch": 1.27, "learning_rate": 2.8754207635751616e-05, "loss": 0.6506, "step": 115125 }, { "epoch": 1.27, "learning_rate": 2.8753284908613104e-05, "loss": 0.638, "step": 115130 }, { "epoch": 1.27, "learning_rate": 2.8752362181474595e-05, "loss": 0.6557, "step": 115135 }, { "epoch": 1.27, "learning_rate": 2.8751439454336083e-05, "loss": 0.6513, "step": 115140 }, { "epoch": 1.27, "learning_rate": 2.8750516727197568e-05, "loss": 0.6369, "step": 115145 }, { "epoch": 1.28, "learning_rate": 2.8749594000059055e-05, "loss": 0.6608, "step": 115150 }, { "epoch": 1.28, "learning_rate": 2.874867127292054e-05, "loss": 0.6467, "step": 115155 }, { "epoch": 1.28, "learning_rate": 2.8747748545782034e-05, "loss": 0.6384, "step": 115160 }, { "epoch": 1.28, "learning_rate": 2.874682581864352e-05, "loss": 0.7035, "step": 115165 }, { "epoch": 1.28, "learning_rate": 2.8745903091505007e-05, "loss": 0.6488, "step": 115170 }, { "epoch": 1.28, "learning_rate": 2.874498036436649e-05, "loss": 0.7344, "step": 115175 }, { "epoch": 1.28, "learning_rate": 2.8744057637227982e-05, "loss": 0.6227, "step": 115180 }, { "epoch": 1.28, "learning_rate": 2.874313491008947e-05, "loss": 0.6839, "step": 115185 }, { "epoch": 1.28, "learning_rate": 2.8742212182950955e-05, "loss": 0.6681, "step": 115190 }, { "epoch": 1.28, "learning_rate": 2.8741289455812443e-05, "loss": 0.6568, "step": 115195 }, { "epoch": 1.28, "learning_rate": 2.8740366728673934e-05, "loss": 0.7027, "step": 115200 }, { "epoch": 1.28, "learning_rate": 2.8739444001535422e-05, "loss": 0.6413, "step": 115205 }, { "epoch": 1.28, "learning_rate": 2.8738521274396906e-05, "loss": 0.6637, "step": 115210 }, { "epoch": 1.28, "learning_rate": 2.8737598547258394e-05, "loss": 0.6789, "step": 115215 }, { "epoch": 1.28, "learning_rate": 2.873667582011988e-05, "loss": 0.6736, "step": 115220 }, { "epoch": 1.28, "learning_rate": 2.873575309298137e-05, "loss": 0.5992, "step": 115225 }, { "epoch": 1.28, "learning_rate": 2.8734830365842858e-05, "loss": 0.6867, "step": 115230 }, { "epoch": 1.28, "learning_rate": 2.8733907638704345e-05, "loss": 0.6881, "step": 115235 }, { "epoch": 1.28, "learning_rate": 2.873298491156583e-05, "loss": 0.6829, "step": 115240 }, { "epoch": 1.28, "learning_rate": 2.873206218442732e-05, "loss": 0.6636, "step": 115245 }, { "epoch": 1.28, "learning_rate": 2.873113945728881e-05, "loss": 0.6652, "step": 115250 }, { "epoch": 1.28, "learning_rate": 2.8730216730150293e-05, "loss": 0.6651, "step": 115255 }, { "epoch": 1.28, "learning_rate": 2.872929400301178e-05, "loss": 0.6816, "step": 115260 }, { "epoch": 1.28, "learning_rate": 2.8728371275873273e-05, "loss": 0.6445, "step": 115265 }, { "epoch": 1.28, "learning_rate": 2.872744854873476e-05, "loss": 0.6447, "step": 115270 }, { "epoch": 1.28, "learning_rate": 2.8726525821596245e-05, "loss": 0.6997, "step": 115275 }, { "epoch": 1.28, "learning_rate": 2.8725603094457733e-05, "loss": 0.7005, "step": 115280 }, { "epoch": 1.28, "learning_rate": 2.8724680367319224e-05, "loss": 0.6574, "step": 115285 }, { "epoch": 1.28, "learning_rate": 2.872375764018071e-05, "loss": 0.6754, "step": 115290 }, { "epoch": 1.28, "learning_rate": 2.8722834913042196e-05, "loss": 0.6761, "step": 115295 }, { "epoch": 1.28, "learning_rate": 2.872191218590368e-05, "loss": 0.6443, "step": 115300 }, { "epoch": 1.28, "learning_rate": 2.872098945876517e-05, "loss": 0.5977, "step": 115305 }, { "epoch": 1.28, "learning_rate": 2.872006673162666e-05, "loss": 0.6525, "step": 115310 }, { "epoch": 1.28, "learning_rate": 2.8719144004488148e-05, "loss": 0.657, "step": 115315 }, { "epoch": 1.28, "learning_rate": 2.8718221277349632e-05, "loss": 0.6664, "step": 115320 }, { "epoch": 1.28, "learning_rate": 2.871729855021112e-05, "loss": 0.6931, "step": 115325 }, { "epoch": 1.28, "learning_rate": 2.871637582307261e-05, "loss": 0.6887, "step": 115330 }, { "epoch": 1.28, "learning_rate": 2.8715453095934096e-05, "loss": 0.7376, "step": 115335 }, { "epoch": 1.28, "learning_rate": 2.8714530368795583e-05, "loss": 0.6876, "step": 115340 }, { "epoch": 1.28, "learning_rate": 2.871360764165707e-05, "loss": 0.6313, "step": 115345 }, { "epoch": 1.28, "learning_rate": 2.8712684914518563e-05, "loss": 0.6404, "step": 115350 }, { "epoch": 1.28, "learning_rate": 2.8711762187380047e-05, "loss": 0.6899, "step": 115355 }, { "epoch": 1.28, "learning_rate": 2.8710839460241535e-05, "loss": 0.7301, "step": 115360 }, { "epoch": 1.28, "learning_rate": 2.870991673310302e-05, "loss": 0.6355, "step": 115365 }, { "epoch": 1.28, "learning_rate": 2.8708994005964507e-05, "loss": 0.6906, "step": 115370 }, { "epoch": 1.28, "learning_rate": 2.8708071278826e-05, "loss": 0.6404, "step": 115375 }, { "epoch": 1.28, "learning_rate": 2.8707148551687486e-05, "loss": 0.6866, "step": 115380 }, { "epoch": 1.28, "learning_rate": 2.870622582454897e-05, "loss": 0.683, "step": 115385 }, { "epoch": 1.28, "learning_rate": 2.870530309741046e-05, "loss": 0.6698, "step": 115390 }, { "epoch": 1.28, "learning_rate": 2.870438037027195e-05, "loss": 0.7107, "step": 115395 }, { "epoch": 1.28, "learning_rate": 2.8703457643133434e-05, "loss": 0.6432, "step": 115400 }, { "epoch": 1.28, "learning_rate": 2.8702534915994922e-05, "loss": 0.6983, "step": 115405 }, { "epoch": 1.28, "learning_rate": 2.8701612188856407e-05, "loss": 0.6359, "step": 115410 }, { "epoch": 1.28, "learning_rate": 2.87006894617179e-05, "loss": 0.677, "step": 115415 }, { "epoch": 1.28, "learning_rate": 2.8699766734579386e-05, "loss": 0.6881, "step": 115420 }, { "epoch": 1.28, "learning_rate": 2.8698844007440874e-05, "loss": 0.6958, "step": 115425 }, { "epoch": 1.28, "learning_rate": 2.8697921280302358e-05, "loss": 0.6718, "step": 115430 }, { "epoch": 1.28, "learning_rate": 2.869699855316385e-05, "loss": 0.6859, "step": 115435 }, { "epoch": 1.28, "learning_rate": 2.8696075826025337e-05, "loss": 0.6497, "step": 115440 }, { "epoch": 1.28, "learning_rate": 2.8695153098886825e-05, "loss": 0.6197, "step": 115445 }, { "epoch": 1.28, "learning_rate": 2.869423037174831e-05, "loss": 0.6704, "step": 115450 }, { "epoch": 1.28, "learning_rate": 2.8693307644609797e-05, "loss": 0.6679, "step": 115455 }, { "epoch": 1.28, "learning_rate": 2.869238491747129e-05, "loss": 0.6654, "step": 115460 }, { "epoch": 1.28, "learning_rate": 2.8691462190332773e-05, "loss": 0.6991, "step": 115465 }, { "epoch": 1.28, "learning_rate": 2.869053946319426e-05, "loss": 0.6635, "step": 115470 }, { "epoch": 1.28, "learning_rate": 2.8689616736055745e-05, "loss": 0.6551, "step": 115475 }, { "epoch": 1.28, "learning_rate": 2.868869400891724e-05, "loss": 0.617, "step": 115480 }, { "epoch": 1.28, "learning_rate": 2.8687771281778724e-05, "loss": 0.6423, "step": 115485 }, { "epoch": 1.28, "learning_rate": 2.8686848554640212e-05, "loss": 0.6576, "step": 115490 }, { "epoch": 1.28, "learning_rate": 2.8685925827501697e-05, "loss": 0.6741, "step": 115495 }, { "epoch": 1.28, "learning_rate": 2.8685003100363188e-05, "loss": 0.6731, "step": 115500 }, { "epoch": 1.28, "learning_rate": 2.8684080373224676e-05, "loss": 0.635, "step": 115505 }, { "epoch": 1.28, "learning_rate": 2.868315764608616e-05, "loss": 0.6767, "step": 115510 }, { "epoch": 1.28, "learning_rate": 2.8682234918947648e-05, "loss": 0.6741, "step": 115515 }, { "epoch": 1.28, "learning_rate": 2.868131219180914e-05, "loss": 0.6669, "step": 115520 }, { "epoch": 1.28, "learning_rate": 2.8680389464670627e-05, "loss": 0.6273, "step": 115525 }, { "epoch": 1.28, "learning_rate": 2.867946673753211e-05, "loss": 0.6598, "step": 115530 }, { "epoch": 1.28, "learning_rate": 2.86785440103936e-05, "loss": 0.7431, "step": 115535 }, { "epoch": 1.28, "learning_rate": 2.8677621283255084e-05, "loss": 0.6568, "step": 115540 }, { "epoch": 1.28, "learning_rate": 2.867669855611658e-05, "loss": 0.6914, "step": 115545 }, { "epoch": 1.28, "learning_rate": 2.8675775828978063e-05, "loss": 0.6519, "step": 115550 }, { "epoch": 1.28, "learning_rate": 2.867485310183955e-05, "loss": 0.6309, "step": 115555 }, { "epoch": 1.28, "learning_rate": 2.8673930374701035e-05, "loss": 0.6197, "step": 115560 }, { "epoch": 1.28, "learning_rate": 2.8673007647562526e-05, "loss": 0.6274, "step": 115565 }, { "epoch": 1.28, "learning_rate": 2.8672084920424014e-05, "loss": 0.6678, "step": 115570 }, { "epoch": 1.28, "learning_rate": 2.86711621932855e-05, "loss": 0.6973, "step": 115575 }, { "epoch": 1.28, "learning_rate": 2.8670239466146987e-05, "loss": 0.6108, "step": 115580 }, { "epoch": 1.28, "learning_rate": 2.8669316739008478e-05, "loss": 0.5864, "step": 115585 }, { "epoch": 1.28, "learning_rate": 2.8668394011869966e-05, "loss": 0.6541, "step": 115590 }, { "epoch": 1.28, "learning_rate": 2.866747128473145e-05, "loss": 0.6511, "step": 115595 }, { "epoch": 1.28, "learning_rate": 2.8666548557592938e-05, "loss": 0.6405, "step": 115600 }, { "epoch": 1.28, "learning_rate": 2.8665625830454423e-05, "loss": 0.6373, "step": 115605 }, { "epoch": 1.28, "learning_rate": 2.8664703103315914e-05, "loss": 0.6118, "step": 115610 }, { "epoch": 1.28, "learning_rate": 2.86637803761774e-05, "loss": 0.6462, "step": 115615 }, { "epoch": 1.28, "learning_rate": 2.866285764903889e-05, "loss": 0.6164, "step": 115620 }, { "epoch": 1.28, "learning_rate": 2.8661934921900374e-05, "loss": 0.6087, "step": 115625 }, { "epoch": 1.28, "learning_rate": 2.8661012194761865e-05, "loss": 0.6727, "step": 115630 }, { "epoch": 1.28, "learning_rate": 2.8660089467623353e-05, "loss": 0.6387, "step": 115635 }, { "epoch": 1.28, "learning_rate": 2.8659166740484837e-05, "loss": 0.6444, "step": 115640 }, { "epoch": 1.28, "learning_rate": 2.8658244013346325e-05, "loss": 0.5901, "step": 115645 }, { "epoch": 1.28, "learning_rate": 2.8657321286207817e-05, "loss": 0.6622, "step": 115650 }, { "epoch": 1.28, "learning_rate": 2.8656398559069304e-05, "loss": 0.6057, "step": 115655 }, { "epoch": 1.28, "learning_rate": 2.865547583193079e-05, "loss": 0.7005, "step": 115660 }, { "epoch": 1.28, "learning_rate": 2.8654553104792277e-05, "loss": 0.6738, "step": 115665 }, { "epoch": 1.28, "learning_rate": 2.8653630377653768e-05, "loss": 0.7247, "step": 115670 }, { "epoch": 1.28, "learning_rate": 2.8652707650515252e-05, "loss": 0.6332, "step": 115675 }, { "epoch": 1.28, "learning_rate": 2.865178492337674e-05, "loss": 0.6374, "step": 115680 }, { "epoch": 1.28, "learning_rate": 2.8650862196238225e-05, "loss": 0.5935, "step": 115685 }, { "epoch": 1.28, "learning_rate": 2.8649939469099713e-05, "loss": 0.6594, "step": 115690 }, { "epoch": 1.28, "learning_rate": 2.8649016741961204e-05, "loss": 0.608, "step": 115695 }, { "epoch": 1.28, "learning_rate": 2.864809401482269e-05, "loss": 0.6929, "step": 115700 }, { "epoch": 1.28, "learning_rate": 2.8647171287684176e-05, "loss": 0.5927, "step": 115705 }, { "epoch": 1.28, "learning_rate": 2.8646248560545664e-05, "loss": 0.6522, "step": 115710 }, { "epoch": 1.28, "learning_rate": 2.8645325833407155e-05, "loss": 0.6231, "step": 115715 }, { "epoch": 1.28, "learning_rate": 2.864440310626864e-05, "loss": 0.6922, "step": 115720 }, { "epoch": 1.28, "learning_rate": 2.8643480379130127e-05, "loss": 0.692, "step": 115725 }, { "epoch": 1.28, "learning_rate": 2.8642557651991615e-05, "loss": 0.712, "step": 115730 }, { "epoch": 1.28, "learning_rate": 2.8641634924853107e-05, "loss": 0.6265, "step": 115735 }, { "epoch": 1.28, "learning_rate": 2.864071219771459e-05, "loss": 0.6901, "step": 115740 }, { "epoch": 1.28, "learning_rate": 2.863978947057608e-05, "loss": 0.644, "step": 115745 }, { "epoch": 1.28, "learning_rate": 2.8638866743437563e-05, "loss": 0.6778, "step": 115750 }, { "epoch": 1.28, "learning_rate": 2.863794401629905e-05, "loss": 0.6739, "step": 115755 }, { "epoch": 1.28, "learning_rate": 2.8637021289160542e-05, "loss": 0.6845, "step": 115760 }, { "epoch": 1.28, "learning_rate": 2.863609856202203e-05, "loss": 0.6415, "step": 115765 }, { "epoch": 1.28, "learning_rate": 2.8635175834883515e-05, "loss": 0.6907, "step": 115770 }, { "epoch": 1.28, "learning_rate": 2.8634253107745003e-05, "loss": 0.6844, "step": 115775 }, { "epoch": 1.28, "learning_rate": 2.8633330380606494e-05, "loss": 0.6603, "step": 115780 }, { "epoch": 1.28, "learning_rate": 2.8632407653467978e-05, "loss": 0.7076, "step": 115785 }, { "epoch": 1.28, "learning_rate": 2.8631484926329466e-05, "loss": 0.6601, "step": 115790 }, { "epoch": 1.28, "learning_rate": 2.863056219919095e-05, "loss": 0.6507, "step": 115795 }, { "epoch": 1.28, "learning_rate": 2.8629639472052445e-05, "loss": 0.6599, "step": 115800 }, { "epoch": 1.28, "learning_rate": 2.862871674491393e-05, "loss": 0.7179, "step": 115805 }, { "epoch": 1.28, "learning_rate": 2.8627794017775418e-05, "loss": 0.6984, "step": 115810 }, { "epoch": 1.28, "learning_rate": 2.8626871290636902e-05, "loss": 0.6304, "step": 115815 }, { "epoch": 1.28, "learning_rate": 2.8625948563498393e-05, "loss": 0.687, "step": 115820 }, { "epoch": 1.28, "learning_rate": 2.862502583635988e-05, "loss": 0.6629, "step": 115825 }, { "epoch": 1.28, "learning_rate": 2.862410310922137e-05, "loss": 0.6413, "step": 115830 }, { "epoch": 1.28, "learning_rate": 2.8623180382082853e-05, "loss": 0.6362, "step": 115835 }, { "epoch": 1.28, "learning_rate": 2.862225765494434e-05, "loss": 0.6709, "step": 115840 }, { "epoch": 1.28, "learning_rate": 2.8621334927805832e-05, "loss": 0.6414, "step": 115845 }, { "epoch": 1.28, "learning_rate": 2.8620412200667317e-05, "loss": 0.6687, "step": 115850 }, { "epoch": 1.28, "learning_rate": 2.8619489473528805e-05, "loss": 0.6362, "step": 115855 }, { "epoch": 1.28, "learning_rate": 2.861856674639029e-05, "loss": 0.6644, "step": 115860 }, { "epoch": 1.28, "learning_rate": 2.8617644019251784e-05, "loss": 0.6461, "step": 115865 }, { "epoch": 1.28, "learning_rate": 2.861672129211327e-05, "loss": 0.667, "step": 115870 }, { "epoch": 1.28, "learning_rate": 2.8615798564974756e-05, "loss": 0.6594, "step": 115875 }, { "epoch": 1.28, "learning_rate": 2.861487583783624e-05, "loss": 0.6177, "step": 115880 }, { "epoch": 1.28, "learning_rate": 2.8613953110697732e-05, "loss": 0.6308, "step": 115885 }, { "epoch": 1.28, "learning_rate": 2.861303038355922e-05, "loss": 0.6999, "step": 115890 }, { "epoch": 1.28, "learning_rate": 2.8612107656420704e-05, "loss": 0.6062, "step": 115895 }, { "epoch": 1.28, "learning_rate": 2.8611184929282192e-05, "loss": 0.622, "step": 115900 }, { "epoch": 1.28, "learning_rate": 2.861026220214368e-05, "loss": 0.6585, "step": 115905 }, { "epoch": 1.28, "learning_rate": 2.860933947500517e-05, "loss": 0.6477, "step": 115910 }, { "epoch": 1.28, "learning_rate": 2.8608416747866656e-05, "loss": 0.6256, "step": 115915 }, { "epoch": 1.28, "learning_rate": 2.8607494020728143e-05, "loss": 0.6511, "step": 115920 }, { "epoch": 1.28, "learning_rate": 2.8606571293589628e-05, "loss": 0.6934, "step": 115925 }, { "epoch": 1.28, "learning_rate": 2.8605648566451123e-05, "loss": 0.5863, "step": 115930 }, { "epoch": 1.28, "learning_rate": 2.8604725839312607e-05, "loss": 0.6677, "step": 115935 }, { "epoch": 1.28, "learning_rate": 2.8603803112174095e-05, "loss": 0.6071, "step": 115940 }, { "epoch": 1.28, "learning_rate": 2.860288038503558e-05, "loss": 0.6497, "step": 115945 }, { "epoch": 1.28, "learning_rate": 2.860195765789707e-05, "loss": 0.6771, "step": 115950 }, { "epoch": 1.28, "learning_rate": 2.860103493075856e-05, "loss": 0.6446, "step": 115955 }, { "epoch": 1.28, "learning_rate": 2.8600112203620043e-05, "loss": 0.6235, "step": 115960 }, { "epoch": 1.28, "learning_rate": 2.859918947648153e-05, "loss": 0.6437, "step": 115965 }, { "epoch": 1.28, "learning_rate": 2.8598266749343022e-05, "loss": 0.6939, "step": 115970 }, { "epoch": 1.28, "learning_rate": 2.859734402220451e-05, "loss": 0.6636, "step": 115975 }, { "epoch": 1.28, "learning_rate": 2.8596421295065994e-05, "loss": 0.6335, "step": 115980 }, { "epoch": 1.28, "learning_rate": 2.8595498567927482e-05, "loss": 0.6191, "step": 115985 }, { "epoch": 1.28, "learning_rate": 2.8594575840788967e-05, "loss": 0.6936, "step": 115990 }, { "epoch": 1.28, "learning_rate": 2.8593653113650458e-05, "loss": 0.6545, "step": 115995 }, { "epoch": 1.28, "learning_rate": 2.8592730386511946e-05, "loss": 0.6615, "step": 116000 }, { "epoch": 1.28, "eval_loss": 0.6298059225082397, "eval_runtime": 69.271, "eval_samples_per_second": 28.872, "eval_steps_per_second": 14.436, "step": 116000 }, { "epoch": 1.28, "learning_rate": 2.8591807659373433e-05, "loss": 0.6712, "step": 116005 }, { "epoch": 1.28, "learning_rate": 2.8590884932234918e-05, "loss": 0.6497, "step": 116010 }, { "epoch": 1.28, "learning_rate": 2.858996220509641e-05, "loss": 0.6083, "step": 116015 }, { "epoch": 1.28, "learning_rate": 2.8589039477957897e-05, "loss": 0.6628, "step": 116020 }, { "epoch": 1.28, "learning_rate": 2.858811675081938e-05, "loss": 0.6772, "step": 116025 }, { "epoch": 1.28, "learning_rate": 2.858719402368087e-05, "loss": 0.6905, "step": 116030 }, { "epoch": 1.28, "learning_rate": 2.858627129654236e-05, "loss": 0.678, "step": 116035 }, { "epoch": 1.28, "learning_rate": 2.858534856940385e-05, "loss": 0.6237, "step": 116040 }, { "epoch": 1.28, "learning_rate": 2.8584425842265333e-05, "loss": 0.6161, "step": 116045 }, { "epoch": 1.28, "learning_rate": 2.858350311512682e-05, "loss": 0.6698, "step": 116050 }, { "epoch": 1.29, "learning_rate": 2.8582580387988305e-05, "loss": 0.6675, "step": 116055 }, { "epoch": 1.29, "learning_rate": 2.8581657660849796e-05, "loss": 0.6637, "step": 116060 }, { "epoch": 1.29, "learning_rate": 2.8580734933711284e-05, "loss": 0.6343, "step": 116065 }, { "epoch": 1.29, "learning_rate": 2.857981220657277e-05, "loss": 0.7165, "step": 116070 }, { "epoch": 1.29, "learning_rate": 2.8578889479434257e-05, "loss": 0.6425, "step": 116075 }, { "epoch": 1.29, "learning_rate": 2.8577966752295748e-05, "loss": 0.6436, "step": 116080 }, { "epoch": 1.29, "learning_rate": 2.8577044025157236e-05, "loss": 0.6535, "step": 116085 }, { "epoch": 1.29, "learning_rate": 2.857612129801872e-05, "loss": 0.6992, "step": 116090 }, { "epoch": 1.29, "learning_rate": 2.8575198570880208e-05, "loss": 0.7107, "step": 116095 }, { "epoch": 1.29, "learning_rate": 2.85742758437417e-05, "loss": 0.6332, "step": 116100 }, { "epoch": 1.29, "learning_rate": 2.8573353116603187e-05, "loss": 0.6584, "step": 116105 }, { "epoch": 1.29, "learning_rate": 2.857243038946467e-05, "loss": 0.7196, "step": 116110 }, { "epoch": 1.29, "learning_rate": 2.857150766232616e-05, "loss": 0.6975, "step": 116115 }, { "epoch": 1.29, "learning_rate": 2.857058493518765e-05, "loss": 0.6836, "step": 116120 }, { "epoch": 1.29, "learning_rate": 2.8569662208049135e-05, "loss": 0.6762, "step": 116125 }, { "epoch": 1.29, "learning_rate": 2.8568739480910623e-05, "loss": 0.6492, "step": 116130 }, { "epoch": 1.29, "learning_rate": 2.8567816753772107e-05, "loss": 0.6261, "step": 116135 }, { "epoch": 1.29, "learning_rate": 2.8566894026633595e-05, "loss": 0.6993, "step": 116140 }, { "epoch": 1.29, "learning_rate": 2.8565971299495086e-05, "loss": 0.6796, "step": 116145 }, { "epoch": 1.29, "learning_rate": 2.8565048572356574e-05, "loss": 0.6238, "step": 116150 }, { "epoch": 1.29, "learning_rate": 2.856412584521806e-05, "loss": 0.6567, "step": 116155 }, { "epoch": 1.29, "learning_rate": 2.8563203118079547e-05, "loss": 0.6392, "step": 116160 }, { "epoch": 1.29, "learning_rate": 2.8562280390941038e-05, "loss": 0.6557, "step": 116165 }, { "epoch": 1.29, "learning_rate": 2.8561357663802522e-05, "loss": 0.6888, "step": 116170 }, { "epoch": 1.29, "learning_rate": 2.856043493666401e-05, "loss": 0.6217, "step": 116175 }, { "epoch": 1.29, "learning_rate": 2.8559512209525498e-05, "loss": 0.6612, "step": 116180 }, { "epoch": 1.29, "learning_rate": 2.855858948238699e-05, "loss": 0.6032, "step": 116185 }, { "epoch": 1.29, "learning_rate": 2.8557666755248474e-05, "loss": 0.6267, "step": 116190 }, { "epoch": 1.29, "learning_rate": 2.855674402810996e-05, "loss": 0.6377, "step": 116195 }, { "epoch": 1.29, "learning_rate": 2.8555821300971446e-05, "loss": 0.708, "step": 116200 }, { "epoch": 1.29, "learning_rate": 2.8554898573832934e-05, "loss": 0.6473, "step": 116205 }, { "epoch": 1.29, "learning_rate": 2.8553975846694425e-05, "loss": 0.6902, "step": 116210 }, { "epoch": 1.29, "learning_rate": 2.8553053119555913e-05, "loss": 0.6554, "step": 116215 }, { "epoch": 1.29, "learning_rate": 2.8552130392417397e-05, "loss": 0.6356, "step": 116220 }, { "epoch": 1.29, "learning_rate": 2.8551207665278885e-05, "loss": 0.6331, "step": 116225 }, { "epoch": 1.29, "learning_rate": 2.8550284938140376e-05, "loss": 0.6663, "step": 116230 }, { "epoch": 1.29, "learning_rate": 2.854936221100186e-05, "loss": 0.6461, "step": 116235 }, { "epoch": 1.29, "learning_rate": 2.854843948386335e-05, "loss": 0.5497, "step": 116240 }, { "epoch": 1.29, "learning_rate": 2.8547516756724833e-05, "loss": 0.6995, "step": 116245 }, { "epoch": 1.29, "learning_rate": 2.8546594029586328e-05, "loss": 0.6121, "step": 116250 }, { "epoch": 1.29, "learning_rate": 2.8545671302447812e-05, "loss": 0.6959, "step": 116255 }, { "epoch": 1.29, "learning_rate": 2.85447485753093e-05, "loss": 0.5991, "step": 116260 }, { "epoch": 1.29, "learning_rate": 2.8543825848170785e-05, "loss": 0.613, "step": 116265 }, { "epoch": 1.29, "learning_rate": 2.8542903121032276e-05, "loss": 0.624, "step": 116270 }, { "epoch": 1.29, "learning_rate": 2.8541980393893764e-05, "loss": 0.6206, "step": 116275 }, { "epoch": 1.29, "learning_rate": 2.8541057666755248e-05, "loss": 0.7033, "step": 116280 }, { "epoch": 1.29, "learning_rate": 2.8540134939616736e-05, "loss": 0.6504, "step": 116285 }, { "epoch": 1.29, "learning_rate": 2.8539212212478224e-05, "loss": 0.6121, "step": 116290 }, { "epoch": 1.29, "learning_rate": 2.8538289485339715e-05, "loss": 0.6274, "step": 116295 }, { "epoch": 1.29, "learning_rate": 2.85373667582012e-05, "loss": 0.6588, "step": 116300 }, { "epoch": 1.29, "learning_rate": 2.8536444031062687e-05, "loss": 0.6365, "step": 116305 }, { "epoch": 1.29, "learning_rate": 2.8535521303924172e-05, "loss": 0.6711, "step": 116310 }, { "epoch": 1.29, "learning_rate": 2.8534598576785667e-05, "loss": 0.634, "step": 116315 }, { "epoch": 1.29, "learning_rate": 2.853367584964715e-05, "loss": 0.6163, "step": 116320 }, { "epoch": 1.29, "learning_rate": 2.853275312250864e-05, "loss": 0.6013, "step": 116325 }, { "epoch": 1.29, "learning_rate": 2.8531830395370123e-05, "loss": 0.6598, "step": 116330 }, { "epoch": 1.29, "learning_rate": 2.8530907668231615e-05, "loss": 0.6981, "step": 116335 }, { "epoch": 1.29, "learning_rate": 2.8529984941093102e-05, "loss": 0.6818, "step": 116340 }, { "epoch": 1.29, "learning_rate": 2.8529062213954587e-05, "loss": 0.6541, "step": 116345 }, { "epoch": 1.29, "learning_rate": 2.8528139486816075e-05, "loss": 0.683, "step": 116350 }, { "epoch": 1.29, "learning_rate": 2.8527216759677566e-05, "loss": 0.6615, "step": 116355 }, { "epoch": 1.29, "learning_rate": 2.8526294032539054e-05, "loss": 0.6576, "step": 116360 }, { "epoch": 1.29, "learning_rate": 2.8525371305400538e-05, "loss": 0.666, "step": 116365 }, { "epoch": 1.29, "learning_rate": 2.8524448578262026e-05, "loss": 0.6728, "step": 116370 }, { "epoch": 1.29, "learning_rate": 2.852352585112351e-05, "loss": 0.6363, "step": 116375 }, { "epoch": 1.29, "learning_rate": 2.8522603123985002e-05, "loss": 0.7043, "step": 116380 }, { "epoch": 1.29, "learning_rate": 2.852168039684649e-05, "loss": 0.6991, "step": 116385 }, { "epoch": 1.29, "learning_rate": 2.8520757669707977e-05, "loss": 0.6796, "step": 116390 }, { "epoch": 1.29, "learning_rate": 2.8519834942569462e-05, "loss": 0.6512, "step": 116395 }, { "epoch": 1.29, "learning_rate": 2.8518912215430953e-05, "loss": 0.6584, "step": 116400 }, { "epoch": 1.29, "learning_rate": 2.851798948829244e-05, "loss": 0.6376, "step": 116405 }, { "epoch": 1.29, "learning_rate": 2.8517066761153925e-05, "loss": 0.6052, "step": 116410 }, { "epoch": 1.29, "learning_rate": 2.8516144034015413e-05, "loss": 0.7311, "step": 116415 }, { "epoch": 1.29, "learning_rate": 2.8515221306876905e-05, "loss": 0.6944, "step": 116420 }, { "epoch": 1.29, "learning_rate": 2.8514298579738392e-05, "loss": 0.6685, "step": 116425 }, { "epoch": 1.29, "learning_rate": 2.8513375852599877e-05, "loss": 0.6681, "step": 116430 }, { "epoch": 1.29, "learning_rate": 2.8512453125461365e-05, "loss": 0.6138, "step": 116435 }, { "epoch": 1.29, "learning_rate": 2.851153039832285e-05, "loss": 0.6568, "step": 116440 }, { "epoch": 1.29, "learning_rate": 2.851060767118434e-05, "loss": 0.7373, "step": 116445 }, { "epoch": 1.29, "learning_rate": 2.8509684944045828e-05, "loss": 0.6583, "step": 116450 }, { "epoch": 1.29, "learning_rate": 2.8508762216907313e-05, "loss": 0.6318, "step": 116455 }, { "epoch": 1.29, "learning_rate": 2.85078394897688e-05, "loss": 0.6542, "step": 116460 }, { "epoch": 1.29, "learning_rate": 2.8506916762630292e-05, "loss": 0.6898, "step": 116465 }, { "epoch": 1.29, "learning_rate": 2.850599403549178e-05, "loss": 0.7523, "step": 116470 }, { "epoch": 1.29, "learning_rate": 2.8505071308353264e-05, "loss": 0.6532, "step": 116475 }, { "epoch": 1.29, "learning_rate": 2.8504148581214752e-05, "loss": 0.6552, "step": 116480 }, { "epoch": 1.29, "learning_rate": 2.8503225854076243e-05, "loss": 0.6296, "step": 116485 }, { "epoch": 1.29, "learning_rate": 2.850230312693773e-05, "loss": 0.6348, "step": 116490 }, { "epoch": 1.29, "learning_rate": 2.8501380399799216e-05, "loss": 0.6796, "step": 116495 }, { "epoch": 1.29, "learning_rate": 2.8500457672660703e-05, "loss": 0.6444, "step": 116500 }, { "epoch": 1.29, "learning_rate": 2.8499534945522195e-05, "loss": 0.5877, "step": 116505 }, { "epoch": 1.29, "learning_rate": 2.849861221838368e-05, "loss": 0.5602, "step": 116510 }, { "epoch": 1.29, "learning_rate": 2.8497689491245167e-05, "loss": 0.6747, "step": 116515 }, { "epoch": 1.29, "learning_rate": 2.849676676410665e-05, "loss": 0.6428, "step": 116520 }, { "epoch": 1.29, "learning_rate": 2.849584403696814e-05, "loss": 0.6843, "step": 116525 }, { "epoch": 1.29, "learning_rate": 2.849492130982963e-05, "loss": 0.6484, "step": 116530 }, { "epoch": 1.29, "learning_rate": 2.849399858269112e-05, "loss": 0.6316, "step": 116535 }, { "epoch": 1.29, "learning_rate": 2.8493075855552603e-05, "loss": 0.7049, "step": 116540 }, { "epoch": 1.29, "learning_rate": 2.849215312841409e-05, "loss": 0.6311, "step": 116545 }, { "epoch": 1.29, "learning_rate": 2.8491230401275582e-05, "loss": 0.6025, "step": 116550 }, { "epoch": 1.29, "learning_rate": 2.8490307674137066e-05, "loss": 0.6971, "step": 116555 }, { "epoch": 1.29, "learning_rate": 2.8489384946998554e-05, "loss": 0.6409, "step": 116560 }, { "epoch": 1.29, "learning_rate": 2.8488462219860042e-05, "loss": 0.6545, "step": 116565 }, { "epoch": 1.29, "learning_rate": 2.8487539492721533e-05, "loss": 0.6672, "step": 116570 }, { "epoch": 1.29, "learning_rate": 2.8486616765583018e-05, "loss": 0.7025, "step": 116575 }, { "epoch": 1.29, "learning_rate": 2.8485694038444506e-05, "loss": 0.6567, "step": 116580 }, { "epoch": 1.29, "learning_rate": 2.848477131130599e-05, "loss": 0.6648, "step": 116585 }, { "epoch": 1.29, "learning_rate": 2.8483848584167478e-05, "loss": 0.6646, "step": 116590 }, { "epoch": 1.29, "learning_rate": 2.848292585702897e-05, "loss": 0.6781, "step": 116595 }, { "epoch": 1.29, "learning_rate": 2.8482003129890457e-05, "loss": 0.6815, "step": 116600 }, { "epoch": 1.29, "learning_rate": 2.848108040275194e-05, "loss": 0.6473, "step": 116605 }, { "epoch": 1.29, "learning_rate": 2.848015767561343e-05, "loss": 0.6553, "step": 116610 }, { "epoch": 1.29, "learning_rate": 2.847923494847492e-05, "loss": 0.659, "step": 116615 }, { "epoch": 1.29, "learning_rate": 2.8478312221336405e-05, "loss": 0.5916, "step": 116620 }, { "epoch": 1.29, "learning_rate": 2.8477389494197893e-05, "loss": 0.644, "step": 116625 }, { "epoch": 1.29, "learning_rate": 2.8476466767059377e-05, "loss": 0.7221, "step": 116630 }, { "epoch": 1.29, "learning_rate": 2.8475544039920872e-05, "loss": 0.6619, "step": 116635 }, { "epoch": 1.29, "learning_rate": 2.8474621312782356e-05, "loss": 0.7111, "step": 116640 }, { "epoch": 1.29, "learning_rate": 2.8473698585643844e-05, "loss": 0.6432, "step": 116645 }, { "epoch": 1.29, "learning_rate": 2.847277585850533e-05, "loss": 0.6731, "step": 116650 }, { "epoch": 1.29, "learning_rate": 2.847185313136682e-05, "loss": 0.671, "step": 116655 }, { "epoch": 1.29, "learning_rate": 2.8470930404228308e-05, "loss": 0.6657, "step": 116660 }, { "epoch": 1.29, "learning_rate": 2.8470007677089792e-05, "loss": 0.6484, "step": 116665 }, { "epoch": 1.29, "learning_rate": 2.846908494995128e-05, "loss": 0.6717, "step": 116670 }, { "epoch": 1.29, "learning_rate": 2.8468162222812768e-05, "loss": 0.659, "step": 116675 }, { "epoch": 1.29, "learning_rate": 2.846723949567426e-05, "loss": 0.6594, "step": 116680 }, { "epoch": 1.29, "learning_rate": 2.8466316768535744e-05, "loss": 0.632, "step": 116685 }, { "epoch": 1.29, "learning_rate": 2.846539404139723e-05, "loss": 0.681, "step": 116690 }, { "epoch": 1.29, "learning_rate": 2.8464471314258716e-05, "loss": 0.6596, "step": 116695 }, { "epoch": 1.29, "learning_rate": 2.846354858712021e-05, "loss": 0.6442, "step": 116700 }, { "epoch": 1.29, "learning_rate": 2.8462625859981695e-05, "loss": 0.6687, "step": 116705 }, { "epoch": 1.29, "learning_rate": 2.8461703132843183e-05, "loss": 0.6754, "step": 116710 }, { "epoch": 1.29, "learning_rate": 2.8460780405704667e-05, "loss": 0.6213, "step": 116715 }, { "epoch": 1.29, "learning_rate": 2.845985767856616e-05, "loss": 0.6827, "step": 116720 }, { "epoch": 1.29, "learning_rate": 2.8458934951427646e-05, "loss": 0.6833, "step": 116725 }, { "epoch": 1.29, "learning_rate": 2.845801222428913e-05, "loss": 0.6335, "step": 116730 }, { "epoch": 1.29, "learning_rate": 2.845708949715062e-05, "loss": 0.6455, "step": 116735 }, { "epoch": 1.29, "learning_rate": 2.8456166770012103e-05, "loss": 0.6387, "step": 116740 }, { "epoch": 1.29, "learning_rate": 2.8455244042873598e-05, "loss": 0.7081, "step": 116745 }, { "epoch": 1.29, "learning_rate": 2.8454321315735082e-05, "loss": 0.7026, "step": 116750 }, { "epoch": 1.29, "learning_rate": 2.845339858859657e-05, "loss": 0.6789, "step": 116755 }, { "epoch": 1.29, "learning_rate": 2.8452475861458055e-05, "loss": 0.6869, "step": 116760 }, { "epoch": 1.29, "learning_rate": 2.8451553134319546e-05, "loss": 0.717, "step": 116765 }, { "epoch": 1.29, "learning_rate": 2.8450630407181034e-05, "loss": 0.6547, "step": 116770 }, { "epoch": 1.29, "learning_rate": 2.844970768004252e-05, "loss": 0.6698, "step": 116775 }, { "epoch": 1.29, "learning_rate": 2.8448784952904006e-05, "loss": 0.684, "step": 116780 }, { "epoch": 1.29, "learning_rate": 2.8447862225765497e-05, "loss": 0.7599, "step": 116785 }, { "epoch": 1.29, "learning_rate": 2.8446939498626985e-05, "loss": 0.6692, "step": 116790 }, { "epoch": 1.29, "learning_rate": 2.844601677148847e-05, "loss": 0.6425, "step": 116795 }, { "epoch": 1.29, "learning_rate": 2.8445094044349957e-05, "loss": 0.7303, "step": 116800 }, { "epoch": 1.29, "learning_rate": 2.844417131721145e-05, "loss": 0.6391, "step": 116805 }, { "epoch": 1.29, "learning_rate": 2.8443248590072936e-05, "loss": 0.6972, "step": 116810 }, { "epoch": 1.29, "learning_rate": 2.844232586293442e-05, "loss": 0.6917, "step": 116815 }, { "epoch": 1.29, "learning_rate": 2.844140313579591e-05, "loss": 0.6433, "step": 116820 }, { "epoch": 1.29, "learning_rate": 2.8440480408657393e-05, "loss": 0.6307, "step": 116825 }, { "epoch": 1.29, "learning_rate": 2.8439557681518884e-05, "loss": 0.6361, "step": 116830 }, { "epoch": 1.29, "learning_rate": 2.8438634954380372e-05, "loss": 0.7002, "step": 116835 }, { "epoch": 1.29, "learning_rate": 2.8437712227241857e-05, "loss": 0.6857, "step": 116840 }, { "epoch": 1.29, "learning_rate": 2.8436789500103345e-05, "loss": 0.6162, "step": 116845 }, { "epoch": 1.29, "learning_rate": 2.8435866772964836e-05, "loss": 0.6927, "step": 116850 }, { "epoch": 1.29, "learning_rate": 2.8434944045826324e-05, "loss": 0.6489, "step": 116855 }, { "epoch": 1.29, "learning_rate": 2.8434021318687808e-05, "loss": 0.6613, "step": 116860 }, { "epoch": 1.29, "learning_rate": 2.8433098591549296e-05, "loss": 0.6598, "step": 116865 }, { "epoch": 1.29, "learning_rate": 2.8432175864410787e-05, "loss": 0.6417, "step": 116870 }, { "epoch": 1.29, "learning_rate": 2.8431253137272275e-05, "loss": 0.6822, "step": 116875 }, { "epoch": 1.29, "learning_rate": 2.843033041013376e-05, "loss": 0.6707, "step": 116880 }, { "epoch": 1.29, "learning_rate": 2.8429407682995247e-05, "loss": 0.6393, "step": 116885 }, { "epoch": 1.29, "learning_rate": 2.8428484955856732e-05, "loss": 0.6817, "step": 116890 }, { "epoch": 1.29, "learning_rate": 2.8427562228718223e-05, "loss": 0.6901, "step": 116895 }, { "epoch": 1.29, "learning_rate": 2.842663950157971e-05, "loss": 0.6779, "step": 116900 }, { "epoch": 1.29, "learning_rate": 2.8425716774441195e-05, "loss": 0.585, "step": 116905 }, { "epoch": 1.29, "learning_rate": 2.8424794047302683e-05, "loss": 0.7116, "step": 116910 }, { "epoch": 1.29, "learning_rate": 2.8423871320164174e-05, "loss": 0.6474, "step": 116915 }, { "epoch": 1.29, "learning_rate": 2.8422948593025662e-05, "loss": 0.663, "step": 116920 }, { "epoch": 1.29, "learning_rate": 2.8422025865887147e-05, "loss": 0.721, "step": 116925 }, { "epoch": 1.29, "learning_rate": 2.8421103138748635e-05, "loss": 0.6451, "step": 116930 }, { "epoch": 1.29, "learning_rate": 2.8420180411610126e-05, "loss": 0.6891, "step": 116935 }, { "epoch": 1.29, "learning_rate": 2.841925768447161e-05, "loss": 0.7354, "step": 116940 }, { "epoch": 1.29, "learning_rate": 2.8418334957333098e-05, "loss": 0.6435, "step": 116945 }, { "epoch": 1.29, "learning_rate": 2.8417412230194586e-05, "loss": 0.6907, "step": 116950 }, { "epoch": 1.3, "learning_rate": 2.8416489503056077e-05, "loss": 0.6481, "step": 116955 }, { "epoch": 1.3, "learning_rate": 2.8415566775917562e-05, "loss": 0.617, "step": 116960 }, { "epoch": 1.3, "learning_rate": 2.841464404877905e-05, "loss": 0.5954, "step": 116965 }, { "epoch": 1.3, "learning_rate": 2.8413721321640534e-05, "loss": 0.6828, "step": 116970 }, { "epoch": 1.3, "learning_rate": 2.8412798594502022e-05, "loss": 0.6463, "step": 116975 }, { "epoch": 1.3, "learning_rate": 2.8411875867363513e-05, "loss": 0.626, "step": 116980 }, { "epoch": 1.3, "learning_rate": 2.8410953140225e-05, "loss": 0.6545, "step": 116985 }, { "epoch": 1.3, "learning_rate": 2.8410030413086485e-05, "loss": 0.6842, "step": 116990 }, { "epoch": 1.3, "learning_rate": 2.8409107685947973e-05, "loss": 0.7471, "step": 116995 }, { "epoch": 1.3, "learning_rate": 2.8408184958809465e-05, "loss": 0.6237, "step": 117000 }, { "epoch": 1.3, "eval_loss": 0.6233593821525574, "eval_runtime": 69.2564, "eval_samples_per_second": 28.878, "eval_steps_per_second": 14.439, "step": 117000 }, { "epoch": 1.3, "learning_rate": 2.840726223167095e-05, "loss": 0.6837, "step": 117005 }, { "epoch": 1.3, "learning_rate": 2.8406339504532437e-05, "loss": 0.6679, "step": 117010 }, { "epoch": 1.3, "learning_rate": 2.840541677739392e-05, "loss": 0.6315, "step": 117015 }, { "epoch": 1.3, "learning_rate": 2.8404494050255416e-05, "loss": 0.6955, "step": 117020 }, { "epoch": 1.3, "learning_rate": 2.84035713231169e-05, "loss": 0.6695, "step": 117025 }, { "epoch": 1.3, "learning_rate": 2.8402648595978388e-05, "loss": 0.6466, "step": 117030 }, { "epoch": 1.3, "learning_rate": 2.8401725868839873e-05, "loss": 0.6607, "step": 117035 }, { "epoch": 1.3, "learning_rate": 2.840080314170136e-05, "loss": 0.6929, "step": 117040 }, { "epoch": 1.3, "learning_rate": 2.8399880414562852e-05, "loss": 0.6655, "step": 117045 }, { "epoch": 1.3, "learning_rate": 2.8398957687424336e-05, "loss": 0.6348, "step": 117050 }, { "epoch": 1.3, "learning_rate": 2.8398034960285824e-05, "loss": 0.5861, "step": 117055 }, { "epoch": 1.3, "learning_rate": 2.8397112233147312e-05, "loss": 0.6891, "step": 117060 }, { "epoch": 1.3, "learning_rate": 2.8396189506008803e-05, "loss": 0.6492, "step": 117065 }, { "epoch": 1.3, "learning_rate": 2.8395266778870288e-05, "loss": 0.6726, "step": 117070 }, { "epoch": 1.3, "learning_rate": 2.8394344051731775e-05, "loss": 0.6852, "step": 117075 }, { "epoch": 1.3, "learning_rate": 2.839342132459326e-05, "loss": 0.6798, "step": 117080 }, { "epoch": 1.3, "learning_rate": 2.8392498597454755e-05, "loss": 0.6672, "step": 117085 }, { "epoch": 1.3, "learning_rate": 2.839157587031624e-05, "loss": 0.7037, "step": 117090 }, { "epoch": 1.3, "learning_rate": 2.8390653143177727e-05, "loss": 0.7145, "step": 117095 }, { "epoch": 1.3, "learning_rate": 2.838973041603921e-05, "loss": 0.6611, "step": 117100 }, { "epoch": 1.3, "learning_rate": 2.8388807688900703e-05, "loss": 0.6334, "step": 117105 }, { "epoch": 1.3, "learning_rate": 2.838788496176219e-05, "loss": 0.6967, "step": 117110 }, { "epoch": 1.3, "learning_rate": 2.8386962234623675e-05, "loss": 0.6367, "step": 117115 }, { "epoch": 1.3, "learning_rate": 2.8386039507485163e-05, "loss": 0.7302, "step": 117120 }, { "epoch": 1.3, "learning_rate": 2.838511678034665e-05, "loss": 0.6601, "step": 117125 }, { "epoch": 1.3, "learning_rate": 2.8384194053208142e-05, "loss": 0.6333, "step": 117130 }, { "epoch": 1.3, "learning_rate": 2.8383271326069626e-05, "loss": 0.6567, "step": 117135 }, { "epoch": 1.3, "learning_rate": 2.8382348598931114e-05, "loss": 0.6621, "step": 117140 }, { "epoch": 1.3, "learning_rate": 2.83814258717926e-05, "loss": 0.5912, "step": 117145 }, { "epoch": 1.3, "learning_rate": 2.838050314465409e-05, "loss": 0.6553, "step": 117150 }, { "epoch": 1.3, "learning_rate": 2.8379580417515578e-05, "loss": 0.6131, "step": 117155 }, { "epoch": 1.3, "learning_rate": 2.8378657690377066e-05, "loss": 0.683, "step": 117160 }, { "epoch": 1.3, "learning_rate": 2.837773496323855e-05, "loss": 0.5908, "step": 117165 }, { "epoch": 1.3, "learning_rate": 2.837681223610004e-05, "loss": 0.6747, "step": 117170 }, { "epoch": 1.3, "learning_rate": 2.837588950896153e-05, "loss": 0.737, "step": 117175 }, { "epoch": 1.3, "learning_rate": 2.8374966781823014e-05, "loss": 0.6024, "step": 117180 }, { "epoch": 1.3, "learning_rate": 2.83740440546845e-05, "loss": 0.6462, "step": 117185 }, { "epoch": 1.3, "learning_rate": 2.8373121327545993e-05, "loss": 0.6559, "step": 117190 }, { "epoch": 1.3, "learning_rate": 2.837219860040748e-05, "loss": 0.6354, "step": 117195 }, { "epoch": 1.3, "learning_rate": 2.8371275873268965e-05, "loss": 0.6537, "step": 117200 }, { "epoch": 1.3, "learning_rate": 2.8370353146130453e-05, "loss": 0.6394, "step": 117205 }, { "epoch": 1.3, "learning_rate": 2.8369430418991937e-05, "loss": 0.6401, "step": 117210 }, { "epoch": 1.3, "learning_rate": 2.836850769185343e-05, "loss": 0.6697, "step": 117215 }, { "epoch": 1.3, "learning_rate": 2.8367584964714916e-05, "loss": 0.672, "step": 117220 }, { "epoch": 1.3, "learning_rate": 2.83666622375764e-05, "loss": 0.6827, "step": 117225 }, { "epoch": 1.3, "learning_rate": 2.836573951043789e-05, "loss": 0.5966, "step": 117230 }, { "epoch": 1.3, "learning_rate": 2.836481678329938e-05, "loss": 0.6426, "step": 117235 }, { "epoch": 1.3, "learning_rate": 2.8363894056160868e-05, "loss": 0.6368, "step": 117240 }, { "epoch": 1.3, "learning_rate": 2.8362971329022352e-05, "loss": 0.6332, "step": 117245 }, { "epoch": 1.3, "learning_rate": 2.836204860188384e-05, "loss": 0.6786, "step": 117250 }, { "epoch": 1.3, "learning_rate": 2.836112587474533e-05, "loss": 0.6914, "step": 117255 }, { "epoch": 1.3, "learning_rate": 2.836020314760682e-05, "loss": 0.6282, "step": 117260 }, { "epoch": 1.3, "learning_rate": 2.8359280420468304e-05, "loss": 0.7109, "step": 117265 }, { "epoch": 1.3, "learning_rate": 2.835835769332979e-05, "loss": 0.712, "step": 117270 }, { "epoch": 1.3, "learning_rate": 2.8357434966191276e-05, "loss": 0.6859, "step": 117275 }, { "epoch": 1.3, "learning_rate": 2.8356512239052767e-05, "loss": 0.6146, "step": 117280 }, { "epoch": 1.3, "learning_rate": 2.8355589511914255e-05, "loss": 0.6652, "step": 117285 }, { "epoch": 1.3, "learning_rate": 2.835466678477574e-05, "loss": 0.6218, "step": 117290 }, { "epoch": 1.3, "learning_rate": 2.8353744057637227e-05, "loss": 0.6203, "step": 117295 }, { "epoch": 1.3, "learning_rate": 2.835282133049872e-05, "loss": 0.661, "step": 117300 }, { "epoch": 1.3, "learning_rate": 2.8351898603360206e-05, "loss": 0.6556, "step": 117305 }, { "epoch": 1.3, "learning_rate": 2.835097587622169e-05, "loss": 0.5977, "step": 117310 }, { "epoch": 1.3, "learning_rate": 2.835005314908318e-05, "loss": 0.6685, "step": 117315 }, { "epoch": 1.3, "learning_rate": 2.834913042194467e-05, "loss": 0.6544, "step": 117320 }, { "epoch": 1.3, "learning_rate": 2.8348207694806154e-05, "loss": 0.6776, "step": 117325 }, { "epoch": 1.3, "learning_rate": 2.8347284967667642e-05, "loss": 0.6066, "step": 117330 }, { "epoch": 1.3, "learning_rate": 2.834636224052913e-05, "loss": 0.6004, "step": 117335 }, { "epoch": 1.3, "learning_rate": 2.834543951339062e-05, "loss": 0.6245, "step": 117340 }, { "epoch": 1.3, "learning_rate": 2.8344516786252106e-05, "loss": 0.6943, "step": 117345 }, { "epoch": 1.3, "learning_rate": 2.8343594059113594e-05, "loss": 0.6849, "step": 117350 }, { "epoch": 1.3, "learning_rate": 2.8342671331975078e-05, "loss": 0.6551, "step": 117355 }, { "epoch": 1.3, "learning_rate": 2.8341748604836566e-05, "loss": 0.6189, "step": 117360 }, { "epoch": 1.3, "learning_rate": 2.8340825877698057e-05, "loss": 0.6952, "step": 117365 }, { "epoch": 1.3, "learning_rate": 2.8339903150559545e-05, "loss": 0.6163, "step": 117370 }, { "epoch": 1.3, "learning_rate": 2.833898042342103e-05, "loss": 0.6312, "step": 117375 }, { "epoch": 1.3, "learning_rate": 2.8338057696282517e-05, "loss": 0.724, "step": 117380 }, { "epoch": 1.3, "learning_rate": 2.833713496914401e-05, "loss": 0.6047, "step": 117385 }, { "epoch": 1.3, "learning_rate": 2.8336212242005493e-05, "loss": 0.6825, "step": 117390 }, { "epoch": 1.3, "learning_rate": 2.833528951486698e-05, "loss": 0.6647, "step": 117395 }, { "epoch": 1.3, "learning_rate": 2.8334366787728465e-05, "loss": 0.6543, "step": 117400 }, { "epoch": 1.3, "learning_rate": 2.833344406058996e-05, "loss": 0.6648, "step": 117405 }, { "epoch": 1.3, "learning_rate": 2.8332521333451444e-05, "loss": 0.6357, "step": 117410 }, { "epoch": 1.3, "learning_rate": 2.8331598606312932e-05, "loss": 0.6153, "step": 117415 }, { "epoch": 1.3, "learning_rate": 2.8330675879174417e-05, "loss": 0.6777, "step": 117420 }, { "epoch": 1.3, "learning_rate": 2.8329753152035905e-05, "loss": 0.6403, "step": 117425 }, { "epoch": 1.3, "learning_rate": 2.8328830424897396e-05, "loss": 0.6347, "step": 117430 }, { "epoch": 1.3, "learning_rate": 2.8327907697758884e-05, "loss": 0.6093, "step": 117435 }, { "epoch": 1.3, "learning_rate": 2.8326984970620368e-05, "loss": 0.6712, "step": 117440 }, { "epoch": 1.3, "learning_rate": 2.8326062243481856e-05, "loss": 0.6947, "step": 117445 }, { "epoch": 1.3, "learning_rate": 2.8325139516343347e-05, "loss": 0.6662, "step": 117450 }, { "epoch": 1.3, "learning_rate": 2.832421678920483e-05, "loss": 0.6615, "step": 117455 }, { "epoch": 1.3, "learning_rate": 2.832329406206632e-05, "loss": 0.6734, "step": 117460 }, { "epoch": 1.3, "learning_rate": 2.8322371334927804e-05, "loss": 0.6368, "step": 117465 }, { "epoch": 1.3, "learning_rate": 2.83214486077893e-05, "loss": 0.7033, "step": 117470 }, { "epoch": 1.3, "learning_rate": 2.8320525880650783e-05, "loss": 0.655, "step": 117475 }, { "epoch": 1.3, "learning_rate": 2.831960315351227e-05, "loss": 0.6516, "step": 117480 }, { "epoch": 1.3, "learning_rate": 2.8318680426373755e-05, "loss": 0.6383, "step": 117485 }, { "epoch": 1.3, "learning_rate": 2.8317757699235247e-05, "loss": 0.6188, "step": 117490 }, { "epoch": 1.3, "learning_rate": 2.8316834972096734e-05, "loss": 0.689, "step": 117495 }, { "epoch": 1.3, "learning_rate": 2.831591224495822e-05, "loss": 0.632, "step": 117500 }, { "epoch": 1.3, "learning_rate": 2.8314989517819707e-05, "loss": 0.6484, "step": 117505 }, { "epoch": 1.3, "learning_rate": 2.8314066790681195e-05, "loss": 0.6595, "step": 117510 }, { "epoch": 1.3, "learning_rate": 2.8313144063542686e-05, "loss": 0.6384, "step": 117515 }, { "epoch": 1.3, "learning_rate": 2.831222133640417e-05, "loss": 0.6554, "step": 117520 }, { "epoch": 1.3, "learning_rate": 2.8311298609265658e-05, "loss": 0.6186, "step": 117525 }, { "epoch": 1.3, "learning_rate": 2.8310375882127143e-05, "loss": 0.67, "step": 117530 }, { "epoch": 1.3, "learning_rate": 2.8309453154988634e-05, "loss": 0.6668, "step": 117535 }, { "epoch": 1.3, "learning_rate": 2.830853042785012e-05, "loss": 0.6737, "step": 117540 }, { "epoch": 1.3, "learning_rate": 2.830760770071161e-05, "loss": 0.7006, "step": 117545 }, { "epoch": 1.3, "learning_rate": 2.8306684973573094e-05, "loss": 0.6378, "step": 117550 }, { "epoch": 1.3, "learning_rate": 2.8305762246434585e-05, "loss": 0.6675, "step": 117555 }, { "epoch": 1.3, "learning_rate": 2.8304839519296073e-05, "loss": 0.7153, "step": 117560 }, { "epoch": 1.3, "learning_rate": 2.8303916792157558e-05, "loss": 0.6238, "step": 117565 }, { "epoch": 1.3, "learning_rate": 2.8302994065019045e-05, "loss": 0.5999, "step": 117570 }, { "epoch": 1.3, "learning_rate": 2.830207133788053e-05, "loss": 0.7007, "step": 117575 }, { "epoch": 1.3, "learning_rate": 2.8301148610742024e-05, "loss": 0.6828, "step": 117580 }, { "epoch": 1.3, "learning_rate": 2.830022588360351e-05, "loss": 0.6863, "step": 117585 }, { "epoch": 1.3, "learning_rate": 2.8299303156464997e-05, "loss": 0.6834, "step": 117590 }, { "epoch": 1.3, "learning_rate": 2.829838042932648e-05, "loss": 0.7163, "step": 117595 }, { "epoch": 1.3, "learning_rate": 2.8297457702187972e-05, "loss": 0.67, "step": 117600 }, { "epoch": 1.3, "learning_rate": 2.829653497504946e-05, "loss": 0.667, "step": 117605 }, { "epoch": 1.3, "learning_rate": 2.8295612247910945e-05, "loss": 0.6476, "step": 117610 }, { "epoch": 1.3, "learning_rate": 2.8294689520772433e-05, "loss": 0.6426, "step": 117615 }, { "epoch": 1.3, "learning_rate": 2.8293766793633924e-05, "loss": 0.6525, "step": 117620 }, { "epoch": 1.3, "learning_rate": 2.8292844066495412e-05, "loss": 0.6657, "step": 117625 }, { "epoch": 1.3, "learning_rate": 2.8291921339356896e-05, "loss": 0.6386, "step": 117630 }, { "epoch": 1.3, "learning_rate": 2.8290998612218384e-05, "loss": 0.6508, "step": 117635 }, { "epoch": 1.3, "learning_rate": 2.8290075885079875e-05, "loss": 0.6751, "step": 117640 }, { "epoch": 1.3, "learning_rate": 2.8289153157941363e-05, "loss": 0.6756, "step": 117645 }, { "epoch": 1.3, "learning_rate": 2.8288230430802848e-05, "loss": 0.6799, "step": 117650 }, { "epoch": 1.3, "learning_rate": 2.8287307703664335e-05, "loss": 0.6809, "step": 117655 }, { "epoch": 1.3, "learning_rate": 2.828638497652582e-05, "loss": 0.6453, "step": 117660 }, { "epoch": 1.3, "learning_rate": 2.828546224938731e-05, "loss": 0.6166, "step": 117665 }, { "epoch": 1.3, "learning_rate": 2.82845395222488e-05, "loss": 0.6413, "step": 117670 }, { "epoch": 1.3, "learning_rate": 2.8283616795110283e-05, "loss": 0.6532, "step": 117675 }, { "epoch": 1.3, "learning_rate": 2.828269406797177e-05, "loss": 0.6742, "step": 117680 }, { "epoch": 1.3, "learning_rate": 2.8281771340833263e-05, "loss": 0.6727, "step": 117685 }, { "epoch": 1.3, "learning_rate": 2.828084861369475e-05, "loss": 0.6515, "step": 117690 }, { "epoch": 1.3, "learning_rate": 2.8279925886556235e-05, "loss": 0.6384, "step": 117695 }, { "epoch": 1.3, "learning_rate": 2.8279003159417723e-05, "loss": 0.6463, "step": 117700 }, { "epoch": 1.3, "learning_rate": 2.8278080432279214e-05, "loss": 0.6636, "step": 117705 }, { "epoch": 1.3, "learning_rate": 2.82771577051407e-05, "loss": 0.6429, "step": 117710 }, { "epoch": 1.3, "learning_rate": 2.8276234978002186e-05, "loss": 0.6549, "step": 117715 }, { "epoch": 1.3, "learning_rate": 2.8275312250863674e-05, "loss": 0.6016, "step": 117720 }, { "epoch": 1.3, "learning_rate": 2.827438952372516e-05, "loss": 0.6183, "step": 117725 }, { "epoch": 1.3, "learning_rate": 2.827346679658665e-05, "loss": 0.5898, "step": 117730 }, { "epoch": 1.3, "learning_rate": 2.8272544069448138e-05, "loss": 0.5961, "step": 117735 }, { "epoch": 1.3, "learning_rate": 2.8271621342309622e-05, "loss": 0.6735, "step": 117740 }, { "epoch": 1.3, "learning_rate": 2.827069861517111e-05, "loss": 0.6706, "step": 117745 }, { "epoch": 1.3, "learning_rate": 2.82697758880326e-05, "loss": 0.6657, "step": 117750 }, { "epoch": 1.3, "learning_rate": 2.826885316089409e-05, "loss": 0.603, "step": 117755 }, { "epoch": 1.3, "learning_rate": 2.8267930433755573e-05, "loss": 0.5753, "step": 117760 }, { "epoch": 1.3, "learning_rate": 2.826700770661706e-05, "loss": 0.6296, "step": 117765 }, { "epoch": 1.3, "learning_rate": 2.8266084979478553e-05, "loss": 0.6266, "step": 117770 }, { "epoch": 1.3, "learning_rate": 2.8265162252340037e-05, "loss": 0.6357, "step": 117775 }, { "epoch": 1.3, "learning_rate": 2.8264239525201525e-05, "loss": 0.7345, "step": 117780 }, { "epoch": 1.3, "learning_rate": 2.826331679806301e-05, "loss": 0.6265, "step": 117785 }, { "epoch": 1.3, "learning_rate": 2.8262394070924504e-05, "loss": 0.6255, "step": 117790 }, { "epoch": 1.3, "learning_rate": 2.826147134378599e-05, "loss": 0.6104, "step": 117795 }, { "epoch": 1.3, "learning_rate": 2.8260548616647476e-05, "loss": 0.6063, "step": 117800 }, { "epoch": 1.3, "learning_rate": 2.825962588950896e-05, "loss": 0.5752, "step": 117805 }, { "epoch": 1.3, "learning_rate": 2.825870316237045e-05, "loss": 0.6781, "step": 117810 }, { "epoch": 1.3, "learning_rate": 2.825778043523194e-05, "loss": 0.6256, "step": 117815 }, { "epoch": 1.3, "learning_rate": 2.8256857708093428e-05, "loss": 0.6649, "step": 117820 }, { "epoch": 1.3, "learning_rate": 2.8255934980954912e-05, "loss": 0.6263, "step": 117825 }, { "epoch": 1.3, "learning_rate": 2.82550122538164e-05, "loss": 0.6476, "step": 117830 }, { "epoch": 1.3, "learning_rate": 2.825408952667789e-05, "loss": 0.624, "step": 117835 }, { "epoch": 1.3, "learning_rate": 2.8253166799539376e-05, "loss": 0.6795, "step": 117840 }, { "epoch": 1.3, "learning_rate": 2.8252244072400864e-05, "loss": 0.6592, "step": 117845 }, { "epoch": 1.3, "learning_rate": 2.8251321345262348e-05, "loss": 0.668, "step": 117850 }, { "epoch": 1.3, "learning_rate": 2.8250398618123843e-05, "loss": 0.6494, "step": 117855 }, { "epoch": 1.31, "learning_rate": 2.8249475890985327e-05, "loss": 0.7326, "step": 117860 }, { "epoch": 1.31, "learning_rate": 2.8248553163846815e-05, "loss": 0.6688, "step": 117865 }, { "epoch": 1.31, "learning_rate": 2.82476304367083e-05, "loss": 0.6424, "step": 117870 }, { "epoch": 1.31, "learning_rate": 2.8246707709569787e-05, "loss": 0.6619, "step": 117875 }, { "epoch": 1.31, "learning_rate": 2.824578498243128e-05, "loss": 0.6093, "step": 117880 }, { "epoch": 1.31, "learning_rate": 2.8244862255292763e-05, "loss": 0.6584, "step": 117885 }, { "epoch": 1.31, "learning_rate": 2.824393952815425e-05, "loss": 0.6562, "step": 117890 }, { "epoch": 1.31, "learning_rate": 2.824301680101574e-05, "loss": 0.6904, "step": 117895 }, { "epoch": 1.31, "learning_rate": 2.824209407387723e-05, "loss": 0.6666, "step": 117900 }, { "epoch": 1.31, "learning_rate": 2.8241171346738714e-05, "loss": 0.6626, "step": 117905 }, { "epoch": 1.31, "learning_rate": 2.8240248619600202e-05, "loss": 0.6505, "step": 117910 }, { "epoch": 1.31, "learning_rate": 2.8239325892461687e-05, "loss": 0.6637, "step": 117915 }, { "epoch": 1.31, "learning_rate": 2.8238403165323178e-05, "loss": 0.584, "step": 117920 }, { "epoch": 1.31, "learning_rate": 2.8237480438184666e-05, "loss": 0.6647, "step": 117925 }, { "epoch": 1.31, "learning_rate": 2.8236557711046154e-05, "loss": 0.6352, "step": 117930 }, { "epoch": 1.31, "learning_rate": 2.8235634983907638e-05, "loss": 0.6702, "step": 117935 }, { "epoch": 1.31, "learning_rate": 2.823471225676913e-05, "loss": 0.6761, "step": 117940 }, { "epoch": 1.31, "learning_rate": 2.8233789529630617e-05, "loss": 0.6574, "step": 117945 }, { "epoch": 1.31, "learning_rate": 2.82328668024921e-05, "loss": 0.6335, "step": 117950 }, { "epoch": 1.31, "learning_rate": 2.823194407535359e-05, "loss": 0.6485, "step": 117955 }, { "epoch": 1.31, "learning_rate": 2.8231021348215074e-05, "loss": 0.6493, "step": 117960 }, { "epoch": 1.31, "learning_rate": 2.823009862107657e-05, "loss": 0.6552, "step": 117965 }, { "epoch": 1.31, "learning_rate": 2.8229175893938053e-05, "loss": 0.7216, "step": 117970 }, { "epoch": 1.31, "learning_rate": 2.822825316679954e-05, "loss": 0.6358, "step": 117975 }, { "epoch": 1.31, "learning_rate": 2.8227330439661025e-05, "loss": 0.6429, "step": 117980 }, { "epoch": 1.31, "learning_rate": 2.8226407712522517e-05, "loss": 0.6725, "step": 117985 }, { "epoch": 1.31, "learning_rate": 2.8225484985384004e-05, "loss": 0.6651, "step": 117990 }, { "epoch": 1.31, "learning_rate": 2.822456225824549e-05, "loss": 0.6613, "step": 117995 }, { "epoch": 1.31, "learning_rate": 2.8223639531106977e-05, "loss": 0.6707, "step": 118000 }, { "epoch": 1.31, "eval_loss": 0.6266816258430481, "eval_runtime": 69.2677, "eval_samples_per_second": 28.873, "eval_steps_per_second": 14.437, "step": 118000 }, { "epoch": 1.31, "learning_rate": 2.8222716803968468e-05, "loss": 0.6575, "step": 118005 }, { "epoch": 1.31, "learning_rate": 2.8221794076829956e-05, "loss": 0.6225, "step": 118010 }, { "epoch": 1.31, "learning_rate": 2.822087134969144e-05, "loss": 0.6895, "step": 118015 }, { "epoch": 1.31, "learning_rate": 2.8219948622552928e-05, "loss": 0.671, "step": 118020 }, { "epoch": 1.31, "learning_rate": 2.821902589541442e-05, "loss": 0.6418, "step": 118025 }, { "epoch": 1.31, "learning_rate": 2.8218103168275907e-05, "loss": 0.6673, "step": 118030 }, { "epoch": 1.31, "learning_rate": 2.821718044113739e-05, "loss": 0.6417, "step": 118035 }, { "epoch": 1.31, "learning_rate": 2.821625771399888e-05, "loss": 0.6803, "step": 118040 }, { "epoch": 1.31, "learning_rate": 2.8215334986860364e-05, "loss": 0.6316, "step": 118045 }, { "epoch": 1.31, "learning_rate": 2.8214412259721855e-05, "loss": 0.6266, "step": 118050 }, { "epoch": 1.31, "learning_rate": 2.8213489532583343e-05, "loss": 0.6207, "step": 118055 }, { "epoch": 1.31, "learning_rate": 2.8212566805444827e-05, "loss": 0.6784, "step": 118060 }, { "epoch": 1.31, "learning_rate": 2.8211644078306315e-05, "loss": 0.6599, "step": 118065 }, { "epoch": 1.31, "learning_rate": 2.8210721351167807e-05, "loss": 0.6444, "step": 118070 }, { "epoch": 1.31, "learning_rate": 2.8209798624029294e-05, "loss": 0.6619, "step": 118075 }, { "epoch": 1.31, "learning_rate": 2.820887589689078e-05, "loss": 0.6895, "step": 118080 }, { "epoch": 1.31, "learning_rate": 2.8207953169752267e-05, "loss": 0.6984, "step": 118085 }, { "epoch": 1.31, "learning_rate": 2.8207030442613758e-05, "loss": 0.6102, "step": 118090 }, { "epoch": 1.31, "learning_rate": 2.8206107715475242e-05, "loss": 0.6214, "step": 118095 }, { "epoch": 1.31, "learning_rate": 2.820518498833673e-05, "loss": 0.6811, "step": 118100 }, { "epoch": 1.31, "learning_rate": 2.8204262261198218e-05, "loss": 0.7046, "step": 118105 }, { "epoch": 1.31, "learning_rate": 2.8203339534059703e-05, "loss": 0.6551, "step": 118110 }, { "epoch": 1.31, "learning_rate": 2.8202416806921194e-05, "loss": 0.6058, "step": 118115 }, { "epoch": 1.31, "learning_rate": 2.820149407978268e-05, "loss": 0.67, "step": 118120 }, { "epoch": 1.31, "learning_rate": 2.8200571352644166e-05, "loss": 0.6682, "step": 118125 }, { "epoch": 1.31, "learning_rate": 2.8199648625505654e-05, "loss": 0.6605, "step": 118130 }, { "epoch": 1.31, "learning_rate": 2.8198725898367145e-05, "loss": 0.6488, "step": 118135 }, { "epoch": 1.31, "learning_rate": 2.8197803171228633e-05, "loss": 0.6354, "step": 118140 }, { "epoch": 1.31, "learning_rate": 2.8196880444090118e-05, "loss": 0.6921, "step": 118145 }, { "epoch": 1.31, "learning_rate": 2.8195957716951605e-05, "loss": 0.6169, "step": 118150 }, { "epoch": 1.31, "learning_rate": 2.8195034989813097e-05, "loss": 0.6802, "step": 118155 }, { "epoch": 1.31, "learning_rate": 2.819411226267458e-05, "loss": 0.6249, "step": 118160 }, { "epoch": 1.31, "learning_rate": 2.819318953553607e-05, "loss": 0.591, "step": 118165 }, { "epoch": 1.31, "learning_rate": 2.8192266808397553e-05, "loss": 0.6754, "step": 118170 }, { "epoch": 1.31, "learning_rate": 2.8191344081259048e-05, "loss": 0.6919, "step": 118175 }, { "epoch": 1.31, "learning_rate": 2.8190421354120532e-05, "loss": 0.6361, "step": 118180 }, { "epoch": 1.31, "learning_rate": 2.818949862698202e-05, "loss": 0.6213, "step": 118185 }, { "epoch": 1.31, "learning_rate": 2.8188575899843505e-05, "loss": 0.6806, "step": 118190 }, { "epoch": 1.31, "learning_rate": 2.8187653172704993e-05, "loss": 0.6273, "step": 118195 }, { "epoch": 1.31, "learning_rate": 2.8186730445566484e-05, "loss": 0.6691, "step": 118200 }, { "epoch": 1.31, "learning_rate": 2.818580771842797e-05, "loss": 0.6286, "step": 118205 }, { "epoch": 1.31, "learning_rate": 2.8184884991289456e-05, "loss": 0.7101, "step": 118210 }, { "epoch": 1.31, "learning_rate": 2.8183962264150944e-05, "loss": 0.6153, "step": 118215 }, { "epoch": 1.31, "learning_rate": 2.8183039537012435e-05, "loss": 0.6855, "step": 118220 }, { "epoch": 1.31, "learning_rate": 2.818211680987392e-05, "loss": 0.628, "step": 118225 }, { "epoch": 1.31, "learning_rate": 2.8181194082735408e-05, "loss": 0.6673, "step": 118230 }, { "epoch": 1.31, "learning_rate": 2.8180271355596892e-05, "loss": 0.6104, "step": 118235 }, { "epoch": 1.31, "learning_rate": 2.8179348628458387e-05, "loss": 0.7152, "step": 118240 }, { "epoch": 1.31, "learning_rate": 2.817842590131987e-05, "loss": 0.688, "step": 118245 }, { "epoch": 1.31, "learning_rate": 2.817750317418136e-05, "loss": 0.6804, "step": 118250 }, { "epoch": 1.31, "learning_rate": 2.8176580447042843e-05, "loss": 0.6525, "step": 118255 }, { "epoch": 1.31, "learning_rate": 2.817565771990433e-05, "loss": 0.6423, "step": 118260 }, { "epoch": 1.31, "learning_rate": 2.8174734992765822e-05, "loss": 0.657, "step": 118265 }, { "epoch": 1.31, "learning_rate": 2.8173812265627307e-05, "loss": 0.6658, "step": 118270 }, { "epoch": 1.31, "learning_rate": 2.8172889538488795e-05, "loss": 0.7137, "step": 118275 }, { "epoch": 1.31, "learning_rate": 2.8171966811350283e-05, "loss": 0.6832, "step": 118280 }, { "epoch": 1.31, "learning_rate": 2.8171044084211774e-05, "loss": 0.6323, "step": 118285 }, { "epoch": 1.31, "learning_rate": 2.817012135707326e-05, "loss": 0.6725, "step": 118290 }, { "epoch": 1.31, "learning_rate": 2.8169198629934746e-05, "loss": 0.6649, "step": 118295 }, { "epoch": 1.31, "learning_rate": 2.816827590279623e-05, "loss": 0.6942, "step": 118300 }, { "epoch": 1.31, "learning_rate": 2.8167353175657725e-05, "loss": 0.6553, "step": 118305 }, { "epoch": 1.31, "learning_rate": 2.816643044851921e-05, "loss": 0.6775, "step": 118310 }, { "epoch": 1.31, "learning_rate": 2.8165507721380698e-05, "loss": 0.6407, "step": 118315 }, { "epoch": 1.31, "learning_rate": 2.8164584994242182e-05, "loss": 0.6427, "step": 118320 }, { "epoch": 1.31, "learning_rate": 2.8163662267103673e-05, "loss": 0.7132, "step": 118325 }, { "epoch": 1.31, "learning_rate": 2.816273953996516e-05, "loss": 0.6078, "step": 118330 }, { "epoch": 1.31, "learning_rate": 2.8161816812826646e-05, "loss": 0.6219, "step": 118335 }, { "epoch": 1.31, "learning_rate": 2.8160894085688133e-05, "loss": 0.6756, "step": 118340 }, { "epoch": 1.31, "learning_rate": 2.8159971358549618e-05, "loss": 0.682, "step": 118345 }, { "epoch": 1.31, "learning_rate": 2.8159048631411113e-05, "loss": 0.7162, "step": 118350 }, { "epoch": 1.31, "learning_rate": 2.8158125904272597e-05, "loss": 0.6882, "step": 118355 }, { "epoch": 1.31, "learning_rate": 2.8157203177134085e-05, "loss": 0.6817, "step": 118360 }, { "epoch": 1.31, "learning_rate": 2.815628044999557e-05, "loss": 0.6653, "step": 118365 }, { "epoch": 1.31, "learning_rate": 2.815535772285706e-05, "loss": 0.6554, "step": 118370 }, { "epoch": 1.31, "learning_rate": 2.815443499571855e-05, "loss": 0.6781, "step": 118375 }, { "epoch": 1.31, "learning_rate": 2.8153512268580036e-05, "loss": 0.6667, "step": 118380 }, { "epoch": 1.31, "learning_rate": 2.815258954144152e-05, "loss": 0.6324, "step": 118385 }, { "epoch": 1.31, "learning_rate": 2.8151666814303012e-05, "loss": 0.6856, "step": 118390 }, { "epoch": 1.31, "learning_rate": 2.81507440871645e-05, "loss": 0.6326, "step": 118395 }, { "epoch": 1.31, "learning_rate": 2.8149821360025984e-05, "loss": 0.6817, "step": 118400 }, { "epoch": 1.31, "learning_rate": 2.8148898632887472e-05, "loss": 0.6366, "step": 118405 }, { "epoch": 1.31, "learning_rate": 2.8147975905748957e-05, "loss": 0.688, "step": 118410 }, { "epoch": 1.31, "learning_rate": 2.814705317861045e-05, "loss": 0.6593, "step": 118415 }, { "epoch": 1.31, "learning_rate": 2.8146130451471936e-05, "loss": 0.6732, "step": 118420 }, { "epoch": 1.31, "learning_rate": 2.8145207724333423e-05, "loss": 0.6682, "step": 118425 }, { "epoch": 1.31, "learning_rate": 2.8144284997194908e-05, "loss": 0.666, "step": 118430 }, { "epoch": 1.31, "learning_rate": 2.81433622700564e-05, "loss": 0.7053, "step": 118435 }, { "epoch": 1.31, "learning_rate": 2.8142439542917887e-05, "loss": 0.6523, "step": 118440 }, { "epoch": 1.31, "learning_rate": 2.814151681577937e-05, "loss": 0.6933, "step": 118445 }, { "epoch": 1.31, "learning_rate": 2.814059408864086e-05, "loss": 0.6759, "step": 118450 }, { "epoch": 1.31, "learning_rate": 2.813967136150235e-05, "loss": 0.6494, "step": 118455 }, { "epoch": 1.31, "learning_rate": 2.813874863436384e-05, "loss": 0.6725, "step": 118460 }, { "epoch": 1.31, "learning_rate": 2.8137825907225323e-05, "loss": 0.6549, "step": 118465 }, { "epoch": 1.31, "learning_rate": 2.813690318008681e-05, "loss": 0.6783, "step": 118470 }, { "epoch": 1.31, "learning_rate": 2.8135980452948302e-05, "loss": 0.6153, "step": 118475 }, { "epoch": 1.31, "learning_rate": 2.8135057725809786e-05, "loss": 0.6352, "step": 118480 }, { "epoch": 1.31, "learning_rate": 2.8134134998671274e-05, "loss": 0.6515, "step": 118485 }, { "epoch": 1.31, "learning_rate": 2.8133212271532762e-05, "loss": 0.6855, "step": 118490 }, { "epoch": 1.31, "learning_rate": 2.8132289544394247e-05, "loss": 0.6449, "step": 118495 }, { "epoch": 1.31, "learning_rate": 2.8131366817255738e-05, "loss": 0.6677, "step": 118500 }, { "epoch": 1.31, "learning_rate": 2.8130444090117226e-05, "loss": 0.7126, "step": 118505 }, { "epoch": 1.31, "learning_rate": 2.812952136297871e-05, "loss": 0.6896, "step": 118510 }, { "epoch": 1.31, "learning_rate": 2.8128598635840198e-05, "loss": 0.7072, "step": 118515 }, { "epoch": 1.31, "learning_rate": 2.812767590870169e-05, "loss": 0.6473, "step": 118520 }, { "epoch": 1.31, "learning_rate": 2.8126753181563177e-05, "loss": 0.6878, "step": 118525 }, { "epoch": 1.31, "learning_rate": 2.812583045442466e-05, "loss": 0.6635, "step": 118530 }, { "epoch": 1.31, "learning_rate": 2.812490772728615e-05, "loss": 0.6591, "step": 118535 }, { "epoch": 1.31, "learning_rate": 2.812398500014764e-05, "loss": 0.6439, "step": 118540 }, { "epoch": 1.31, "learning_rate": 2.8123062273009125e-05, "loss": 0.7301, "step": 118545 }, { "epoch": 1.31, "learning_rate": 2.8122139545870613e-05, "loss": 0.6317, "step": 118550 }, { "epoch": 1.31, "learning_rate": 2.8121216818732097e-05, "loss": 0.665, "step": 118555 }, { "epoch": 1.31, "learning_rate": 2.8120294091593585e-05, "loss": 0.6849, "step": 118560 }, { "epoch": 1.31, "learning_rate": 2.8119371364455076e-05, "loss": 0.6793, "step": 118565 }, { "epoch": 1.31, "learning_rate": 2.8118448637316564e-05, "loss": 0.6579, "step": 118570 }, { "epoch": 1.31, "learning_rate": 2.811752591017805e-05, "loss": 0.7156, "step": 118575 }, { "epoch": 1.31, "learning_rate": 2.8116603183039537e-05, "loss": 0.6725, "step": 118580 }, { "epoch": 1.31, "learning_rate": 2.8115680455901028e-05, "loss": 0.6747, "step": 118585 }, { "epoch": 1.31, "learning_rate": 2.8114757728762516e-05, "loss": 0.646, "step": 118590 }, { "epoch": 1.31, "learning_rate": 2.8113835001624e-05, "loss": 0.6194, "step": 118595 }, { "epoch": 1.31, "learning_rate": 2.8112912274485488e-05, "loss": 0.6772, "step": 118600 }, { "epoch": 1.31, "learning_rate": 2.811198954734698e-05, "loss": 0.627, "step": 118605 }, { "epoch": 1.31, "learning_rate": 2.8111066820208464e-05, "loss": 0.6301, "step": 118610 }, { "epoch": 1.31, "learning_rate": 2.811014409306995e-05, "loss": 0.6617, "step": 118615 }, { "epoch": 1.31, "learning_rate": 2.8109221365931436e-05, "loss": 0.6711, "step": 118620 }, { "epoch": 1.31, "learning_rate": 2.810829863879293e-05, "loss": 0.6107, "step": 118625 }, { "epoch": 1.31, "learning_rate": 2.8107375911654415e-05, "loss": 0.7009, "step": 118630 }, { "epoch": 1.31, "learning_rate": 2.8106453184515903e-05, "loss": 0.6674, "step": 118635 }, { "epoch": 1.31, "learning_rate": 2.8105530457377387e-05, "loss": 0.6974, "step": 118640 }, { "epoch": 1.31, "learning_rate": 2.8104607730238875e-05, "loss": 0.6754, "step": 118645 }, { "epoch": 1.31, "learning_rate": 2.8103685003100367e-05, "loss": 0.6778, "step": 118650 }, { "epoch": 1.31, "learning_rate": 2.810276227596185e-05, "loss": 0.6327, "step": 118655 }, { "epoch": 1.31, "learning_rate": 2.810183954882334e-05, "loss": 0.7231, "step": 118660 }, { "epoch": 1.31, "learning_rate": 2.8100916821684827e-05, "loss": 0.6646, "step": 118665 }, { "epoch": 1.31, "learning_rate": 2.8099994094546318e-05, "loss": 0.6915, "step": 118670 }, { "epoch": 1.31, "learning_rate": 2.8099071367407802e-05, "loss": 0.6301, "step": 118675 }, { "epoch": 1.31, "learning_rate": 2.809814864026929e-05, "loss": 0.6203, "step": 118680 }, { "epoch": 1.31, "learning_rate": 2.8097225913130775e-05, "loss": 0.6954, "step": 118685 }, { "epoch": 1.31, "learning_rate": 2.809630318599227e-05, "loss": 0.7191, "step": 118690 }, { "epoch": 1.31, "learning_rate": 2.8095380458853754e-05, "loss": 0.6194, "step": 118695 }, { "epoch": 1.31, "learning_rate": 2.809445773171524e-05, "loss": 0.6611, "step": 118700 }, { "epoch": 1.31, "learning_rate": 2.8093535004576726e-05, "loss": 0.653, "step": 118705 }, { "epoch": 1.31, "learning_rate": 2.8092612277438214e-05, "loss": 0.7061, "step": 118710 }, { "epoch": 1.31, "learning_rate": 2.8091689550299705e-05, "loss": 0.6516, "step": 118715 }, { "epoch": 1.31, "learning_rate": 2.809076682316119e-05, "loss": 0.6518, "step": 118720 }, { "epoch": 1.31, "learning_rate": 2.8089844096022677e-05, "loss": 0.7066, "step": 118725 }, { "epoch": 1.31, "learning_rate": 2.8088921368884162e-05, "loss": 0.6077, "step": 118730 }, { "epoch": 1.31, "learning_rate": 2.8087998641745657e-05, "loss": 0.6698, "step": 118735 }, { "epoch": 1.31, "learning_rate": 2.808707591460714e-05, "loss": 0.6901, "step": 118740 }, { "epoch": 1.31, "learning_rate": 2.808615318746863e-05, "loss": 0.6446, "step": 118745 }, { "epoch": 1.31, "learning_rate": 2.8085230460330113e-05, "loss": 0.6181, "step": 118750 }, { "epoch": 1.31, "learning_rate": 2.8084307733191605e-05, "loss": 0.6288, "step": 118755 }, { "epoch": 1.31, "learning_rate": 2.8083385006053092e-05, "loss": 0.6695, "step": 118760 }, { "epoch": 1.32, "learning_rate": 2.808246227891458e-05, "loss": 0.6822, "step": 118765 }, { "epoch": 1.32, "learning_rate": 2.8081539551776065e-05, "loss": 0.6629, "step": 118770 }, { "epoch": 1.32, "learning_rate": 2.8080616824637556e-05, "loss": 0.7043, "step": 118775 }, { "epoch": 1.32, "learning_rate": 2.8079694097499044e-05, "loss": 0.6236, "step": 118780 }, { "epoch": 1.32, "learning_rate": 2.8078771370360528e-05, "loss": 0.6993, "step": 118785 }, { "epoch": 1.32, "learning_rate": 2.8077848643222016e-05, "loss": 0.6528, "step": 118790 }, { "epoch": 1.32, "learning_rate": 2.80769259160835e-05, "loss": 0.6307, "step": 118795 }, { "epoch": 1.32, "learning_rate": 2.8076003188944995e-05, "loss": 0.6919, "step": 118800 }, { "epoch": 1.32, "learning_rate": 2.807508046180648e-05, "loss": 0.7213, "step": 118805 }, { "epoch": 1.32, "learning_rate": 2.8074157734667967e-05, "loss": 0.6692, "step": 118810 }, { "epoch": 1.32, "learning_rate": 2.8073235007529452e-05, "loss": 0.6769, "step": 118815 }, { "epoch": 1.32, "learning_rate": 2.8072312280390943e-05, "loss": 0.68, "step": 118820 }, { "epoch": 1.32, "learning_rate": 2.807138955325243e-05, "loss": 0.6505, "step": 118825 }, { "epoch": 1.32, "learning_rate": 2.8070466826113916e-05, "loss": 0.6395, "step": 118830 }, { "epoch": 1.32, "learning_rate": 2.8069544098975403e-05, "loss": 0.6658, "step": 118835 }, { "epoch": 1.32, "learning_rate": 2.8068621371836895e-05, "loss": 0.6028, "step": 118840 }, { "epoch": 1.32, "learning_rate": 2.8067698644698382e-05, "loss": 0.56, "step": 118845 }, { "epoch": 1.32, "learning_rate": 2.8066775917559867e-05, "loss": 0.6692, "step": 118850 }, { "epoch": 1.32, "learning_rate": 2.8065853190421355e-05, "loss": 0.6496, "step": 118855 }, { "epoch": 1.32, "learning_rate": 2.8064930463282846e-05, "loss": 0.6493, "step": 118860 }, { "epoch": 1.32, "learning_rate": 2.806400773614433e-05, "loss": 0.6395, "step": 118865 }, { "epoch": 1.32, "learning_rate": 2.8063085009005818e-05, "loss": 0.6727, "step": 118870 }, { "epoch": 1.32, "learning_rate": 2.8062162281867306e-05, "loss": 0.6043, "step": 118875 }, { "epoch": 1.32, "learning_rate": 2.806123955472879e-05, "loss": 0.6643, "step": 118880 }, { "epoch": 1.32, "learning_rate": 2.8060316827590282e-05, "loss": 0.6584, "step": 118885 }, { "epoch": 1.32, "learning_rate": 2.805939410045177e-05, "loss": 0.6944, "step": 118890 }, { "epoch": 1.32, "learning_rate": 2.8058471373313254e-05, "loss": 0.6224, "step": 118895 }, { "epoch": 1.32, "learning_rate": 2.8057548646174742e-05, "loss": 0.7042, "step": 118900 }, { "epoch": 1.32, "learning_rate": 2.8056625919036233e-05, "loss": 0.6684, "step": 118905 }, { "epoch": 1.32, "learning_rate": 2.805570319189772e-05, "loss": 0.6055, "step": 118910 }, { "epoch": 1.32, "learning_rate": 2.8054780464759206e-05, "loss": 0.5922, "step": 118915 }, { "epoch": 1.32, "learning_rate": 2.8053857737620693e-05, "loss": 0.7162, "step": 118920 }, { "epoch": 1.32, "learning_rate": 2.8052935010482185e-05, "loss": 0.6519, "step": 118925 }, { "epoch": 1.32, "learning_rate": 2.805201228334367e-05, "loss": 0.628, "step": 118930 }, { "epoch": 1.32, "learning_rate": 2.8051089556205157e-05, "loss": 0.6379, "step": 118935 }, { "epoch": 1.32, "learning_rate": 2.805016682906664e-05, "loss": 0.6508, "step": 118940 }, { "epoch": 1.32, "learning_rate": 2.804924410192813e-05, "loss": 0.6399, "step": 118945 }, { "epoch": 1.32, "learning_rate": 2.804832137478962e-05, "loss": 0.6494, "step": 118950 }, { "epoch": 1.32, "learning_rate": 2.804739864765111e-05, "loss": 0.6799, "step": 118955 }, { "epoch": 1.32, "learning_rate": 2.8046475920512593e-05, "loss": 0.6277, "step": 118960 }, { "epoch": 1.32, "learning_rate": 2.804555319337408e-05, "loss": 0.627, "step": 118965 }, { "epoch": 1.32, "learning_rate": 2.8044630466235572e-05, "loss": 0.6946, "step": 118970 }, { "epoch": 1.32, "learning_rate": 2.804370773909706e-05, "loss": 0.5961, "step": 118975 }, { "epoch": 1.32, "learning_rate": 2.8042785011958544e-05, "loss": 0.6169, "step": 118980 }, { "epoch": 1.32, "learning_rate": 2.8041862284820032e-05, "loss": 0.6411, "step": 118985 }, { "epoch": 1.32, "learning_rate": 2.8040939557681523e-05, "loss": 0.6931, "step": 118990 }, { "epoch": 1.32, "learning_rate": 2.8040016830543008e-05, "loss": 0.6349, "step": 118995 }, { "epoch": 1.32, "learning_rate": 2.8039094103404496e-05, "loss": 0.6823, "step": 119000 }, { "epoch": 1.32, "eval_loss": 0.6299309134483337, "eval_runtime": 69.2602, "eval_samples_per_second": 28.877, "eval_steps_per_second": 14.438, "step": 119000 }, { "epoch": 1.32, "learning_rate": 2.803817137626598e-05, "loss": 0.6419, "step": 119005 }, { "epoch": 1.32, "learning_rate": 2.8037248649127475e-05, "loss": 0.6433, "step": 119010 }, { "epoch": 1.32, "learning_rate": 2.803632592198896e-05, "loss": 0.7219, "step": 119015 }, { "epoch": 1.32, "learning_rate": 2.8035403194850447e-05, "loss": 0.6448, "step": 119020 }, { "epoch": 1.32, "learning_rate": 2.803448046771193e-05, "loss": 0.6816, "step": 119025 }, { "epoch": 1.32, "learning_rate": 2.803355774057342e-05, "loss": 0.6754, "step": 119030 }, { "epoch": 1.32, "learning_rate": 2.803263501343491e-05, "loss": 0.6598, "step": 119035 }, { "epoch": 1.32, "learning_rate": 2.8031712286296395e-05, "loss": 0.6552, "step": 119040 }, { "epoch": 1.32, "learning_rate": 2.8030789559157883e-05, "loss": 0.5918, "step": 119045 }, { "epoch": 1.32, "learning_rate": 2.802986683201937e-05, "loss": 0.6678, "step": 119050 }, { "epoch": 1.32, "learning_rate": 2.8028944104880862e-05, "loss": 0.6388, "step": 119055 }, { "epoch": 1.32, "learning_rate": 2.8028021377742346e-05, "loss": 0.6549, "step": 119060 }, { "epoch": 1.32, "learning_rate": 2.8027098650603834e-05, "loss": 0.6791, "step": 119065 }, { "epoch": 1.32, "learning_rate": 2.802617592346532e-05, "loss": 0.6743, "step": 119070 }, { "epoch": 1.32, "learning_rate": 2.8025253196326813e-05, "loss": 0.6429, "step": 119075 }, { "epoch": 1.32, "learning_rate": 2.8024330469188298e-05, "loss": 0.6725, "step": 119080 }, { "epoch": 1.32, "learning_rate": 2.8023407742049786e-05, "loss": 0.6725, "step": 119085 }, { "epoch": 1.32, "learning_rate": 2.802248501491127e-05, "loss": 0.5945, "step": 119090 }, { "epoch": 1.32, "learning_rate": 2.8021562287772758e-05, "loss": 0.5865, "step": 119095 }, { "epoch": 1.32, "learning_rate": 2.802063956063425e-05, "loss": 0.6565, "step": 119100 }, { "epoch": 1.32, "learning_rate": 2.8019716833495734e-05, "loss": 0.705, "step": 119105 }, { "epoch": 1.32, "learning_rate": 2.801879410635722e-05, "loss": 0.635, "step": 119110 }, { "epoch": 1.32, "learning_rate": 2.8017871379218706e-05, "loss": 0.581, "step": 119115 }, { "epoch": 1.32, "learning_rate": 2.80169486520802e-05, "loss": 0.6618, "step": 119120 }, { "epoch": 1.32, "learning_rate": 2.8016025924941685e-05, "loss": 0.6429, "step": 119125 }, { "epoch": 1.32, "learning_rate": 2.8015103197803173e-05, "loss": 0.6696, "step": 119130 }, { "epoch": 1.32, "learning_rate": 2.8014180470664657e-05, "loss": 0.6725, "step": 119135 }, { "epoch": 1.32, "learning_rate": 2.801325774352615e-05, "loss": 0.6653, "step": 119140 }, { "epoch": 1.32, "learning_rate": 2.8012335016387636e-05, "loss": 0.5885, "step": 119145 }, { "epoch": 1.32, "learning_rate": 2.8011412289249124e-05, "loss": 0.5949, "step": 119150 }, { "epoch": 1.32, "learning_rate": 2.801048956211061e-05, "loss": 0.6956, "step": 119155 }, { "epoch": 1.32, "learning_rate": 2.80095668349721e-05, "loss": 0.6901, "step": 119160 }, { "epoch": 1.32, "learning_rate": 2.8008644107833588e-05, "loss": 0.6118, "step": 119165 }, { "epoch": 1.32, "learning_rate": 2.8007721380695072e-05, "loss": 0.6205, "step": 119170 }, { "epoch": 1.32, "learning_rate": 2.800679865355656e-05, "loss": 0.6341, "step": 119175 }, { "epoch": 1.32, "learning_rate": 2.8005875926418045e-05, "loss": 0.6574, "step": 119180 }, { "epoch": 1.32, "learning_rate": 2.800495319927954e-05, "loss": 0.6957, "step": 119185 }, { "epoch": 1.32, "learning_rate": 2.8004030472141024e-05, "loss": 0.5923, "step": 119190 }, { "epoch": 1.32, "learning_rate": 2.800310774500251e-05, "loss": 0.5657, "step": 119195 }, { "epoch": 1.32, "learning_rate": 2.8002185017863996e-05, "loss": 0.6584, "step": 119200 }, { "epoch": 1.32, "learning_rate": 2.8001262290725487e-05, "loss": 0.6041, "step": 119205 }, { "epoch": 1.32, "learning_rate": 2.8000339563586975e-05, "loss": 0.6732, "step": 119210 }, { "epoch": 1.32, "learning_rate": 2.799941683644846e-05, "loss": 0.7049, "step": 119215 }, { "epoch": 1.32, "learning_rate": 2.7998494109309947e-05, "loss": 0.6948, "step": 119220 }, { "epoch": 1.32, "learning_rate": 2.799757138217144e-05, "loss": 0.6951, "step": 119225 }, { "epoch": 1.32, "learning_rate": 2.7996648655032926e-05, "loss": 0.6496, "step": 119230 }, { "epoch": 1.32, "learning_rate": 2.799572592789441e-05, "loss": 0.6855, "step": 119235 }, { "epoch": 1.32, "learning_rate": 2.79948032007559e-05, "loss": 0.6077, "step": 119240 }, { "epoch": 1.32, "learning_rate": 2.7993880473617383e-05, "loss": 0.6284, "step": 119245 }, { "epoch": 1.32, "learning_rate": 2.7992957746478874e-05, "loss": 0.6343, "step": 119250 }, { "epoch": 1.32, "learning_rate": 2.7992035019340362e-05, "loss": 0.6466, "step": 119255 }, { "epoch": 1.32, "learning_rate": 2.799111229220185e-05, "loss": 0.6501, "step": 119260 }, { "epoch": 1.32, "learning_rate": 2.7990189565063335e-05, "loss": 0.64, "step": 119265 }, { "epoch": 1.32, "learning_rate": 2.7989266837924826e-05, "loss": 0.67, "step": 119270 }, { "epoch": 1.32, "learning_rate": 2.7988344110786314e-05, "loss": 0.675, "step": 119275 }, { "epoch": 1.32, "learning_rate": 2.7987421383647798e-05, "loss": 0.6404, "step": 119280 }, { "epoch": 1.32, "learning_rate": 2.7986498656509286e-05, "loss": 0.676, "step": 119285 }, { "epoch": 1.32, "learning_rate": 2.7985575929370777e-05, "loss": 0.675, "step": 119290 }, { "epoch": 1.32, "learning_rate": 2.7984653202232265e-05, "loss": 0.6826, "step": 119295 }, { "epoch": 1.32, "learning_rate": 2.798373047509375e-05, "loss": 0.6756, "step": 119300 }, { "epoch": 1.32, "learning_rate": 2.7982807747955237e-05, "loss": 0.6251, "step": 119305 }, { "epoch": 1.32, "learning_rate": 2.798188502081673e-05, "loss": 0.6562, "step": 119310 }, { "epoch": 1.32, "learning_rate": 2.7980962293678213e-05, "loss": 0.6481, "step": 119315 }, { "epoch": 1.32, "learning_rate": 2.79800395665397e-05, "loss": 0.675, "step": 119320 }, { "epoch": 1.32, "learning_rate": 2.7979116839401185e-05, "loss": 0.6412, "step": 119325 }, { "epoch": 1.32, "learning_rate": 2.7978194112262673e-05, "loss": 0.6907, "step": 119330 }, { "epoch": 1.32, "learning_rate": 2.7977271385124165e-05, "loss": 0.6262, "step": 119335 }, { "epoch": 1.32, "learning_rate": 2.7976348657985652e-05, "loss": 0.6636, "step": 119340 }, { "epoch": 1.32, "learning_rate": 2.7975425930847137e-05, "loss": 0.6234, "step": 119345 }, { "epoch": 1.32, "learning_rate": 2.7974503203708625e-05, "loss": 0.6482, "step": 119350 }, { "epoch": 1.32, "learning_rate": 2.7973580476570116e-05, "loss": 0.6603, "step": 119355 }, { "epoch": 1.32, "learning_rate": 2.7972657749431604e-05, "loss": 0.6364, "step": 119360 }, { "epoch": 1.32, "learning_rate": 2.7971735022293088e-05, "loss": 0.6465, "step": 119365 }, { "epoch": 1.32, "learning_rate": 2.7970812295154576e-05, "loss": 0.6847, "step": 119370 }, { "epoch": 1.32, "learning_rate": 2.7969889568016067e-05, "loss": 0.6319, "step": 119375 }, { "epoch": 1.32, "learning_rate": 2.7968966840877552e-05, "loss": 0.6751, "step": 119380 }, { "epoch": 1.32, "learning_rate": 2.796804411373904e-05, "loss": 0.6464, "step": 119385 }, { "epoch": 1.32, "learning_rate": 2.7967121386600524e-05, "loss": 0.6606, "step": 119390 }, { "epoch": 1.32, "learning_rate": 2.7966198659462012e-05, "loss": 0.6252, "step": 119395 }, { "epoch": 1.32, "learning_rate": 2.7965275932323503e-05, "loss": 0.5668, "step": 119400 }, { "epoch": 1.32, "learning_rate": 2.796435320518499e-05, "loss": 0.6003, "step": 119405 }, { "epoch": 1.32, "learning_rate": 2.7963430478046475e-05, "loss": 0.6147, "step": 119410 }, { "epoch": 1.32, "learning_rate": 2.7962507750907963e-05, "loss": 0.7157, "step": 119415 }, { "epoch": 1.32, "learning_rate": 2.7961585023769455e-05, "loss": 0.6757, "step": 119420 }, { "epoch": 1.32, "learning_rate": 2.796066229663094e-05, "loss": 0.6204, "step": 119425 }, { "epoch": 1.32, "learning_rate": 2.7959739569492427e-05, "loss": 0.7051, "step": 119430 }, { "epoch": 1.32, "learning_rate": 2.7958816842353915e-05, "loss": 0.6899, "step": 119435 }, { "epoch": 1.32, "learning_rate": 2.7957894115215406e-05, "loss": 0.6939, "step": 119440 }, { "epoch": 1.32, "learning_rate": 2.795697138807689e-05, "loss": 0.6713, "step": 119445 }, { "epoch": 1.32, "learning_rate": 2.7956048660938378e-05, "loss": 0.6862, "step": 119450 }, { "epoch": 1.32, "learning_rate": 2.7955125933799863e-05, "loss": 0.608, "step": 119455 }, { "epoch": 1.32, "learning_rate": 2.7954203206661357e-05, "loss": 0.7331, "step": 119460 }, { "epoch": 1.32, "learning_rate": 2.7953280479522842e-05, "loss": 0.682, "step": 119465 }, { "epoch": 1.32, "learning_rate": 2.795235775238433e-05, "loss": 0.6565, "step": 119470 }, { "epoch": 1.32, "learning_rate": 2.7951435025245814e-05, "loss": 0.6265, "step": 119475 }, { "epoch": 1.32, "learning_rate": 2.7950512298107302e-05, "loss": 0.6529, "step": 119480 }, { "epoch": 1.32, "learning_rate": 2.7949589570968793e-05, "loss": 0.6929, "step": 119485 }, { "epoch": 1.32, "learning_rate": 2.7948666843830278e-05, "loss": 0.6793, "step": 119490 }, { "epoch": 1.32, "learning_rate": 2.7947744116691765e-05, "loss": 0.632, "step": 119495 }, { "epoch": 1.32, "learning_rate": 2.794682138955325e-05, "loss": 0.7098, "step": 119500 }, { "epoch": 1.32, "learning_rate": 2.7945898662414745e-05, "loss": 0.6928, "step": 119505 }, { "epoch": 1.32, "learning_rate": 2.794497593527623e-05, "loss": 0.6351, "step": 119510 }, { "epoch": 1.32, "learning_rate": 2.7944053208137717e-05, "loss": 0.6192, "step": 119515 }, { "epoch": 1.32, "learning_rate": 2.79431304809992e-05, "loss": 0.6477, "step": 119520 }, { "epoch": 1.32, "learning_rate": 2.7942207753860693e-05, "loss": 0.6293, "step": 119525 }, { "epoch": 1.32, "learning_rate": 2.794128502672218e-05, "loss": 0.6319, "step": 119530 }, { "epoch": 1.32, "learning_rate": 2.7940362299583668e-05, "loss": 0.637, "step": 119535 }, { "epoch": 1.32, "learning_rate": 2.7939439572445153e-05, "loss": 0.6565, "step": 119540 }, { "epoch": 1.32, "learning_rate": 2.793851684530664e-05, "loss": 0.6195, "step": 119545 }, { "epoch": 1.32, "learning_rate": 2.7937594118168132e-05, "loss": 0.6583, "step": 119550 }, { "epoch": 1.32, "learning_rate": 2.7936671391029616e-05, "loss": 0.6984, "step": 119555 }, { "epoch": 1.32, "learning_rate": 2.7935748663891104e-05, "loss": 0.6634, "step": 119560 }, { "epoch": 1.32, "learning_rate": 2.793482593675259e-05, "loss": 0.642, "step": 119565 }, { "epoch": 1.32, "learning_rate": 2.7933903209614083e-05, "loss": 0.6858, "step": 119570 }, { "epoch": 1.32, "learning_rate": 2.7932980482475568e-05, "loss": 0.6409, "step": 119575 }, { "epoch": 1.32, "learning_rate": 2.7932057755337056e-05, "loss": 0.7077, "step": 119580 }, { "epoch": 1.32, "learning_rate": 2.793113502819854e-05, "loss": 0.6303, "step": 119585 }, { "epoch": 1.32, "learning_rate": 2.793021230106003e-05, "loss": 0.6984, "step": 119590 }, { "epoch": 1.32, "learning_rate": 2.792928957392152e-05, "loss": 0.6624, "step": 119595 }, { "epoch": 1.32, "learning_rate": 2.7928366846783004e-05, "loss": 0.6157, "step": 119600 }, { "epoch": 1.32, "learning_rate": 2.792744411964449e-05, "loss": 0.682, "step": 119605 }, { "epoch": 1.32, "learning_rate": 2.7926521392505983e-05, "loss": 0.6157, "step": 119610 }, { "epoch": 1.32, "learning_rate": 2.792559866536747e-05, "loss": 0.6877, "step": 119615 }, { "epoch": 1.32, "learning_rate": 2.7924675938228955e-05, "loss": 0.6131, "step": 119620 }, { "epoch": 1.32, "learning_rate": 2.7923753211090443e-05, "loss": 0.7238, "step": 119625 }, { "epoch": 1.32, "learning_rate": 2.7922830483951927e-05, "loss": 0.68, "step": 119630 }, { "epoch": 1.32, "learning_rate": 2.7921907756813422e-05, "loss": 0.5985, "step": 119635 }, { "epoch": 1.32, "learning_rate": 2.7920985029674906e-05, "loss": 0.6552, "step": 119640 }, { "epoch": 1.32, "learning_rate": 2.7920062302536394e-05, "loss": 0.6577, "step": 119645 }, { "epoch": 1.32, "learning_rate": 2.791913957539788e-05, "loss": 0.6712, "step": 119650 }, { "epoch": 1.32, "learning_rate": 2.791821684825937e-05, "loss": 0.6935, "step": 119655 }, { "epoch": 1.32, "learning_rate": 2.7917294121120858e-05, "loss": 0.568, "step": 119660 }, { "epoch": 1.33, "learning_rate": 2.7916371393982342e-05, "loss": 0.6677, "step": 119665 }, { "epoch": 1.33, "learning_rate": 2.791544866684383e-05, "loss": 0.6491, "step": 119670 }, { "epoch": 1.33, "learning_rate": 2.791452593970532e-05, "loss": 0.6985, "step": 119675 }, { "epoch": 1.33, "learning_rate": 2.791360321256681e-05, "loss": 0.7102, "step": 119680 }, { "epoch": 1.33, "learning_rate": 2.7912680485428294e-05, "loss": 0.6301, "step": 119685 }, { "epoch": 1.33, "learning_rate": 2.791175775828978e-05, "loss": 0.7067, "step": 119690 }, { "epoch": 1.33, "learning_rate": 2.7910835031151273e-05, "loss": 0.6704, "step": 119695 }, { "epoch": 1.33, "learning_rate": 2.7909912304012757e-05, "loss": 0.6908, "step": 119700 }, { "epoch": 1.33, "learning_rate": 2.7908989576874245e-05, "loss": 0.6792, "step": 119705 }, { "epoch": 1.33, "learning_rate": 2.7908066849735733e-05, "loss": 0.6131, "step": 119710 }, { "epoch": 1.33, "learning_rate": 2.7907144122597217e-05, "loss": 0.6377, "step": 119715 }, { "epoch": 1.33, "learning_rate": 2.790622139545871e-05, "loss": 0.6244, "step": 119720 }, { "epoch": 1.33, "learning_rate": 2.7905298668320196e-05, "loss": 0.6675, "step": 119725 }, { "epoch": 1.33, "learning_rate": 2.790437594118168e-05, "loss": 0.7027, "step": 119730 }, { "epoch": 1.33, "learning_rate": 2.790345321404317e-05, "loss": 0.6998, "step": 119735 }, { "epoch": 1.33, "learning_rate": 2.790253048690466e-05, "loss": 0.6962, "step": 119740 }, { "epoch": 1.33, "learning_rate": 2.7901607759766148e-05, "loss": 0.7303, "step": 119745 }, { "epoch": 1.33, "learning_rate": 2.7900685032627632e-05, "loss": 0.6456, "step": 119750 }, { "epoch": 1.33, "learning_rate": 2.789976230548912e-05, "loss": 0.6422, "step": 119755 }, { "epoch": 1.33, "learning_rate": 2.789883957835061e-05, "loss": 0.644, "step": 119760 }, { "epoch": 1.33, "learning_rate": 2.7897916851212096e-05, "loss": 0.6592, "step": 119765 }, { "epoch": 1.33, "learning_rate": 2.7896994124073584e-05, "loss": 0.6394, "step": 119770 }, { "epoch": 1.33, "learning_rate": 2.7896071396935068e-05, "loss": 0.7078, "step": 119775 }, { "epoch": 1.33, "learning_rate": 2.7895148669796556e-05, "loss": 0.6783, "step": 119780 }, { "epoch": 1.33, "learning_rate": 2.7894225942658047e-05, "loss": 0.6849, "step": 119785 }, { "epoch": 1.33, "learning_rate": 2.7893303215519535e-05, "loss": 0.6732, "step": 119790 }, { "epoch": 1.33, "learning_rate": 2.789238048838102e-05, "loss": 0.6321, "step": 119795 }, { "epoch": 1.33, "learning_rate": 2.7891457761242507e-05, "loss": 0.6222, "step": 119800 }, { "epoch": 1.33, "learning_rate": 2.7890535034104e-05, "loss": 0.6661, "step": 119805 }, { "epoch": 1.33, "learning_rate": 2.7889612306965483e-05, "loss": 0.7124, "step": 119810 }, { "epoch": 1.33, "learning_rate": 2.788868957982697e-05, "loss": 0.6289, "step": 119815 }, { "epoch": 1.33, "learning_rate": 2.788776685268846e-05, "loss": 0.6967, "step": 119820 }, { "epoch": 1.33, "learning_rate": 2.788684412554995e-05, "loss": 0.7446, "step": 119825 }, { "epoch": 1.33, "learning_rate": 2.7885921398411434e-05, "loss": 0.683, "step": 119830 }, { "epoch": 1.33, "learning_rate": 2.7884998671272922e-05, "loss": 0.689, "step": 119835 }, { "epoch": 1.33, "learning_rate": 2.7884075944134407e-05, "loss": 0.6003, "step": 119840 }, { "epoch": 1.33, "learning_rate": 2.78831532169959e-05, "loss": 0.5794, "step": 119845 }, { "epoch": 1.33, "learning_rate": 2.7882230489857386e-05, "loss": 0.6732, "step": 119850 }, { "epoch": 1.33, "learning_rate": 2.7881307762718874e-05, "loss": 0.6279, "step": 119855 }, { "epoch": 1.33, "learning_rate": 2.7880385035580358e-05, "loss": 0.6639, "step": 119860 }, { "epoch": 1.33, "learning_rate": 2.7879462308441846e-05, "loss": 0.5771, "step": 119865 }, { "epoch": 1.33, "learning_rate": 2.7878539581303337e-05, "loss": 0.6483, "step": 119870 }, { "epoch": 1.33, "learning_rate": 2.787761685416482e-05, "loss": 0.6228, "step": 119875 }, { "epoch": 1.33, "learning_rate": 2.787669412702631e-05, "loss": 0.6389, "step": 119880 }, { "epoch": 1.33, "learning_rate": 2.7875771399887794e-05, "loss": 0.6774, "step": 119885 }, { "epoch": 1.33, "learning_rate": 2.787484867274929e-05, "loss": 0.6121, "step": 119890 }, { "epoch": 1.33, "learning_rate": 2.7873925945610773e-05, "loss": 0.6425, "step": 119895 }, { "epoch": 1.33, "learning_rate": 2.787300321847226e-05, "loss": 0.6581, "step": 119900 }, { "epoch": 1.33, "learning_rate": 2.7872080491333745e-05, "loss": 0.6718, "step": 119905 }, { "epoch": 1.33, "learning_rate": 2.7871157764195237e-05, "loss": 0.6885, "step": 119910 }, { "epoch": 1.33, "learning_rate": 2.7870235037056724e-05, "loss": 0.6296, "step": 119915 }, { "epoch": 1.33, "learning_rate": 2.7869312309918212e-05, "loss": 0.6832, "step": 119920 }, { "epoch": 1.33, "learning_rate": 2.7868389582779697e-05, "loss": 0.6323, "step": 119925 }, { "epoch": 1.33, "learning_rate": 2.7867466855641185e-05, "loss": 0.6707, "step": 119930 }, { "epoch": 1.33, "learning_rate": 2.7866544128502676e-05, "loss": 0.6352, "step": 119935 }, { "epoch": 1.33, "learning_rate": 2.786562140136416e-05, "loss": 0.6465, "step": 119940 }, { "epoch": 1.33, "learning_rate": 2.7864698674225648e-05, "loss": 0.686, "step": 119945 }, { "epoch": 1.33, "learning_rate": 2.7863775947087133e-05, "loss": 0.6218, "step": 119950 }, { "epoch": 1.33, "learning_rate": 2.7862853219948627e-05, "loss": 0.6356, "step": 119955 }, { "epoch": 1.33, "learning_rate": 2.7861930492810112e-05, "loss": 0.7128, "step": 119960 }, { "epoch": 1.33, "learning_rate": 2.78610077656716e-05, "loss": 0.6266, "step": 119965 }, { "epoch": 1.33, "learning_rate": 2.7860085038533084e-05, "loss": 0.638, "step": 119970 }, { "epoch": 1.33, "learning_rate": 2.7859162311394575e-05, "loss": 0.6445, "step": 119975 }, { "epoch": 1.33, "learning_rate": 2.7858239584256063e-05, "loss": 0.6382, "step": 119980 }, { "epoch": 1.33, "learning_rate": 2.7857316857117548e-05, "loss": 0.6422, "step": 119985 }, { "epoch": 1.33, "learning_rate": 2.7856394129979035e-05, "loss": 0.6447, "step": 119990 }, { "epoch": 1.33, "learning_rate": 2.7855471402840527e-05, "loss": 0.6904, "step": 119995 }, { "epoch": 1.33, "learning_rate": 2.7854548675702014e-05, "loss": 0.6333, "step": 120000 }, { "epoch": 1.33, "eval_loss": 0.6168709397315979, "eval_runtime": 69.2499, "eval_samples_per_second": 28.881, "eval_steps_per_second": 14.44, "step": 120000 }, { "epoch": 1.33, "learning_rate": 2.78536259485635e-05, "loss": 0.6638, "step": 120005 }, { "epoch": 1.33, "learning_rate": 2.7852703221424987e-05, "loss": 0.6957, "step": 120010 }, { "epoch": 1.33, "learning_rate": 2.785178049428647e-05, "loss": 0.6376, "step": 120015 }, { "epoch": 1.33, "learning_rate": 2.7850857767147966e-05, "loss": 0.6472, "step": 120020 }, { "epoch": 1.33, "learning_rate": 2.784993504000945e-05, "loss": 0.6432, "step": 120025 }, { "epoch": 1.33, "learning_rate": 2.7849012312870938e-05, "loss": 0.6502, "step": 120030 }, { "epoch": 1.33, "learning_rate": 2.7848089585732423e-05, "loss": 0.5987, "step": 120035 }, { "epoch": 1.33, "learning_rate": 2.7847166858593914e-05, "loss": 0.6664, "step": 120040 }, { "epoch": 1.33, "learning_rate": 2.7846244131455402e-05, "loss": 0.6235, "step": 120045 }, { "epoch": 1.33, "learning_rate": 2.7845321404316886e-05, "loss": 0.6384, "step": 120050 }, { "epoch": 1.33, "learning_rate": 2.7844398677178374e-05, "loss": 0.6744, "step": 120055 }, { "epoch": 1.33, "learning_rate": 2.7843475950039865e-05, "loss": 0.671, "step": 120060 }, { "epoch": 1.33, "learning_rate": 2.7842553222901353e-05, "loss": 0.6752, "step": 120065 }, { "epoch": 1.33, "learning_rate": 2.7841630495762838e-05, "loss": 0.6559, "step": 120070 }, { "epoch": 1.33, "learning_rate": 2.7840707768624325e-05, "loss": 0.6229, "step": 120075 }, { "epoch": 1.33, "learning_rate": 2.783978504148581e-05, "loss": 0.643, "step": 120080 }, { "epoch": 1.33, "learning_rate": 2.78388623143473e-05, "loss": 0.6921, "step": 120085 }, { "epoch": 1.33, "learning_rate": 2.783793958720879e-05, "loss": 0.6156, "step": 120090 }, { "epoch": 1.33, "learning_rate": 2.7837016860070277e-05, "loss": 0.62, "step": 120095 }, { "epoch": 1.33, "learning_rate": 2.783609413293176e-05, "loss": 0.6715, "step": 120100 }, { "epoch": 1.33, "learning_rate": 2.7835171405793253e-05, "loss": 0.6923, "step": 120105 }, { "epoch": 1.33, "learning_rate": 2.783424867865474e-05, "loss": 0.6613, "step": 120110 }, { "epoch": 1.33, "learning_rate": 2.7833325951516225e-05, "loss": 0.671, "step": 120115 }, { "epoch": 1.33, "learning_rate": 2.7832403224377713e-05, "loss": 0.6233, "step": 120120 }, { "epoch": 1.33, "learning_rate": 2.7831480497239204e-05, "loss": 0.6838, "step": 120125 }, { "epoch": 1.33, "learning_rate": 2.7830557770100692e-05, "loss": 0.5978, "step": 120130 }, { "epoch": 1.33, "learning_rate": 2.7829635042962176e-05, "loss": 0.6196, "step": 120135 }, { "epoch": 1.33, "learning_rate": 2.7828712315823664e-05, "loss": 0.7018, "step": 120140 }, { "epoch": 1.33, "learning_rate": 2.7827789588685155e-05, "loss": 0.6773, "step": 120145 }, { "epoch": 1.33, "learning_rate": 2.782686686154664e-05, "loss": 0.6992, "step": 120150 }, { "epoch": 1.33, "learning_rate": 2.7825944134408128e-05, "loss": 0.6811, "step": 120155 }, { "epoch": 1.33, "learning_rate": 2.7825021407269612e-05, "loss": 0.658, "step": 120160 }, { "epoch": 1.33, "learning_rate": 2.78240986801311e-05, "loss": 0.6522, "step": 120165 }, { "epoch": 1.33, "learning_rate": 2.782317595299259e-05, "loss": 0.6557, "step": 120170 }, { "epoch": 1.33, "learning_rate": 2.782225322585408e-05, "loss": 0.6879, "step": 120175 }, { "epoch": 1.33, "learning_rate": 2.7821330498715563e-05, "loss": 0.6933, "step": 120180 }, { "epoch": 1.33, "learning_rate": 2.782040777157705e-05, "loss": 0.6553, "step": 120185 }, { "epoch": 1.33, "learning_rate": 2.7819485044438543e-05, "loss": 0.6241, "step": 120190 }, { "epoch": 1.33, "learning_rate": 2.7818562317300027e-05, "loss": 0.6781, "step": 120195 }, { "epoch": 1.33, "learning_rate": 2.7817639590161515e-05, "loss": 0.7255, "step": 120200 }, { "epoch": 1.33, "learning_rate": 2.7816716863023003e-05, "loss": 0.66, "step": 120205 }, { "epoch": 1.33, "learning_rate": 2.7815794135884494e-05, "loss": 0.6389, "step": 120210 }, { "epoch": 1.33, "learning_rate": 2.781487140874598e-05, "loss": 0.6337, "step": 120215 }, { "epoch": 1.33, "learning_rate": 2.7813948681607466e-05, "loss": 0.6448, "step": 120220 }, { "epoch": 1.33, "learning_rate": 2.781302595446895e-05, "loss": 0.6478, "step": 120225 }, { "epoch": 1.33, "learning_rate": 2.781210322733044e-05, "loss": 0.6242, "step": 120230 }, { "epoch": 1.33, "learning_rate": 2.781118050019193e-05, "loss": 0.6291, "step": 120235 }, { "epoch": 1.33, "learning_rate": 2.7810257773053418e-05, "loss": 0.6806, "step": 120240 }, { "epoch": 1.33, "learning_rate": 2.7809335045914902e-05, "loss": 0.6252, "step": 120245 }, { "epoch": 1.33, "learning_rate": 2.780841231877639e-05, "loss": 0.6692, "step": 120250 }, { "epoch": 1.33, "learning_rate": 2.780748959163788e-05, "loss": 0.665, "step": 120255 }, { "epoch": 1.33, "learning_rate": 2.7806566864499366e-05, "loss": 0.6705, "step": 120260 }, { "epoch": 1.33, "learning_rate": 2.7805644137360854e-05, "loss": 0.6546, "step": 120265 }, { "epoch": 1.33, "learning_rate": 2.7804721410222338e-05, "loss": 0.6624, "step": 120270 }, { "epoch": 1.33, "learning_rate": 2.7803798683083833e-05, "loss": 0.667, "step": 120275 }, { "epoch": 1.33, "learning_rate": 2.7802875955945317e-05, "loss": 0.7164, "step": 120280 }, { "epoch": 1.33, "learning_rate": 2.7801953228806805e-05, "loss": 0.6791, "step": 120285 }, { "epoch": 1.33, "learning_rate": 2.780103050166829e-05, "loss": 0.6544, "step": 120290 }, { "epoch": 1.33, "learning_rate": 2.780010777452978e-05, "loss": 0.6287, "step": 120295 }, { "epoch": 1.33, "learning_rate": 2.779918504739127e-05, "loss": 0.6518, "step": 120300 }, { "epoch": 1.33, "learning_rate": 2.7798262320252756e-05, "loss": 0.6359, "step": 120305 }, { "epoch": 1.33, "learning_rate": 2.779733959311424e-05, "loss": 0.5983, "step": 120310 }, { "epoch": 1.33, "learning_rate": 2.779641686597573e-05, "loss": 0.7243, "step": 120315 }, { "epoch": 1.33, "learning_rate": 2.779549413883722e-05, "loss": 0.6115, "step": 120320 }, { "epoch": 1.33, "learning_rate": 2.7794571411698704e-05, "loss": 0.6455, "step": 120325 }, { "epoch": 1.33, "learning_rate": 2.7793648684560192e-05, "loss": 0.6196, "step": 120330 }, { "epoch": 1.33, "learning_rate": 2.7792725957421677e-05, "loss": 0.7101, "step": 120335 }, { "epoch": 1.33, "learning_rate": 2.779180323028317e-05, "loss": 0.6506, "step": 120340 }, { "epoch": 1.33, "learning_rate": 2.7790880503144656e-05, "loss": 0.6386, "step": 120345 }, { "epoch": 1.33, "learning_rate": 2.7789957776006144e-05, "loss": 0.6049, "step": 120350 }, { "epoch": 1.33, "learning_rate": 2.7789035048867628e-05, "loss": 0.6583, "step": 120355 }, { "epoch": 1.33, "learning_rate": 2.778811232172912e-05, "loss": 0.6036, "step": 120360 }, { "epoch": 1.33, "learning_rate": 2.7787189594590607e-05, "loss": 0.6908, "step": 120365 }, { "epoch": 1.33, "learning_rate": 2.778626686745209e-05, "loss": 0.641, "step": 120370 }, { "epoch": 1.33, "learning_rate": 2.778534414031358e-05, "loss": 0.6297, "step": 120375 }, { "epoch": 1.33, "learning_rate": 2.7784421413175067e-05, "loss": 0.6238, "step": 120380 }, { "epoch": 1.33, "learning_rate": 2.778349868603656e-05, "loss": 0.6698, "step": 120385 }, { "epoch": 1.33, "learning_rate": 2.7782575958898043e-05, "loss": 0.6358, "step": 120390 }, { "epoch": 1.33, "learning_rate": 2.778165323175953e-05, "loss": 0.6452, "step": 120395 }, { "epoch": 1.33, "learning_rate": 2.7780730504621015e-05, "loss": 0.6486, "step": 120400 }, { "epoch": 1.33, "learning_rate": 2.777980777748251e-05, "loss": 0.6202, "step": 120405 }, { "epoch": 1.33, "learning_rate": 2.7778885050343994e-05, "loss": 0.6516, "step": 120410 }, { "epoch": 1.33, "learning_rate": 2.7777962323205482e-05, "loss": 0.6432, "step": 120415 }, { "epoch": 1.33, "learning_rate": 2.7777039596066967e-05, "loss": 0.6134, "step": 120420 }, { "epoch": 1.33, "learning_rate": 2.7776116868928458e-05, "loss": 0.6834, "step": 120425 }, { "epoch": 1.33, "learning_rate": 2.7775194141789946e-05, "loss": 0.6572, "step": 120430 }, { "epoch": 1.33, "learning_rate": 2.777427141465143e-05, "loss": 0.6432, "step": 120435 }, { "epoch": 1.33, "learning_rate": 2.7773348687512918e-05, "loss": 0.6867, "step": 120440 }, { "epoch": 1.33, "learning_rate": 2.777242596037441e-05, "loss": 0.6318, "step": 120445 }, { "epoch": 1.33, "learning_rate": 2.7771503233235897e-05, "loss": 0.6759, "step": 120450 }, { "epoch": 1.33, "learning_rate": 2.777058050609738e-05, "loss": 0.6595, "step": 120455 }, { "epoch": 1.33, "learning_rate": 2.776965777895887e-05, "loss": 0.7316, "step": 120460 }, { "epoch": 1.33, "learning_rate": 2.7768735051820354e-05, "loss": 0.7071, "step": 120465 }, { "epoch": 1.33, "learning_rate": 2.7767812324681845e-05, "loss": 0.629, "step": 120470 }, { "epoch": 1.33, "learning_rate": 2.7766889597543333e-05, "loss": 0.6806, "step": 120475 }, { "epoch": 1.33, "learning_rate": 2.776596687040482e-05, "loss": 0.6444, "step": 120480 }, { "epoch": 1.33, "learning_rate": 2.7765044143266305e-05, "loss": 0.5911, "step": 120485 }, { "epoch": 1.33, "learning_rate": 2.7764121416127797e-05, "loss": 0.6925, "step": 120490 }, { "epoch": 1.33, "learning_rate": 2.7763198688989284e-05, "loss": 0.6992, "step": 120495 }, { "epoch": 1.33, "learning_rate": 2.776227596185077e-05, "loss": 0.6704, "step": 120500 }, { "epoch": 1.33, "learning_rate": 2.7761353234712257e-05, "loss": 0.6244, "step": 120505 }, { "epoch": 1.33, "learning_rate": 2.7760430507573748e-05, "loss": 0.6581, "step": 120510 }, { "epoch": 1.33, "learning_rate": 2.7759507780435236e-05, "loss": 0.6015, "step": 120515 }, { "epoch": 1.33, "learning_rate": 2.775858505329672e-05, "loss": 0.6626, "step": 120520 }, { "epoch": 1.33, "learning_rate": 2.7757662326158208e-05, "loss": 0.6316, "step": 120525 }, { "epoch": 1.33, "learning_rate": 2.77567395990197e-05, "loss": 0.6679, "step": 120530 }, { "epoch": 1.33, "learning_rate": 2.7755816871881184e-05, "loss": 0.6784, "step": 120535 }, { "epoch": 1.33, "learning_rate": 2.775489414474267e-05, "loss": 0.6571, "step": 120540 }, { "epoch": 1.33, "learning_rate": 2.7753971417604156e-05, "loss": 0.6424, "step": 120545 }, { "epoch": 1.33, "learning_rate": 2.7753048690465644e-05, "loss": 0.6794, "step": 120550 }, { "epoch": 1.33, "learning_rate": 2.7752125963327135e-05, "loss": 0.6925, "step": 120555 }, { "epoch": 1.33, "learning_rate": 2.7751203236188623e-05, "loss": 0.6817, "step": 120560 }, { "epoch": 1.33, "learning_rate": 2.7750280509050108e-05, "loss": 0.6612, "step": 120565 }, { "epoch": 1.34, "learning_rate": 2.7749357781911595e-05, "loss": 0.6356, "step": 120570 }, { "epoch": 1.34, "learning_rate": 2.7748435054773087e-05, "loss": 0.705, "step": 120575 }, { "epoch": 1.34, "learning_rate": 2.7747512327634574e-05, "loss": 0.6419, "step": 120580 }, { "epoch": 1.34, "learning_rate": 2.774658960049606e-05, "loss": 0.6163, "step": 120585 }, { "epoch": 1.34, "learning_rate": 2.7745666873357547e-05, "loss": 0.5975, "step": 120590 }, { "epoch": 1.34, "learning_rate": 2.7744744146219038e-05, "loss": 0.6634, "step": 120595 }, { "epoch": 1.34, "learning_rate": 2.7743821419080522e-05, "loss": 0.6794, "step": 120600 }, { "epoch": 1.34, "learning_rate": 2.774289869194201e-05, "loss": 0.6728, "step": 120605 }, { "epoch": 1.34, "learning_rate": 2.7741975964803495e-05, "loss": 0.6071, "step": 120610 }, { "epoch": 1.34, "learning_rate": 2.7741053237664983e-05, "loss": 0.6946, "step": 120615 }, { "epoch": 1.34, "learning_rate": 2.7740130510526474e-05, "loss": 0.6347, "step": 120620 }, { "epoch": 1.34, "learning_rate": 2.7739207783387962e-05, "loss": 0.6386, "step": 120625 }, { "epoch": 1.34, "learning_rate": 2.7738285056249446e-05, "loss": 0.6697, "step": 120630 }, { "epoch": 1.34, "learning_rate": 2.7737362329110934e-05, "loss": 0.6386, "step": 120635 }, { "epoch": 1.34, "learning_rate": 2.7736439601972425e-05, "loss": 0.6522, "step": 120640 }, { "epoch": 1.34, "learning_rate": 2.773551687483391e-05, "loss": 0.6586, "step": 120645 }, { "epoch": 1.34, "learning_rate": 2.7734594147695398e-05, "loss": 0.6342, "step": 120650 }, { "epoch": 1.34, "learning_rate": 2.7733671420556885e-05, "loss": 0.6765, "step": 120655 }, { "epoch": 1.34, "learning_rate": 2.7732748693418377e-05, "loss": 0.6364, "step": 120660 }, { "epoch": 1.34, "learning_rate": 2.773182596627986e-05, "loss": 0.5808, "step": 120665 }, { "epoch": 1.34, "learning_rate": 2.773090323914135e-05, "loss": 0.626, "step": 120670 }, { "epoch": 1.34, "learning_rate": 2.7729980512002833e-05, "loss": 0.6899, "step": 120675 }, { "epoch": 1.34, "learning_rate": 2.7729057784864325e-05, "loss": 0.7199, "step": 120680 }, { "epoch": 1.34, "learning_rate": 2.7728135057725812e-05, "loss": 0.673, "step": 120685 }, { "epoch": 1.34, "learning_rate": 2.77272123305873e-05, "loss": 0.6445, "step": 120690 }, { "epoch": 1.34, "learning_rate": 2.7726289603448785e-05, "loss": 0.6683, "step": 120695 }, { "epoch": 1.34, "learning_rate": 2.7725366876310273e-05, "loss": 0.6513, "step": 120700 }, { "epoch": 1.34, "learning_rate": 2.7724444149171764e-05, "loss": 0.6059, "step": 120705 }, { "epoch": 1.34, "learning_rate": 2.772352142203325e-05, "loss": 0.7104, "step": 120710 }, { "epoch": 1.34, "learning_rate": 2.7722598694894736e-05, "loss": 0.7168, "step": 120715 }, { "epoch": 1.34, "learning_rate": 2.772167596775622e-05, "loss": 0.6563, "step": 120720 }, { "epoch": 1.34, "learning_rate": 2.7720753240617715e-05, "loss": 0.6578, "step": 120725 }, { "epoch": 1.34, "learning_rate": 2.77198305134792e-05, "loss": 0.6575, "step": 120730 }, { "epoch": 1.34, "learning_rate": 2.7718907786340688e-05, "loss": 0.6603, "step": 120735 }, { "epoch": 1.34, "learning_rate": 2.7717985059202172e-05, "loss": 0.6747, "step": 120740 }, { "epoch": 1.34, "learning_rate": 2.7717062332063663e-05, "loss": 0.6728, "step": 120745 }, { "epoch": 1.34, "learning_rate": 2.771613960492515e-05, "loss": 0.6242, "step": 120750 }, { "epoch": 1.34, "learning_rate": 2.7715216877786636e-05, "loss": 0.6613, "step": 120755 }, { "epoch": 1.34, "learning_rate": 2.7714294150648123e-05, "loss": 0.6211, "step": 120760 }, { "epoch": 1.34, "learning_rate": 2.771337142350961e-05, "loss": 0.6187, "step": 120765 }, { "epoch": 1.34, "learning_rate": 2.7712448696371103e-05, "loss": 0.6268, "step": 120770 }, { "epoch": 1.34, "learning_rate": 2.7711525969232587e-05, "loss": 0.5756, "step": 120775 }, { "epoch": 1.34, "learning_rate": 2.7710603242094075e-05, "loss": 0.6447, "step": 120780 }, { "epoch": 1.34, "learning_rate": 2.770968051495556e-05, "loss": 0.6284, "step": 120785 }, { "epoch": 1.34, "learning_rate": 2.7708757787817054e-05, "loss": 0.6229, "step": 120790 }, { "epoch": 1.34, "learning_rate": 2.770783506067854e-05, "loss": 0.665, "step": 120795 }, { "epoch": 1.34, "learning_rate": 2.7706912333540026e-05, "loss": 0.59, "step": 120800 }, { "epoch": 1.34, "learning_rate": 2.770598960640151e-05, "loss": 0.6012, "step": 120805 }, { "epoch": 1.34, "learning_rate": 2.7705066879263002e-05, "loss": 0.6343, "step": 120810 }, { "epoch": 1.34, "learning_rate": 2.770414415212449e-05, "loss": 0.6247, "step": 120815 }, { "epoch": 1.34, "learning_rate": 2.7703221424985974e-05, "loss": 0.6436, "step": 120820 }, { "epoch": 1.34, "learning_rate": 2.7702298697847462e-05, "loss": 0.6821, "step": 120825 }, { "epoch": 1.34, "learning_rate": 2.7701375970708953e-05, "loss": 0.6115, "step": 120830 }, { "epoch": 1.34, "learning_rate": 2.770045324357044e-05, "loss": 0.6575, "step": 120835 }, { "epoch": 1.34, "learning_rate": 2.7699530516431926e-05, "loss": 0.6232, "step": 120840 }, { "epoch": 1.34, "learning_rate": 2.7698607789293413e-05, "loss": 0.6288, "step": 120845 }, { "epoch": 1.34, "learning_rate": 2.7697685062154898e-05, "loss": 0.6216, "step": 120850 }, { "epoch": 1.34, "learning_rate": 2.769676233501639e-05, "loss": 0.6545, "step": 120855 }, { "epoch": 1.34, "learning_rate": 2.7695839607877877e-05, "loss": 0.6887, "step": 120860 }, { "epoch": 1.34, "learning_rate": 2.7694916880739365e-05, "loss": 0.6565, "step": 120865 }, { "epoch": 1.34, "learning_rate": 2.769399415360085e-05, "loss": 0.6111, "step": 120870 }, { "epoch": 1.34, "learning_rate": 2.769307142646234e-05, "loss": 0.654, "step": 120875 }, { "epoch": 1.34, "learning_rate": 2.769214869932383e-05, "loss": 0.6162, "step": 120880 }, { "epoch": 1.34, "learning_rate": 2.7691225972185313e-05, "loss": 0.6334, "step": 120885 }, { "epoch": 1.34, "learning_rate": 2.76903032450468e-05, "loss": 0.6321, "step": 120890 }, { "epoch": 1.34, "learning_rate": 2.7689380517908292e-05, "loss": 0.6165, "step": 120895 }, { "epoch": 1.34, "learning_rate": 2.768845779076978e-05, "loss": 0.6403, "step": 120900 }, { "epoch": 1.34, "learning_rate": 2.7687535063631264e-05, "loss": 0.7159, "step": 120905 }, { "epoch": 1.34, "learning_rate": 2.7686612336492752e-05, "loss": 0.6492, "step": 120910 }, { "epoch": 1.34, "learning_rate": 2.7685689609354237e-05, "loss": 0.6274, "step": 120915 }, { "epoch": 1.34, "learning_rate": 2.7684766882215728e-05, "loss": 0.6542, "step": 120920 }, { "epoch": 1.34, "learning_rate": 2.7683844155077216e-05, "loss": 0.6855, "step": 120925 }, { "epoch": 1.34, "learning_rate": 2.76829214279387e-05, "loss": 0.7078, "step": 120930 }, { "epoch": 1.34, "learning_rate": 2.7681998700800188e-05, "loss": 0.665, "step": 120935 }, { "epoch": 1.34, "learning_rate": 2.768107597366168e-05, "loss": 0.6413, "step": 120940 }, { "epoch": 1.34, "learning_rate": 2.7680153246523167e-05, "loss": 0.6315, "step": 120945 }, { "epoch": 1.34, "learning_rate": 2.767923051938465e-05, "loss": 0.6089, "step": 120950 }, { "epoch": 1.34, "learning_rate": 2.767830779224614e-05, "loss": 0.6117, "step": 120955 }, { "epoch": 1.34, "learning_rate": 2.767738506510763e-05, "loss": 0.6225, "step": 120960 }, { "epoch": 1.34, "learning_rate": 2.767646233796912e-05, "loss": 0.6542, "step": 120965 }, { "epoch": 1.34, "learning_rate": 2.7675539610830603e-05, "loss": 0.6331, "step": 120970 }, { "epoch": 1.34, "learning_rate": 2.767461688369209e-05, "loss": 0.6657, "step": 120975 }, { "epoch": 1.34, "learning_rate": 2.7673694156553582e-05, "loss": 0.6333, "step": 120980 }, { "epoch": 1.34, "learning_rate": 2.7672771429415066e-05, "loss": 0.6963, "step": 120985 }, { "epoch": 1.34, "learning_rate": 2.7671848702276554e-05, "loss": 0.6841, "step": 120990 }, { "epoch": 1.34, "learning_rate": 2.767092597513804e-05, "loss": 0.6057, "step": 120995 }, { "epoch": 1.34, "learning_rate": 2.7670003247999527e-05, "loss": 0.685, "step": 121000 }, { "epoch": 1.34, "eval_loss": 0.6371257305145264, "eval_runtime": 69.2316, "eval_samples_per_second": 28.889, "eval_steps_per_second": 14.444, "step": 121000 }, { "epoch": 1.34, "learning_rate": 2.7669080520861018e-05, "loss": 0.6344, "step": 121005 }, { "epoch": 1.34, "learning_rate": 2.7668157793722506e-05, "loss": 0.6902, "step": 121010 }, { "epoch": 1.34, "learning_rate": 2.766723506658399e-05, "loss": 0.6606, "step": 121015 }, { "epoch": 1.34, "learning_rate": 2.7666312339445478e-05, "loss": 0.6916, "step": 121020 }, { "epoch": 1.34, "learning_rate": 2.766538961230697e-05, "loss": 0.6386, "step": 121025 }, { "epoch": 1.34, "learning_rate": 2.7664466885168454e-05, "loss": 0.6442, "step": 121030 }, { "epoch": 1.34, "learning_rate": 2.766354415802994e-05, "loss": 0.6214, "step": 121035 }, { "epoch": 1.34, "learning_rate": 2.766262143089143e-05, "loss": 0.6572, "step": 121040 }, { "epoch": 1.34, "learning_rate": 2.766169870375292e-05, "loss": 0.6307, "step": 121045 }, { "epoch": 1.34, "learning_rate": 2.7660775976614405e-05, "loss": 0.7074, "step": 121050 }, { "epoch": 1.34, "learning_rate": 2.7659853249475893e-05, "loss": 0.5967, "step": 121055 }, { "epoch": 1.34, "learning_rate": 2.7658930522337377e-05, "loss": 0.6083, "step": 121060 }, { "epoch": 1.34, "learning_rate": 2.7658007795198865e-05, "loss": 0.6572, "step": 121065 }, { "epoch": 1.34, "learning_rate": 2.7657085068060357e-05, "loss": 0.6865, "step": 121070 }, { "epoch": 1.34, "learning_rate": 2.7656162340921844e-05, "loss": 0.6093, "step": 121075 }, { "epoch": 1.34, "learning_rate": 2.765523961378333e-05, "loss": 0.5937, "step": 121080 }, { "epoch": 1.34, "learning_rate": 2.7654316886644817e-05, "loss": 0.6436, "step": 121085 }, { "epoch": 1.34, "learning_rate": 2.7653394159506308e-05, "loss": 0.7204, "step": 121090 }, { "epoch": 1.34, "learning_rate": 2.7652471432367792e-05, "loss": 0.5776, "step": 121095 }, { "epoch": 1.34, "learning_rate": 2.765154870522928e-05, "loss": 0.6053, "step": 121100 }, { "epoch": 1.34, "learning_rate": 2.7650625978090765e-05, "loss": 0.6742, "step": 121105 }, { "epoch": 1.34, "learning_rate": 2.764970325095226e-05, "loss": 0.641, "step": 121110 }, { "epoch": 1.34, "learning_rate": 2.7648780523813744e-05, "loss": 0.6517, "step": 121115 }, { "epoch": 1.34, "learning_rate": 2.764785779667523e-05, "loss": 0.6769, "step": 121120 }, { "epoch": 1.34, "learning_rate": 2.7646935069536716e-05, "loss": 0.662, "step": 121125 }, { "epoch": 1.34, "learning_rate": 2.7646012342398207e-05, "loss": 0.6635, "step": 121130 }, { "epoch": 1.34, "learning_rate": 2.7645089615259695e-05, "loss": 0.596, "step": 121135 }, { "epoch": 1.34, "learning_rate": 2.764416688812118e-05, "loss": 0.6411, "step": 121140 }, { "epoch": 1.34, "learning_rate": 2.7643244160982667e-05, "loss": 0.6837, "step": 121145 }, { "epoch": 1.34, "learning_rate": 2.7642321433844155e-05, "loss": 0.6511, "step": 121150 }, { "epoch": 1.34, "learning_rate": 2.7641398706705647e-05, "loss": 0.6643, "step": 121155 }, { "epoch": 1.34, "learning_rate": 2.764047597956713e-05, "loss": 0.678, "step": 121160 }, { "epoch": 1.34, "learning_rate": 2.763955325242862e-05, "loss": 0.6393, "step": 121165 }, { "epoch": 1.34, "learning_rate": 2.7638630525290103e-05, "loss": 0.6448, "step": 121170 }, { "epoch": 1.34, "learning_rate": 2.7637707798151598e-05, "loss": 0.6087, "step": 121175 }, { "epoch": 1.34, "learning_rate": 2.7636785071013082e-05, "loss": 0.6894, "step": 121180 }, { "epoch": 1.34, "learning_rate": 2.763586234387457e-05, "loss": 0.6427, "step": 121185 }, { "epoch": 1.34, "learning_rate": 2.7634939616736055e-05, "loss": 0.5961, "step": 121190 }, { "epoch": 1.34, "learning_rate": 2.7634016889597546e-05, "loss": 0.6846, "step": 121195 }, { "epoch": 1.34, "learning_rate": 2.7633094162459034e-05, "loss": 0.6714, "step": 121200 }, { "epoch": 1.34, "learning_rate": 2.7632171435320518e-05, "loss": 0.6225, "step": 121205 }, { "epoch": 1.34, "learning_rate": 2.7631248708182006e-05, "loss": 0.7009, "step": 121210 }, { "epoch": 1.34, "learning_rate": 2.763032598104349e-05, "loss": 0.6297, "step": 121215 }, { "epoch": 1.34, "learning_rate": 2.7629403253904985e-05, "loss": 0.7074, "step": 121220 }, { "epoch": 1.34, "learning_rate": 2.762848052676647e-05, "loss": 0.5652, "step": 121225 }, { "epoch": 1.34, "learning_rate": 2.7627557799627958e-05, "loss": 0.652, "step": 121230 }, { "epoch": 1.34, "learning_rate": 2.7626635072489442e-05, "loss": 0.6385, "step": 121235 }, { "epoch": 1.34, "learning_rate": 2.7625712345350933e-05, "loss": 0.6622, "step": 121240 }, { "epoch": 1.34, "learning_rate": 2.762478961821242e-05, "loss": 0.6505, "step": 121245 }, { "epoch": 1.34, "learning_rate": 2.762386689107391e-05, "loss": 0.5752, "step": 121250 }, { "epoch": 1.34, "learning_rate": 2.7622944163935393e-05, "loss": 0.6222, "step": 121255 }, { "epoch": 1.34, "learning_rate": 2.7622021436796885e-05, "loss": 0.6303, "step": 121260 }, { "epoch": 1.34, "learning_rate": 2.7621098709658372e-05, "loss": 0.666, "step": 121265 }, { "epoch": 1.34, "learning_rate": 2.7620175982519857e-05, "loss": 0.6944, "step": 121270 }, { "epoch": 1.34, "learning_rate": 2.7619253255381345e-05, "loss": 0.6694, "step": 121275 }, { "epoch": 1.34, "learning_rate": 2.7618330528242836e-05, "loss": 0.6362, "step": 121280 }, { "epoch": 1.34, "learning_rate": 2.7617407801104324e-05, "loss": 0.6823, "step": 121285 }, { "epoch": 1.34, "learning_rate": 2.7616485073965808e-05, "loss": 0.6742, "step": 121290 }, { "epoch": 1.34, "learning_rate": 2.7615562346827296e-05, "loss": 0.6499, "step": 121295 }, { "epoch": 1.34, "learning_rate": 2.761463961968878e-05, "loss": 0.6495, "step": 121300 }, { "epoch": 1.34, "learning_rate": 2.7613716892550272e-05, "loss": 0.6754, "step": 121305 }, { "epoch": 1.34, "learning_rate": 2.761279416541176e-05, "loss": 0.6281, "step": 121310 }, { "epoch": 1.34, "learning_rate": 2.7611871438273244e-05, "loss": 0.6177, "step": 121315 }, { "epoch": 1.34, "learning_rate": 2.7610948711134732e-05, "loss": 0.6073, "step": 121320 }, { "epoch": 1.34, "learning_rate": 2.7610025983996223e-05, "loss": 0.6462, "step": 121325 }, { "epoch": 1.34, "learning_rate": 2.760910325685771e-05, "loss": 0.6531, "step": 121330 }, { "epoch": 1.34, "learning_rate": 2.7608180529719196e-05, "loss": 0.6815, "step": 121335 }, { "epoch": 1.34, "learning_rate": 2.7607257802580683e-05, "loss": 0.6758, "step": 121340 }, { "epoch": 1.34, "learning_rate": 2.7606335075442175e-05, "loss": 0.6053, "step": 121345 }, { "epoch": 1.34, "learning_rate": 2.7605412348303662e-05, "loss": 0.6446, "step": 121350 }, { "epoch": 1.34, "learning_rate": 2.7604489621165147e-05, "loss": 0.6275, "step": 121355 }, { "epoch": 1.34, "learning_rate": 2.7603566894026635e-05, "loss": 0.6614, "step": 121360 }, { "epoch": 1.34, "learning_rate": 2.7602644166888126e-05, "loss": 0.633, "step": 121365 }, { "epoch": 1.34, "learning_rate": 2.760172143974961e-05, "loss": 0.6507, "step": 121370 }, { "epoch": 1.34, "learning_rate": 2.76007987126111e-05, "loss": 0.6374, "step": 121375 }, { "epoch": 1.34, "learning_rate": 2.7599875985472583e-05, "loss": 0.6868, "step": 121380 }, { "epoch": 1.34, "learning_rate": 2.759895325833407e-05, "loss": 0.6146, "step": 121385 }, { "epoch": 1.34, "learning_rate": 2.7598030531195562e-05, "loss": 0.6872, "step": 121390 }, { "epoch": 1.34, "learning_rate": 2.759710780405705e-05, "loss": 0.6381, "step": 121395 }, { "epoch": 1.34, "learning_rate": 2.7596185076918534e-05, "loss": 0.616, "step": 121400 }, { "epoch": 1.34, "learning_rate": 2.7595262349780022e-05, "loss": 0.6208, "step": 121405 }, { "epoch": 1.34, "learning_rate": 2.7594339622641513e-05, "loss": 0.6409, "step": 121410 }, { "epoch": 1.34, "learning_rate": 2.7593416895502998e-05, "loss": 0.6547, "step": 121415 }, { "epoch": 1.34, "learning_rate": 2.7592494168364486e-05, "loss": 0.6384, "step": 121420 }, { "epoch": 1.34, "learning_rate": 2.7591571441225973e-05, "loss": 0.6561, "step": 121425 }, { "epoch": 1.34, "learning_rate": 2.7590648714087465e-05, "loss": 0.6522, "step": 121430 }, { "epoch": 1.34, "learning_rate": 2.758972598694895e-05, "loss": 0.6899, "step": 121435 }, { "epoch": 1.34, "learning_rate": 2.7588803259810437e-05, "loss": 0.6292, "step": 121440 }, { "epoch": 1.34, "learning_rate": 2.758788053267192e-05, "loss": 0.6801, "step": 121445 }, { "epoch": 1.34, "learning_rate": 2.758695780553341e-05, "loss": 0.6635, "step": 121450 }, { "epoch": 1.34, "learning_rate": 2.75860350783949e-05, "loss": 0.7003, "step": 121455 }, { "epoch": 1.34, "learning_rate": 2.758511235125639e-05, "loss": 0.6769, "step": 121460 }, { "epoch": 1.34, "learning_rate": 2.7584189624117873e-05, "loss": 0.6578, "step": 121465 }, { "epoch": 1.35, "learning_rate": 2.758326689697936e-05, "loss": 0.7192, "step": 121470 }, { "epoch": 1.35, "learning_rate": 2.7582344169840852e-05, "loss": 0.6041, "step": 121475 }, { "epoch": 1.35, "learning_rate": 2.7581421442702336e-05, "loss": 0.6622, "step": 121480 }, { "epoch": 1.35, "learning_rate": 2.7580498715563824e-05, "loss": 0.6752, "step": 121485 }, { "epoch": 1.35, "learning_rate": 2.757957598842531e-05, "loss": 0.5992, "step": 121490 }, { "epoch": 1.35, "learning_rate": 2.7578653261286803e-05, "loss": 0.6115, "step": 121495 }, { "epoch": 1.35, "learning_rate": 2.7577730534148288e-05, "loss": 0.6841, "step": 121500 }, { "epoch": 1.35, "learning_rate": 2.7576807807009776e-05, "loss": 0.6604, "step": 121505 }, { "epoch": 1.35, "learning_rate": 2.757588507987126e-05, "loss": 0.6232, "step": 121510 }, { "epoch": 1.35, "learning_rate": 2.757496235273275e-05, "loss": 0.7024, "step": 121515 }, { "epoch": 1.35, "learning_rate": 2.757403962559424e-05, "loss": 0.6521, "step": 121520 }, { "epoch": 1.35, "learning_rate": 2.7573116898455724e-05, "loss": 0.6783, "step": 121525 }, { "epoch": 1.35, "learning_rate": 2.757219417131721e-05, "loss": 0.6667, "step": 121530 }, { "epoch": 1.35, "learning_rate": 2.75712714441787e-05, "loss": 0.6528, "step": 121535 }, { "epoch": 1.35, "learning_rate": 2.757034871704019e-05, "loss": 0.6787, "step": 121540 }, { "epoch": 1.35, "learning_rate": 2.7569425989901675e-05, "loss": 0.6195, "step": 121545 }, { "epoch": 1.35, "learning_rate": 2.7568503262763163e-05, "loss": 0.6724, "step": 121550 }, { "epoch": 1.35, "learning_rate": 2.7567580535624647e-05, "loss": 0.6202, "step": 121555 }, { "epoch": 1.35, "learning_rate": 2.7566657808486142e-05, "loss": 0.6713, "step": 121560 }, { "epoch": 1.35, "learning_rate": 2.7565735081347626e-05, "loss": 0.6788, "step": 121565 }, { "epoch": 1.35, "learning_rate": 2.7564812354209114e-05, "loss": 0.6275, "step": 121570 }, { "epoch": 1.35, "learning_rate": 2.75638896270706e-05, "loss": 0.6988, "step": 121575 }, { "epoch": 1.35, "learning_rate": 2.756296689993209e-05, "loss": 0.6257, "step": 121580 }, { "epoch": 1.35, "learning_rate": 2.7562044172793578e-05, "loss": 0.6276, "step": 121585 }, { "epoch": 1.35, "learning_rate": 2.7561121445655062e-05, "loss": 0.6967, "step": 121590 }, { "epoch": 1.35, "learning_rate": 2.756019871851655e-05, "loss": 0.618, "step": 121595 }, { "epoch": 1.35, "learning_rate": 2.7559275991378038e-05, "loss": 0.7057, "step": 121600 }, { "epoch": 1.35, "learning_rate": 2.755835326423953e-05, "loss": 0.6419, "step": 121605 }, { "epoch": 1.35, "learning_rate": 2.7557430537101014e-05, "loss": 0.6932, "step": 121610 }, { "epoch": 1.35, "learning_rate": 2.75565078099625e-05, "loss": 0.6716, "step": 121615 }, { "epoch": 1.35, "learning_rate": 2.7555585082823986e-05, "loss": 0.6394, "step": 121620 }, { "epoch": 1.35, "learning_rate": 2.7554662355685477e-05, "loss": 0.6744, "step": 121625 }, { "epoch": 1.35, "learning_rate": 2.7553739628546965e-05, "loss": 0.6164, "step": 121630 }, { "epoch": 1.35, "learning_rate": 2.7552816901408453e-05, "loss": 0.6962, "step": 121635 }, { "epoch": 1.35, "learning_rate": 2.7551894174269937e-05, "loss": 0.6447, "step": 121640 }, { "epoch": 1.35, "learning_rate": 2.755097144713143e-05, "loss": 0.6428, "step": 121645 }, { "epoch": 1.35, "learning_rate": 2.7550048719992916e-05, "loss": 0.6694, "step": 121650 }, { "epoch": 1.35, "learning_rate": 2.75491259928544e-05, "loss": 0.6606, "step": 121655 }, { "epoch": 1.35, "learning_rate": 2.754820326571589e-05, "loss": 0.6535, "step": 121660 }, { "epoch": 1.35, "learning_rate": 2.754728053857738e-05, "loss": 0.6774, "step": 121665 }, { "epoch": 1.35, "learning_rate": 2.7546357811438868e-05, "loss": 0.6343, "step": 121670 }, { "epoch": 1.35, "learning_rate": 2.7545435084300352e-05, "loss": 0.6287, "step": 121675 }, { "epoch": 1.35, "learning_rate": 2.754451235716184e-05, "loss": 0.6159, "step": 121680 }, { "epoch": 1.35, "learning_rate": 2.7543589630023325e-05, "loss": 0.659, "step": 121685 }, { "epoch": 1.35, "learning_rate": 2.7542666902884816e-05, "loss": 0.6147, "step": 121690 }, { "epoch": 1.35, "learning_rate": 2.7541744175746304e-05, "loss": 0.6286, "step": 121695 }, { "epoch": 1.35, "learning_rate": 2.7540821448607788e-05, "loss": 0.6687, "step": 121700 }, { "epoch": 1.35, "learning_rate": 2.7539898721469276e-05, "loss": 0.6368, "step": 121705 }, { "epoch": 1.35, "learning_rate": 2.7538975994330767e-05, "loss": 0.6494, "step": 121710 }, { "epoch": 1.35, "learning_rate": 2.7538053267192255e-05, "loss": 0.5841, "step": 121715 }, { "epoch": 1.35, "learning_rate": 2.753713054005374e-05, "loss": 0.6001, "step": 121720 }, { "epoch": 1.35, "learning_rate": 2.7536207812915227e-05, "loss": 0.6851, "step": 121725 }, { "epoch": 1.35, "learning_rate": 2.753528508577672e-05, "loss": 0.7069, "step": 121730 }, { "epoch": 1.35, "learning_rate": 2.7534362358638207e-05, "loss": 0.6204, "step": 121735 }, { "epoch": 1.35, "learning_rate": 2.753343963149969e-05, "loss": 0.6459, "step": 121740 }, { "epoch": 1.35, "learning_rate": 2.753251690436118e-05, "loss": 0.6401, "step": 121745 }, { "epoch": 1.35, "learning_rate": 2.7531594177222663e-05, "loss": 0.7357, "step": 121750 }, { "epoch": 1.35, "learning_rate": 2.7530671450084155e-05, "loss": 0.6611, "step": 121755 }, { "epoch": 1.35, "learning_rate": 2.7529748722945642e-05, "loss": 0.6667, "step": 121760 }, { "epoch": 1.35, "learning_rate": 2.7528825995807127e-05, "loss": 0.6717, "step": 121765 }, { "epoch": 1.35, "learning_rate": 2.7527903268668615e-05, "loss": 0.6498, "step": 121770 }, { "epoch": 1.35, "learning_rate": 2.7526980541530106e-05, "loss": 0.6538, "step": 121775 }, { "epoch": 1.35, "learning_rate": 2.7526057814391594e-05, "loss": 0.6031, "step": 121780 }, { "epoch": 1.35, "learning_rate": 2.7525135087253078e-05, "loss": 0.6093, "step": 121785 }, { "epoch": 1.35, "learning_rate": 2.7524212360114566e-05, "loss": 0.6968, "step": 121790 }, { "epoch": 1.35, "learning_rate": 2.7523289632976057e-05, "loss": 0.6412, "step": 121795 }, { "epoch": 1.35, "learning_rate": 2.7522366905837542e-05, "loss": 0.691, "step": 121800 }, { "epoch": 1.35, "learning_rate": 2.752144417869903e-05, "loss": 0.6497, "step": 121805 }, { "epoch": 1.35, "learning_rate": 2.7520521451560517e-05, "loss": 0.6667, "step": 121810 }, { "epoch": 1.35, "learning_rate": 2.751959872442201e-05, "loss": 0.6674, "step": 121815 }, { "epoch": 1.35, "learning_rate": 2.7518675997283493e-05, "loss": 0.6876, "step": 121820 }, { "epoch": 1.35, "learning_rate": 2.751775327014498e-05, "loss": 0.629, "step": 121825 }, { "epoch": 1.35, "learning_rate": 2.7516830543006465e-05, "loss": 0.6534, "step": 121830 }, { "epoch": 1.35, "learning_rate": 2.7515907815867953e-05, "loss": 0.7523, "step": 121835 }, { "epoch": 1.35, "learning_rate": 2.7514985088729445e-05, "loss": 0.6636, "step": 121840 }, { "epoch": 1.35, "learning_rate": 2.7514062361590932e-05, "loss": 0.6214, "step": 121845 }, { "epoch": 1.35, "learning_rate": 2.7513139634452417e-05, "loss": 0.6652, "step": 121850 }, { "epoch": 1.35, "learning_rate": 2.7512216907313905e-05, "loss": 0.7288, "step": 121855 }, { "epoch": 1.35, "learning_rate": 2.7511294180175396e-05, "loss": 0.6576, "step": 121860 }, { "epoch": 1.35, "learning_rate": 2.751037145303688e-05, "loss": 0.6305, "step": 121865 }, { "epoch": 1.35, "learning_rate": 2.7509448725898368e-05, "loss": 0.6539, "step": 121870 }, { "epoch": 1.35, "learning_rate": 2.7508525998759853e-05, "loss": 0.6657, "step": 121875 }, { "epoch": 1.35, "learning_rate": 2.7507603271621347e-05, "loss": 0.6853, "step": 121880 }, { "epoch": 1.35, "learning_rate": 2.7506680544482832e-05, "loss": 0.6811, "step": 121885 }, { "epoch": 1.35, "learning_rate": 2.750575781734432e-05, "loss": 0.6463, "step": 121890 }, { "epoch": 1.35, "learning_rate": 2.7504835090205804e-05, "loss": 0.6387, "step": 121895 }, { "epoch": 1.35, "learning_rate": 2.7503912363067292e-05, "loss": 0.6305, "step": 121900 }, { "epoch": 1.35, "learning_rate": 2.7502989635928783e-05, "loss": 0.6537, "step": 121905 }, { "epoch": 1.35, "learning_rate": 2.750206690879027e-05, "loss": 0.6417, "step": 121910 }, { "epoch": 1.35, "learning_rate": 2.7501144181651756e-05, "loss": 0.6624, "step": 121915 }, { "epoch": 1.35, "learning_rate": 2.7500221454513243e-05, "loss": 0.6305, "step": 121920 }, { "epoch": 1.35, "learning_rate": 2.7499298727374735e-05, "loss": 0.7123, "step": 121925 }, { "epoch": 1.35, "learning_rate": 2.749837600023622e-05, "loss": 0.6398, "step": 121930 }, { "epoch": 1.35, "learning_rate": 2.7497453273097707e-05, "loss": 0.6289, "step": 121935 }, { "epoch": 1.35, "learning_rate": 2.749653054595919e-05, "loss": 0.6427, "step": 121940 }, { "epoch": 1.35, "learning_rate": 2.7495607818820686e-05, "loss": 0.6394, "step": 121945 }, { "epoch": 1.35, "learning_rate": 2.749468509168217e-05, "loss": 0.6576, "step": 121950 }, { "epoch": 1.35, "learning_rate": 2.7493762364543658e-05, "loss": 0.6702, "step": 121955 }, { "epoch": 1.35, "learning_rate": 2.7492839637405143e-05, "loss": 0.6327, "step": 121960 }, { "epoch": 1.35, "learning_rate": 2.7491916910266634e-05, "loss": 0.6347, "step": 121965 }, { "epoch": 1.35, "learning_rate": 2.7490994183128122e-05, "loss": 0.6232, "step": 121970 }, { "epoch": 1.35, "learning_rate": 2.7490071455989606e-05, "loss": 0.6352, "step": 121975 }, { "epoch": 1.35, "learning_rate": 2.7489148728851094e-05, "loss": 0.6189, "step": 121980 }, { "epoch": 1.35, "learning_rate": 2.7488226001712582e-05, "loss": 0.7081, "step": 121985 }, { "epoch": 1.35, "learning_rate": 2.7487303274574073e-05, "loss": 0.5975, "step": 121990 }, { "epoch": 1.35, "learning_rate": 2.7486380547435558e-05, "loss": 0.7068, "step": 121995 }, { "epoch": 1.35, "learning_rate": 2.7485457820297046e-05, "loss": 0.6941, "step": 122000 }, { "epoch": 1.35, "eval_loss": 0.6244728565216064, "eval_runtime": 69.3008, "eval_samples_per_second": 28.86, "eval_steps_per_second": 14.43, "step": 122000 }, { "epoch": 1.35, "learning_rate": 2.748453509315853e-05, "loss": 0.6505, "step": 122005 }, { "epoch": 1.35, "learning_rate": 2.748361236602002e-05, "loss": 0.6626, "step": 122010 }, { "epoch": 1.35, "learning_rate": 2.748268963888151e-05, "loss": 0.6318, "step": 122015 }, { "epoch": 1.35, "learning_rate": 2.7481766911742997e-05, "loss": 0.5879, "step": 122020 }, { "epoch": 1.35, "learning_rate": 2.748084418460448e-05, "loss": 0.5675, "step": 122025 }, { "epoch": 1.35, "learning_rate": 2.7479921457465973e-05, "loss": 0.624, "step": 122030 }, { "epoch": 1.35, "learning_rate": 2.747899873032746e-05, "loss": 0.6023, "step": 122035 }, { "epoch": 1.35, "learning_rate": 2.7478076003188945e-05, "loss": 0.7055, "step": 122040 }, { "epoch": 1.35, "learning_rate": 2.7477153276050433e-05, "loss": 0.6205, "step": 122045 }, { "epoch": 1.35, "learning_rate": 2.7476230548911917e-05, "loss": 0.6284, "step": 122050 }, { "epoch": 1.35, "learning_rate": 2.7475307821773412e-05, "loss": 0.6354, "step": 122055 }, { "epoch": 1.35, "learning_rate": 2.7474385094634896e-05, "loss": 0.6647, "step": 122060 }, { "epoch": 1.35, "learning_rate": 2.7473462367496384e-05, "loss": 0.6771, "step": 122065 }, { "epoch": 1.35, "learning_rate": 2.747253964035787e-05, "loss": 0.6324, "step": 122070 }, { "epoch": 1.35, "learning_rate": 2.747161691321936e-05, "loss": 0.6703, "step": 122075 }, { "epoch": 1.35, "learning_rate": 2.7470694186080848e-05, "loss": 0.6498, "step": 122080 }, { "epoch": 1.35, "learning_rate": 2.7469771458942332e-05, "loss": 0.6483, "step": 122085 }, { "epoch": 1.35, "learning_rate": 2.746884873180382e-05, "loss": 0.6711, "step": 122090 }, { "epoch": 1.35, "learning_rate": 2.746792600466531e-05, "loss": 0.6967, "step": 122095 }, { "epoch": 1.35, "learning_rate": 2.74670032775268e-05, "loss": 0.639, "step": 122100 }, { "epoch": 1.35, "learning_rate": 2.7466080550388284e-05, "loss": 0.6334, "step": 122105 }, { "epoch": 1.35, "learning_rate": 2.746515782324977e-05, "loss": 0.6893, "step": 122110 }, { "epoch": 1.35, "learning_rate": 2.7464235096111263e-05, "loss": 0.6538, "step": 122115 }, { "epoch": 1.35, "learning_rate": 2.746331236897275e-05, "loss": 0.6315, "step": 122120 }, { "epoch": 1.35, "learning_rate": 2.7462389641834235e-05, "loss": 0.6537, "step": 122125 }, { "epoch": 1.35, "learning_rate": 2.7461466914695723e-05, "loss": 0.7045, "step": 122130 }, { "epoch": 1.35, "learning_rate": 2.7460544187557207e-05, "loss": 0.6429, "step": 122135 }, { "epoch": 1.35, "learning_rate": 2.74596214604187e-05, "loss": 0.5963, "step": 122140 }, { "epoch": 1.35, "learning_rate": 2.7458698733280186e-05, "loss": 0.6591, "step": 122145 }, { "epoch": 1.35, "learning_rate": 2.745777600614167e-05, "loss": 0.6967, "step": 122150 }, { "epoch": 1.35, "learning_rate": 2.745685327900316e-05, "loss": 0.6972, "step": 122155 }, { "epoch": 1.35, "learning_rate": 2.745593055186465e-05, "loss": 0.62, "step": 122160 }, { "epoch": 1.35, "learning_rate": 2.7455007824726138e-05, "loss": 0.6684, "step": 122165 }, { "epoch": 1.35, "learning_rate": 2.7454085097587622e-05, "loss": 0.6382, "step": 122170 }, { "epoch": 1.35, "learning_rate": 2.745316237044911e-05, "loss": 0.6365, "step": 122175 }, { "epoch": 1.35, "learning_rate": 2.74522396433106e-05, "loss": 0.6554, "step": 122180 }, { "epoch": 1.35, "learning_rate": 2.7451316916172086e-05, "loss": 0.618, "step": 122185 }, { "epoch": 1.35, "learning_rate": 2.7450394189033574e-05, "loss": 0.6925, "step": 122190 }, { "epoch": 1.35, "learning_rate": 2.744947146189506e-05, "loss": 0.6295, "step": 122195 }, { "epoch": 1.35, "learning_rate": 2.7448548734756553e-05, "loss": 0.6185, "step": 122200 }, { "epoch": 1.35, "learning_rate": 2.7447626007618037e-05, "loss": 0.6291, "step": 122205 }, { "epoch": 1.35, "learning_rate": 2.7446703280479525e-05, "loss": 0.674, "step": 122210 }, { "epoch": 1.35, "learning_rate": 2.744578055334101e-05, "loss": 0.6459, "step": 122215 }, { "epoch": 1.35, "learning_rate": 2.7444857826202497e-05, "loss": 0.6537, "step": 122220 }, { "epoch": 1.35, "learning_rate": 2.744393509906399e-05, "loss": 0.6709, "step": 122225 }, { "epoch": 1.35, "learning_rate": 2.7443012371925476e-05, "loss": 0.6453, "step": 122230 }, { "epoch": 1.35, "learning_rate": 2.744208964478696e-05, "loss": 0.6243, "step": 122235 }, { "epoch": 1.35, "learning_rate": 2.744116691764845e-05, "loss": 0.6829, "step": 122240 }, { "epoch": 1.35, "learning_rate": 2.744024419050994e-05, "loss": 0.6731, "step": 122245 }, { "epoch": 1.35, "learning_rate": 2.7439321463371424e-05, "loss": 0.6306, "step": 122250 }, { "epoch": 1.35, "learning_rate": 2.7438398736232912e-05, "loss": 0.6568, "step": 122255 }, { "epoch": 1.35, "learning_rate": 2.7437476009094397e-05, "loss": 0.6616, "step": 122260 }, { "epoch": 1.35, "learning_rate": 2.743655328195589e-05, "loss": 0.6589, "step": 122265 }, { "epoch": 1.35, "learning_rate": 2.7435630554817376e-05, "loss": 0.63, "step": 122270 }, { "epoch": 1.35, "learning_rate": 2.7434707827678864e-05, "loss": 0.6077, "step": 122275 }, { "epoch": 1.35, "learning_rate": 2.7433785100540348e-05, "loss": 0.6405, "step": 122280 }, { "epoch": 1.35, "learning_rate": 2.7432862373401836e-05, "loss": 0.6795, "step": 122285 }, { "epoch": 1.35, "learning_rate": 2.7431939646263327e-05, "loss": 0.6435, "step": 122290 }, { "epoch": 1.35, "learning_rate": 2.7431016919124815e-05, "loss": 0.6531, "step": 122295 }, { "epoch": 1.35, "learning_rate": 2.74300941919863e-05, "loss": 0.6443, "step": 122300 }, { "epoch": 1.35, "learning_rate": 2.7429171464847787e-05, "loss": 0.6451, "step": 122305 }, { "epoch": 1.35, "learning_rate": 2.742824873770928e-05, "loss": 0.6324, "step": 122310 }, { "epoch": 1.35, "learning_rate": 2.7427326010570763e-05, "loss": 0.6509, "step": 122315 }, { "epoch": 1.35, "learning_rate": 2.742640328343225e-05, "loss": 0.6218, "step": 122320 }, { "epoch": 1.35, "learning_rate": 2.7425480556293735e-05, "loss": 0.6583, "step": 122325 }, { "epoch": 1.35, "learning_rate": 2.742455782915523e-05, "loss": 0.6484, "step": 122330 }, { "epoch": 1.35, "learning_rate": 2.7423635102016714e-05, "loss": 0.6114, "step": 122335 }, { "epoch": 1.35, "learning_rate": 2.7422712374878202e-05, "loss": 0.6086, "step": 122340 }, { "epoch": 1.35, "learning_rate": 2.7421789647739687e-05, "loss": 0.6742, "step": 122345 }, { "epoch": 1.35, "learning_rate": 2.7420866920601178e-05, "loss": 0.658, "step": 122350 }, { "epoch": 1.35, "learning_rate": 2.7419944193462666e-05, "loss": 0.6487, "step": 122355 }, { "epoch": 1.35, "learning_rate": 2.741902146632415e-05, "loss": 0.6812, "step": 122360 }, { "epoch": 1.35, "learning_rate": 2.7418098739185638e-05, "loss": 0.6336, "step": 122365 }, { "epoch": 1.35, "learning_rate": 2.7417176012047126e-05, "loss": 0.6776, "step": 122370 }, { "epoch": 1.36, "learning_rate": 2.7416253284908617e-05, "loss": 0.6815, "step": 122375 }, { "epoch": 1.36, "learning_rate": 2.7415330557770102e-05, "loss": 0.6567, "step": 122380 }, { "epoch": 1.36, "learning_rate": 2.741440783063159e-05, "loss": 0.6691, "step": 122385 }, { "epoch": 1.36, "learning_rate": 2.7413485103493074e-05, "loss": 0.7038, "step": 122390 }, { "epoch": 1.36, "learning_rate": 2.7412562376354565e-05, "loss": 0.6112, "step": 122395 }, { "epoch": 1.36, "learning_rate": 2.7411639649216053e-05, "loss": 0.6738, "step": 122400 }, { "epoch": 1.36, "learning_rate": 2.741071692207754e-05, "loss": 0.6052, "step": 122405 }, { "epoch": 1.36, "learning_rate": 2.7409794194939025e-05, "loss": 0.6353, "step": 122410 }, { "epoch": 1.36, "learning_rate": 2.7408871467800517e-05, "loss": 0.6811, "step": 122415 }, { "epoch": 1.36, "learning_rate": 2.7407948740662005e-05, "loss": 0.677, "step": 122420 }, { "epoch": 1.36, "learning_rate": 2.740702601352349e-05, "loss": 0.7019, "step": 122425 }, { "epoch": 1.36, "learning_rate": 2.7406103286384977e-05, "loss": 0.6284, "step": 122430 }, { "epoch": 1.36, "learning_rate": 2.740518055924646e-05, "loss": 0.6187, "step": 122435 }, { "epoch": 1.36, "learning_rate": 2.7404257832107956e-05, "loss": 0.6512, "step": 122440 }, { "epoch": 1.36, "learning_rate": 2.740333510496944e-05, "loss": 0.7321, "step": 122445 }, { "epoch": 1.36, "learning_rate": 2.7402412377830928e-05, "loss": 0.6903, "step": 122450 }, { "epoch": 1.36, "learning_rate": 2.7401489650692413e-05, "loss": 0.647, "step": 122455 }, { "epoch": 1.36, "learning_rate": 2.7400566923553904e-05, "loss": 0.6501, "step": 122460 }, { "epoch": 1.36, "learning_rate": 2.7399644196415392e-05, "loss": 0.6238, "step": 122465 }, { "epoch": 1.36, "learning_rate": 2.7398721469276876e-05, "loss": 0.6639, "step": 122470 }, { "epoch": 1.36, "learning_rate": 2.7397798742138364e-05, "loss": 0.6327, "step": 122475 }, { "epoch": 1.36, "learning_rate": 2.7396876014999855e-05, "loss": 0.645, "step": 122480 }, { "epoch": 1.36, "learning_rate": 2.7395953287861343e-05, "loss": 0.6842, "step": 122485 }, { "epoch": 1.36, "learning_rate": 2.7395030560722828e-05, "loss": 0.618, "step": 122490 }, { "epoch": 1.36, "learning_rate": 2.7394107833584315e-05, "loss": 0.6543, "step": 122495 }, { "epoch": 1.36, "learning_rate": 2.7393185106445807e-05, "loss": 0.6656, "step": 122500 }, { "epoch": 1.36, "learning_rate": 2.7392262379307295e-05, "loss": 0.6431, "step": 122505 }, { "epoch": 1.36, "learning_rate": 2.739133965216878e-05, "loss": 0.6066, "step": 122510 }, { "epoch": 1.36, "learning_rate": 2.7390416925030267e-05, "loss": 0.6372, "step": 122515 }, { "epoch": 1.36, "learning_rate": 2.738949419789175e-05, "loss": 0.6543, "step": 122520 }, { "epoch": 1.36, "learning_rate": 2.7388571470753243e-05, "loss": 0.6641, "step": 122525 }, { "epoch": 1.36, "learning_rate": 2.738764874361473e-05, "loss": 0.5901, "step": 122530 }, { "epoch": 1.36, "learning_rate": 2.7386726016476215e-05, "loss": 0.6574, "step": 122535 }, { "epoch": 1.36, "learning_rate": 2.7385803289337703e-05, "loss": 0.6831, "step": 122540 }, { "epoch": 1.36, "learning_rate": 2.7384880562199194e-05, "loss": 0.6368, "step": 122545 }, { "epoch": 1.36, "learning_rate": 2.7383957835060682e-05, "loss": 0.6829, "step": 122550 }, { "epoch": 1.36, "learning_rate": 2.7383035107922166e-05, "loss": 0.6065, "step": 122555 }, { "epoch": 1.36, "learning_rate": 2.7382112380783654e-05, "loss": 0.5941, "step": 122560 }, { "epoch": 1.36, "learning_rate": 2.7381189653645145e-05, "loss": 0.7379, "step": 122565 }, { "epoch": 1.36, "learning_rate": 2.738026692650663e-05, "loss": 0.6621, "step": 122570 }, { "epoch": 1.36, "learning_rate": 2.7379344199368118e-05, "loss": 0.6804, "step": 122575 }, { "epoch": 1.36, "learning_rate": 2.7378421472229605e-05, "loss": 0.6815, "step": 122580 }, { "epoch": 1.36, "learning_rate": 2.737749874509109e-05, "loss": 0.6299, "step": 122585 }, { "epoch": 1.36, "learning_rate": 2.737657601795258e-05, "loss": 0.6592, "step": 122590 }, { "epoch": 1.36, "learning_rate": 2.737565329081407e-05, "loss": 0.6743, "step": 122595 }, { "epoch": 1.36, "learning_rate": 2.7374730563675554e-05, "loss": 0.6994, "step": 122600 }, { "epoch": 1.36, "learning_rate": 2.737380783653704e-05, "loss": 0.5703, "step": 122605 }, { "epoch": 1.36, "learning_rate": 2.7372885109398533e-05, "loss": 0.6714, "step": 122610 }, { "epoch": 1.36, "learning_rate": 2.737196238226002e-05, "loss": 0.6459, "step": 122615 }, { "epoch": 1.36, "learning_rate": 2.7371039655121505e-05, "loss": 0.6588, "step": 122620 }, { "epoch": 1.36, "learning_rate": 2.7370116927982993e-05, "loss": 0.5929, "step": 122625 }, { "epoch": 1.36, "learning_rate": 2.7369194200844484e-05, "loss": 0.7037, "step": 122630 }, { "epoch": 1.36, "learning_rate": 2.736827147370597e-05, "loss": 0.6001, "step": 122635 }, { "epoch": 1.36, "learning_rate": 2.7367348746567456e-05, "loss": 0.6607, "step": 122640 }, { "epoch": 1.36, "learning_rate": 2.736642601942894e-05, "loss": 0.7091, "step": 122645 }, { "epoch": 1.36, "learning_rate": 2.7365503292290435e-05, "loss": 0.672, "step": 122650 }, { "epoch": 1.36, "learning_rate": 2.736458056515192e-05, "loss": 0.605, "step": 122655 }, { "epoch": 1.36, "learning_rate": 2.7363657838013408e-05, "loss": 0.7073, "step": 122660 }, { "epoch": 1.36, "learning_rate": 2.7362735110874892e-05, "loss": 0.6585, "step": 122665 }, { "epoch": 1.36, "learning_rate": 2.736181238373638e-05, "loss": 0.6205, "step": 122670 }, { "epoch": 1.36, "learning_rate": 2.736088965659787e-05, "loss": 0.6584, "step": 122675 }, { "epoch": 1.36, "learning_rate": 2.735996692945936e-05, "loss": 0.739, "step": 122680 }, { "epoch": 1.36, "learning_rate": 2.7359044202320844e-05, "loss": 0.6492, "step": 122685 }, { "epoch": 1.36, "learning_rate": 2.735812147518233e-05, "loss": 0.6788, "step": 122690 }, { "epoch": 1.36, "learning_rate": 2.7357198748043823e-05, "loss": 0.6554, "step": 122695 }, { "epoch": 1.36, "learning_rate": 2.7356276020905307e-05, "loss": 0.6389, "step": 122700 }, { "epoch": 1.36, "learning_rate": 2.7355353293766795e-05, "loss": 0.6893, "step": 122705 }, { "epoch": 1.36, "learning_rate": 2.735443056662828e-05, "loss": 0.68, "step": 122710 }, { "epoch": 1.36, "learning_rate": 2.7353507839489774e-05, "loss": 0.6784, "step": 122715 }, { "epoch": 1.36, "learning_rate": 2.735258511235126e-05, "loss": 0.7099, "step": 122720 }, { "epoch": 1.36, "learning_rate": 2.7351662385212746e-05, "loss": 0.704, "step": 122725 }, { "epoch": 1.36, "learning_rate": 2.735073965807423e-05, "loss": 0.6237, "step": 122730 }, { "epoch": 1.36, "learning_rate": 2.734981693093572e-05, "loss": 0.6696, "step": 122735 }, { "epoch": 1.36, "learning_rate": 2.734889420379721e-05, "loss": 0.6658, "step": 122740 }, { "epoch": 1.36, "learning_rate": 2.7347971476658694e-05, "loss": 0.6321, "step": 122745 }, { "epoch": 1.36, "learning_rate": 2.7347048749520182e-05, "loss": 0.6389, "step": 122750 }, { "epoch": 1.36, "learning_rate": 2.734612602238167e-05, "loss": 0.6571, "step": 122755 }, { "epoch": 1.36, "learning_rate": 2.734520329524316e-05, "loss": 0.6867, "step": 122760 }, { "epoch": 1.36, "learning_rate": 2.7344280568104646e-05, "loss": 0.6433, "step": 122765 }, { "epoch": 1.36, "learning_rate": 2.7343357840966134e-05, "loss": 0.6873, "step": 122770 }, { "epoch": 1.36, "learning_rate": 2.7342435113827618e-05, "loss": 0.6566, "step": 122775 }, { "epoch": 1.36, "learning_rate": 2.734151238668911e-05, "loss": 0.6472, "step": 122780 }, { "epoch": 1.36, "learning_rate": 2.7340589659550597e-05, "loss": 0.6186, "step": 122785 }, { "epoch": 1.36, "learning_rate": 2.7339666932412085e-05, "loss": 0.7141, "step": 122790 }, { "epoch": 1.36, "learning_rate": 2.733874420527357e-05, "loss": 0.6186, "step": 122795 }, { "epoch": 1.36, "learning_rate": 2.733782147813506e-05, "loss": 0.7066, "step": 122800 }, { "epoch": 1.36, "learning_rate": 2.733689875099655e-05, "loss": 0.6453, "step": 122805 }, { "epoch": 1.36, "learning_rate": 2.7335976023858033e-05, "loss": 0.6172, "step": 122810 }, { "epoch": 1.36, "learning_rate": 2.733505329671952e-05, "loss": 0.6532, "step": 122815 }, { "epoch": 1.36, "learning_rate": 2.7334130569581005e-05, "loss": 0.6765, "step": 122820 }, { "epoch": 1.36, "learning_rate": 2.73332078424425e-05, "loss": 0.6723, "step": 122825 }, { "epoch": 1.36, "learning_rate": 2.7332285115303984e-05, "loss": 0.584, "step": 122830 }, { "epoch": 1.36, "learning_rate": 2.7331362388165472e-05, "loss": 0.6864, "step": 122835 }, { "epoch": 1.36, "learning_rate": 2.7330439661026957e-05, "loss": 0.7272, "step": 122840 }, { "epoch": 1.36, "learning_rate": 2.7329516933888448e-05, "loss": 0.6445, "step": 122845 }, { "epoch": 1.36, "learning_rate": 2.7328594206749936e-05, "loss": 0.6294, "step": 122850 }, { "epoch": 1.36, "learning_rate": 2.7327671479611424e-05, "loss": 0.67, "step": 122855 }, { "epoch": 1.36, "learning_rate": 2.7326748752472908e-05, "loss": 0.7022, "step": 122860 }, { "epoch": 1.36, "learning_rate": 2.73258260253344e-05, "loss": 0.5802, "step": 122865 }, { "epoch": 1.36, "learning_rate": 2.7324903298195887e-05, "loss": 0.665, "step": 122870 }, { "epoch": 1.36, "learning_rate": 2.732398057105737e-05, "loss": 0.6298, "step": 122875 }, { "epoch": 1.36, "learning_rate": 2.732305784391886e-05, "loss": 0.6828, "step": 122880 }, { "epoch": 1.36, "learning_rate": 2.7322135116780344e-05, "loss": 0.6094, "step": 122885 }, { "epoch": 1.36, "learning_rate": 2.732121238964184e-05, "loss": 0.6475, "step": 122890 }, { "epoch": 1.36, "learning_rate": 2.7320289662503323e-05, "loss": 0.6494, "step": 122895 }, { "epoch": 1.36, "learning_rate": 2.731936693536481e-05, "loss": 0.6279, "step": 122900 }, { "epoch": 1.36, "learning_rate": 2.7318444208226295e-05, "loss": 0.6897, "step": 122905 }, { "epoch": 1.36, "learning_rate": 2.7317521481087787e-05, "loss": 0.7043, "step": 122910 }, { "epoch": 1.36, "learning_rate": 2.7316598753949274e-05, "loss": 0.6225, "step": 122915 }, { "epoch": 1.36, "learning_rate": 2.731567602681076e-05, "loss": 0.6328, "step": 122920 }, { "epoch": 1.36, "learning_rate": 2.7314753299672247e-05, "loss": 0.6699, "step": 122925 }, { "epoch": 1.36, "learning_rate": 2.7313830572533738e-05, "loss": 0.6619, "step": 122930 }, { "epoch": 1.36, "learning_rate": 2.7312907845395226e-05, "loss": 0.621, "step": 122935 }, { "epoch": 1.36, "learning_rate": 2.731198511825671e-05, "loss": 0.627, "step": 122940 }, { "epoch": 1.36, "learning_rate": 2.7311062391118198e-05, "loss": 0.7176, "step": 122945 }, { "epoch": 1.36, "learning_rate": 2.731013966397969e-05, "loss": 0.7013, "step": 122950 }, { "epoch": 1.36, "learning_rate": 2.7309216936841174e-05, "loss": 0.5873, "step": 122955 }, { "epoch": 1.36, "learning_rate": 2.730829420970266e-05, "loss": 0.684, "step": 122960 }, { "epoch": 1.36, "learning_rate": 2.730737148256415e-05, "loss": 0.6521, "step": 122965 }, { "epoch": 1.36, "learning_rate": 2.7306448755425634e-05, "loss": 0.6055, "step": 122970 }, { "epoch": 1.36, "learning_rate": 2.7305526028287125e-05, "loss": 0.6489, "step": 122975 }, { "epoch": 1.36, "learning_rate": 2.7304603301148613e-05, "loss": 0.7339, "step": 122980 }, { "epoch": 1.36, "learning_rate": 2.7303680574010098e-05, "loss": 0.5737, "step": 122985 }, { "epoch": 1.36, "learning_rate": 2.7302757846871585e-05, "loss": 0.6021, "step": 122990 }, { "epoch": 1.36, "learning_rate": 2.7301835119733077e-05, "loss": 0.659, "step": 122995 }, { "epoch": 1.36, "learning_rate": 2.7300912392594564e-05, "loss": 0.6358, "step": 123000 }, { "epoch": 1.36, "eval_loss": 0.6290777325630188, "eval_runtime": 69.5949, "eval_samples_per_second": 28.738, "eval_steps_per_second": 14.369, "step": 123000 }, { "epoch": 1.36, "learning_rate": 2.729998966545605e-05, "loss": 0.6561, "step": 123005 }, { "epoch": 1.36, "learning_rate": 2.7299066938317537e-05, "loss": 0.6831, "step": 123010 }, { "epoch": 1.36, "learning_rate": 2.7298144211179028e-05, "loss": 0.6334, "step": 123015 }, { "epoch": 1.36, "learning_rate": 2.7297221484040512e-05, "loss": 0.6948, "step": 123020 }, { "epoch": 1.36, "learning_rate": 2.7296298756902e-05, "loss": 0.7024, "step": 123025 }, { "epoch": 1.36, "learning_rate": 2.7295376029763485e-05, "loss": 0.7084, "step": 123030 }, { "epoch": 1.36, "learning_rate": 2.729445330262498e-05, "loss": 0.6895, "step": 123035 }, { "epoch": 1.36, "learning_rate": 2.7293530575486464e-05, "loss": 0.6867, "step": 123040 }, { "epoch": 1.36, "learning_rate": 2.7292607848347952e-05, "loss": 0.6061, "step": 123045 }, { "epoch": 1.36, "learning_rate": 2.7291685121209436e-05, "loss": 0.663, "step": 123050 }, { "epoch": 1.36, "learning_rate": 2.7290762394070924e-05, "loss": 0.6774, "step": 123055 }, { "epoch": 1.36, "learning_rate": 2.7289839666932415e-05, "loss": 0.6472, "step": 123060 }, { "epoch": 1.36, "learning_rate": 2.7288916939793903e-05, "loss": 0.6508, "step": 123065 }, { "epoch": 1.36, "learning_rate": 2.7287994212655388e-05, "loss": 0.6631, "step": 123070 }, { "epoch": 1.36, "learning_rate": 2.7287071485516875e-05, "loss": 0.6322, "step": 123075 }, { "epoch": 1.36, "learning_rate": 2.7286148758378367e-05, "loss": 0.7054, "step": 123080 }, { "epoch": 1.36, "learning_rate": 2.728522603123985e-05, "loss": 0.6102, "step": 123085 }, { "epoch": 1.36, "learning_rate": 2.728430330410134e-05, "loss": 0.6245, "step": 123090 }, { "epoch": 1.36, "learning_rate": 2.7283380576962823e-05, "loss": 0.6742, "step": 123095 }, { "epoch": 1.36, "learning_rate": 2.7282457849824318e-05, "loss": 0.5995, "step": 123100 }, { "epoch": 1.36, "learning_rate": 2.7281535122685803e-05, "loss": 0.652, "step": 123105 }, { "epoch": 1.36, "learning_rate": 2.728061239554729e-05, "loss": 0.6761, "step": 123110 }, { "epoch": 1.36, "learning_rate": 2.7279689668408775e-05, "loss": 0.6395, "step": 123115 }, { "epoch": 1.36, "learning_rate": 2.7278766941270263e-05, "loss": 0.6166, "step": 123120 }, { "epoch": 1.36, "learning_rate": 2.7277844214131754e-05, "loss": 0.6794, "step": 123125 }, { "epoch": 1.36, "learning_rate": 2.727692148699324e-05, "loss": 0.7211, "step": 123130 }, { "epoch": 1.36, "learning_rate": 2.7275998759854726e-05, "loss": 0.6699, "step": 123135 }, { "epoch": 1.36, "learning_rate": 2.7275076032716214e-05, "loss": 0.6696, "step": 123140 }, { "epoch": 1.36, "learning_rate": 2.7274153305577705e-05, "loss": 0.6322, "step": 123145 }, { "epoch": 1.36, "learning_rate": 2.727323057843919e-05, "loss": 0.6662, "step": 123150 }, { "epoch": 1.36, "learning_rate": 2.7272307851300678e-05, "loss": 0.6697, "step": 123155 }, { "epoch": 1.36, "learning_rate": 2.7271385124162162e-05, "loss": 0.6132, "step": 123160 }, { "epoch": 1.36, "learning_rate": 2.7270462397023657e-05, "loss": 0.6502, "step": 123165 }, { "epoch": 1.36, "learning_rate": 2.726953966988514e-05, "loss": 0.639, "step": 123170 }, { "epoch": 1.36, "learning_rate": 2.726861694274663e-05, "loss": 0.6821, "step": 123175 }, { "epoch": 1.36, "learning_rate": 2.7267694215608113e-05, "loss": 0.6764, "step": 123180 }, { "epoch": 1.36, "learning_rate": 2.7266771488469605e-05, "loss": 0.6791, "step": 123185 }, { "epoch": 1.36, "learning_rate": 2.7265848761331093e-05, "loss": 0.6809, "step": 123190 }, { "epoch": 1.36, "learning_rate": 2.7264926034192577e-05, "loss": 0.6475, "step": 123195 }, { "epoch": 1.36, "learning_rate": 2.7264003307054065e-05, "loss": 0.6203, "step": 123200 }, { "epoch": 1.36, "learning_rate": 2.726308057991555e-05, "loss": 0.7066, "step": 123205 }, { "epoch": 1.36, "learning_rate": 2.7262157852777044e-05, "loss": 0.6532, "step": 123210 }, { "epoch": 1.36, "learning_rate": 2.726123512563853e-05, "loss": 0.6182, "step": 123215 }, { "epoch": 1.36, "learning_rate": 2.7260312398500016e-05, "loss": 0.6721, "step": 123220 }, { "epoch": 1.36, "learning_rate": 2.72593896713615e-05, "loss": 0.6167, "step": 123225 }, { "epoch": 1.36, "learning_rate": 2.7258466944222992e-05, "loss": 0.6353, "step": 123230 }, { "epoch": 1.36, "learning_rate": 2.725754421708448e-05, "loss": 0.6783, "step": 123235 }, { "epoch": 1.36, "learning_rate": 2.7256621489945968e-05, "loss": 0.6445, "step": 123240 }, { "epoch": 1.36, "learning_rate": 2.7255698762807452e-05, "loss": 0.7099, "step": 123245 }, { "epoch": 1.36, "learning_rate": 2.7254776035668943e-05, "loss": 0.6849, "step": 123250 }, { "epoch": 1.36, "learning_rate": 2.725385330853043e-05, "loss": 0.6612, "step": 123255 }, { "epoch": 1.36, "learning_rate": 2.7252930581391916e-05, "loss": 0.6862, "step": 123260 }, { "epoch": 1.36, "learning_rate": 2.7252007854253403e-05, "loss": 0.6208, "step": 123265 }, { "epoch": 1.36, "learning_rate": 2.7251085127114888e-05, "loss": 0.7023, "step": 123270 }, { "epoch": 1.36, "learning_rate": 2.7250162399976383e-05, "loss": 0.6646, "step": 123275 }, { "epoch": 1.37, "learning_rate": 2.7249239672837867e-05, "loss": 0.6394, "step": 123280 }, { "epoch": 1.37, "learning_rate": 2.7248316945699355e-05, "loss": 0.6141, "step": 123285 }, { "epoch": 1.37, "learning_rate": 2.724739421856084e-05, "loss": 0.65, "step": 123290 }, { "epoch": 1.37, "learning_rate": 2.724647149142233e-05, "loss": 0.6433, "step": 123295 }, { "epoch": 1.37, "learning_rate": 2.724554876428382e-05, "loss": 0.5968, "step": 123300 }, { "epoch": 1.37, "learning_rate": 2.7244626037145303e-05, "loss": 0.6614, "step": 123305 }, { "epoch": 1.37, "learning_rate": 2.724370331000679e-05, "loss": 0.6873, "step": 123310 }, { "epoch": 1.37, "learning_rate": 2.7242780582868282e-05, "loss": 0.6594, "step": 123315 }, { "epoch": 1.37, "learning_rate": 2.724185785572977e-05, "loss": 0.7003, "step": 123320 }, { "epoch": 1.37, "learning_rate": 2.7240935128591254e-05, "loss": 0.6427, "step": 123325 }, { "epoch": 1.37, "learning_rate": 2.7240012401452742e-05, "loss": 0.6351, "step": 123330 }, { "epoch": 1.37, "learning_rate": 2.7239089674314233e-05, "loss": 0.5888, "step": 123335 }, { "epoch": 1.37, "learning_rate": 2.7238166947175718e-05, "loss": 0.6722, "step": 123340 }, { "epoch": 1.37, "learning_rate": 2.7237244220037206e-05, "loss": 0.6658, "step": 123345 }, { "epoch": 1.37, "learning_rate": 2.7236321492898694e-05, "loss": 0.6185, "step": 123350 }, { "epoch": 1.37, "learning_rate": 2.7235398765760178e-05, "loss": 0.6313, "step": 123355 }, { "epoch": 1.37, "learning_rate": 2.723447603862167e-05, "loss": 0.6711, "step": 123360 }, { "epoch": 1.37, "learning_rate": 2.7233553311483157e-05, "loss": 0.685, "step": 123365 }, { "epoch": 1.37, "learning_rate": 2.723263058434464e-05, "loss": 0.6376, "step": 123370 }, { "epoch": 1.37, "learning_rate": 2.723170785720613e-05, "loss": 0.6862, "step": 123375 }, { "epoch": 1.37, "learning_rate": 2.723078513006762e-05, "loss": 0.6904, "step": 123380 }, { "epoch": 1.37, "learning_rate": 2.722986240292911e-05, "loss": 0.6086, "step": 123385 }, { "epoch": 1.37, "learning_rate": 2.7228939675790593e-05, "loss": 0.6191, "step": 123390 }, { "epoch": 1.37, "learning_rate": 2.722801694865208e-05, "loss": 0.6348, "step": 123395 }, { "epoch": 1.37, "learning_rate": 2.7227094221513572e-05, "loss": 0.6798, "step": 123400 }, { "epoch": 1.37, "learning_rate": 2.7226171494375056e-05, "loss": 0.597, "step": 123405 }, { "epoch": 1.37, "learning_rate": 2.7225248767236544e-05, "loss": 0.6144, "step": 123410 }, { "epoch": 1.37, "learning_rate": 2.722432604009803e-05, "loss": 0.6446, "step": 123415 }, { "epoch": 1.37, "learning_rate": 2.7223403312959517e-05, "loss": 0.6476, "step": 123420 }, { "epoch": 1.37, "learning_rate": 2.7222480585821008e-05, "loss": 0.659, "step": 123425 }, { "epoch": 1.37, "learning_rate": 2.7221557858682496e-05, "loss": 0.5985, "step": 123430 }, { "epoch": 1.37, "learning_rate": 2.722063513154398e-05, "loss": 0.6841, "step": 123435 }, { "epoch": 1.37, "learning_rate": 2.7219712404405468e-05, "loss": 0.6606, "step": 123440 }, { "epoch": 1.37, "learning_rate": 2.721878967726696e-05, "loss": 0.641, "step": 123445 }, { "epoch": 1.37, "learning_rate": 2.7217866950128447e-05, "loss": 0.6339, "step": 123450 }, { "epoch": 1.37, "learning_rate": 2.721694422298993e-05, "loss": 0.6439, "step": 123455 }, { "epoch": 1.37, "learning_rate": 2.721602149585142e-05, "loss": 0.6364, "step": 123460 }, { "epoch": 1.37, "learning_rate": 2.721509876871291e-05, "loss": 0.6936, "step": 123465 }, { "epoch": 1.37, "learning_rate": 2.7214176041574395e-05, "loss": 0.6425, "step": 123470 }, { "epoch": 1.37, "learning_rate": 2.7213253314435883e-05, "loss": 0.6583, "step": 123475 }, { "epoch": 1.37, "learning_rate": 2.7212330587297367e-05, "loss": 0.6054, "step": 123480 }, { "epoch": 1.37, "learning_rate": 2.7211407860158862e-05, "loss": 0.6558, "step": 123485 }, { "epoch": 1.37, "learning_rate": 2.7210485133020347e-05, "loss": 0.6854, "step": 123490 }, { "epoch": 1.37, "learning_rate": 2.7209562405881834e-05, "loss": 0.6704, "step": 123495 }, { "epoch": 1.37, "learning_rate": 2.720863967874332e-05, "loss": 0.6441, "step": 123500 }, { "epoch": 1.37, "learning_rate": 2.7207716951604807e-05, "loss": 0.6193, "step": 123505 }, { "epoch": 1.37, "learning_rate": 2.7206794224466298e-05, "loss": 0.6994, "step": 123510 }, { "epoch": 1.37, "learning_rate": 2.7205871497327782e-05, "loss": 0.6171, "step": 123515 }, { "epoch": 1.37, "learning_rate": 2.720494877018927e-05, "loss": 0.6807, "step": 123520 }, { "epoch": 1.37, "learning_rate": 2.7204026043050758e-05, "loss": 0.6848, "step": 123525 }, { "epoch": 1.37, "learning_rate": 2.720310331591225e-05, "loss": 0.688, "step": 123530 }, { "epoch": 1.37, "learning_rate": 2.7202180588773734e-05, "loss": 0.6738, "step": 123535 }, { "epoch": 1.37, "learning_rate": 2.720125786163522e-05, "loss": 0.685, "step": 123540 }, { "epoch": 1.37, "learning_rate": 2.7200335134496706e-05, "loss": 0.6418, "step": 123545 }, { "epoch": 1.37, "learning_rate": 2.71994124073582e-05, "loss": 0.6255, "step": 123550 }, { "epoch": 1.37, "learning_rate": 2.7198489680219685e-05, "loss": 0.637, "step": 123555 }, { "epoch": 1.37, "learning_rate": 2.7197566953081173e-05, "loss": 0.6684, "step": 123560 }, { "epoch": 1.37, "learning_rate": 2.7196644225942657e-05, "loss": 0.6844, "step": 123565 }, { "epoch": 1.37, "learning_rate": 2.7195721498804145e-05, "loss": 0.6522, "step": 123570 }, { "epoch": 1.37, "learning_rate": 2.7194798771665637e-05, "loss": 0.623, "step": 123575 }, { "epoch": 1.37, "learning_rate": 2.719387604452712e-05, "loss": 0.6709, "step": 123580 }, { "epoch": 1.37, "learning_rate": 2.719295331738861e-05, "loss": 0.6172, "step": 123585 }, { "epoch": 1.37, "learning_rate": 2.7192030590250093e-05, "loss": 0.6525, "step": 123590 }, { "epoch": 1.37, "learning_rate": 2.7191107863111588e-05, "loss": 0.6174, "step": 123595 }, { "epoch": 1.37, "learning_rate": 2.7190185135973072e-05, "loss": 0.6535, "step": 123600 }, { "epoch": 1.37, "learning_rate": 2.718926240883456e-05, "loss": 0.5722, "step": 123605 }, { "epoch": 1.37, "learning_rate": 2.7188339681696045e-05, "loss": 0.6642, "step": 123610 }, { "epoch": 1.37, "learning_rate": 2.7187416954557536e-05, "loss": 0.6111, "step": 123615 }, { "epoch": 1.37, "learning_rate": 2.7186494227419024e-05, "loss": 0.7107, "step": 123620 }, { "epoch": 1.37, "learning_rate": 2.718557150028051e-05, "loss": 0.7127, "step": 123625 }, { "epoch": 1.37, "learning_rate": 2.7184648773141996e-05, "loss": 0.6204, "step": 123630 }, { "epoch": 1.37, "learning_rate": 2.7183726046003487e-05, "loss": 0.6524, "step": 123635 }, { "epoch": 1.37, "learning_rate": 2.7182803318864975e-05, "loss": 0.6359, "step": 123640 }, { "epoch": 1.37, "learning_rate": 2.718188059172646e-05, "loss": 0.6627, "step": 123645 }, { "epoch": 1.37, "learning_rate": 2.7180957864587948e-05, "loss": 0.6688, "step": 123650 }, { "epoch": 1.37, "learning_rate": 2.7180035137449432e-05, "loss": 0.6233, "step": 123655 }, { "epoch": 1.37, "learning_rate": 2.7179112410310927e-05, "loss": 0.6254, "step": 123660 }, { "epoch": 1.37, "learning_rate": 2.717818968317241e-05, "loss": 0.6887, "step": 123665 }, { "epoch": 1.37, "learning_rate": 2.71772669560339e-05, "loss": 0.5873, "step": 123670 }, { "epoch": 1.37, "learning_rate": 2.7176344228895383e-05, "loss": 0.6009, "step": 123675 }, { "epoch": 1.37, "learning_rate": 2.7175421501756875e-05, "loss": 0.6572, "step": 123680 }, { "epoch": 1.37, "learning_rate": 2.7174498774618362e-05, "loss": 0.6718, "step": 123685 }, { "epoch": 1.37, "learning_rate": 2.7173576047479847e-05, "loss": 0.6496, "step": 123690 }, { "epoch": 1.37, "learning_rate": 2.7172653320341335e-05, "loss": 0.6719, "step": 123695 }, { "epoch": 1.37, "learning_rate": 2.7171730593202826e-05, "loss": 0.7046, "step": 123700 }, { "epoch": 1.37, "learning_rate": 2.7170807866064314e-05, "loss": 0.6766, "step": 123705 }, { "epoch": 1.37, "learning_rate": 2.71698851389258e-05, "loss": 0.6549, "step": 123710 }, { "epoch": 1.37, "learning_rate": 2.7168962411787286e-05, "loss": 0.6437, "step": 123715 }, { "epoch": 1.37, "learning_rate": 2.716803968464877e-05, "loss": 0.6824, "step": 123720 }, { "epoch": 1.37, "learning_rate": 2.7167116957510262e-05, "loss": 0.6558, "step": 123725 }, { "epoch": 1.37, "learning_rate": 2.716619423037175e-05, "loss": 0.5915, "step": 123730 }, { "epoch": 1.37, "learning_rate": 2.7165271503233238e-05, "loss": 0.6721, "step": 123735 }, { "epoch": 1.37, "learning_rate": 2.7164348776094722e-05, "loss": 0.6907, "step": 123740 }, { "epoch": 1.37, "learning_rate": 2.7163426048956213e-05, "loss": 0.6864, "step": 123745 }, { "epoch": 1.37, "learning_rate": 2.71625033218177e-05, "loss": 0.6765, "step": 123750 }, { "epoch": 1.37, "learning_rate": 2.7161580594679186e-05, "loss": 0.6779, "step": 123755 }, { "epoch": 1.37, "learning_rate": 2.7160657867540673e-05, "loss": 0.7348, "step": 123760 }, { "epoch": 1.37, "learning_rate": 2.7159735140402165e-05, "loss": 0.6162, "step": 123765 }, { "epoch": 1.37, "learning_rate": 2.7158812413263652e-05, "loss": 0.6695, "step": 123770 }, { "epoch": 1.37, "learning_rate": 2.7157889686125137e-05, "loss": 0.6593, "step": 123775 }, { "epoch": 1.37, "learning_rate": 2.7156966958986625e-05, "loss": 0.645, "step": 123780 }, { "epoch": 1.37, "learning_rate": 2.7156044231848116e-05, "loss": 0.6532, "step": 123785 }, { "epoch": 1.37, "learning_rate": 2.71551215047096e-05, "loss": 0.6182, "step": 123790 }, { "epoch": 1.37, "learning_rate": 2.715419877757109e-05, "loss": 0.638, "step": 123795 }, { "epoch": 1.37, "learning_rate": 2.7153276050432573e-05, "loss": 0.5907, "step": 123800 }, { "epoch": 1.37, "learning_rate": 2.715235332329406e-05, "loss": 0.6572, "step": 123805 }, { "epoch": 1.37, "learning_rate": 2.7151430596155552e-05, "loss": 0.7148, "step": 123810 }, { "epoch": 1.37, "learning_rate": 2.715050786901704e-05, "loss": 0.6824, "step": 123815 }, { "epoch": 1.37, "learning_rate": 2.7149585141878524e-05, "loss": 0.6355, "step": 123820 }, { "epoch": 1.37, "learning_rate": 2.7148662414740012e-05, "loss": 0.7206, "step": 123825 }, { "epoch": 1.37, "learning_rate": 2.7147739687601503e-05, "loss": 0.6583, "step": 123830 }, { "epoch": 1.37, "learning_rate": 2.714681696046299e-05, "loss": 0.6011, "step": 123835 }, { "epoch": 1.37, "learning_rate": 2.7145894233324476e-05, "loss": 0.667, "step": 123840 }, { "epoch": 1.37, "learning_rate": 2.7144971506185963e-05, "loss": 0.6936, "step": 123845 }, { "epoch": 1.37, "learning_rate": 2.7144048779047455e-05, "loss": 0.5864, "step": 123850 }, { "epoch": 1.37, "learning_rate": 2.714312605190894e-05, "loss": 0.6667, "step": 123855 }, { "epoch": 1.37, "learning_rate": 2.7142203324770427e-05, "loss": 0.669, "step": 123860 }, { "epoch": 1.37, "learning_rate": 2.714128059763191e-05, "loss": 0.6437, "step": 123865 }, { "epoch": 1.37, "learning_rate": 2.7140357870493406e-05, "loss": 0.6106, "step": 123870 }, { "epoch": 1.37, "learning_rate": 2.713943514335489e-05, "loss": 0.6477, "step": 123875 }, { "epoch": 1.37, "learning_rate": 2.713851241621638e-05, "loss": 0.6835, "step": 123880 }, { "epoch": 1.37, "learning_rate": 2.7137589689077863e-05, "loss": 0.5909, "step": 123885 }, { "epoch": 1.37, "learning_rate": 2.713666696193935e-05, "loss": 0.6192, "step": 123890 }, { "epoch": 1.37, "learning_rate": 2.7135744234800842e-05, "loss": 0.6887, "step": 123895 }, { "epoch": 1.37, "learning_rate": 2.7134821507662326e-05, "loss": 0.6619, "step": 123900 }, { "epoch": 1.37, "learning_rate": 2.7133898780523814e-05, "loss": 0.6162, "step": 123905 }, { "epoch": 1.37, "learning_rate": 2.7132976053385302e-05, "loss": 0.6688, "step": 123910 }, { "epoch": 1.37, "learning_rate": 2.7132053326246793e-05, "loss": 0.6601, "step": 123915 }, { "epoch": 1.37, "learning_rate": 2.7131130599108278e-05, "loss": 0.671, "step": 123920 }, { "epoch": 1.37, "learning_rate": 2.7130207871969766e-05, "loss": 0.6736, "step": 123925 }, { "epoch": 1.37, "learning_rate": 2.712928514483125e-05, "loss": 0.6316, "step": 123930 }, { "epoch": 1.37, "learning_rate": 2.7128362417692745e-05, "loss": 0.5667, "step": 123935 }, { "epoch": 1.37, "learning_rate": 2.712743969055423e-05, "loss": 0.6149, "step": 123940 }, { "epoch": 1.37, "learning_rate": 2.7126516963415717e-05, "loss": 0.6446, "step": 123945 }, { "epoch": 1.37, "learning_rate": 2.71255942362772e-05, "loss": 0.714, "step": 123950 }, { "epoch": 1.37, "learning_rate": 2.712467150913869e-05, "loss": 0.7368, "step": 123955 }, { "epoch": 1.37, "learning_rate": 2.712374878200018e-05, "loss": 0.6262, "step": 123960 }, { "epoch": 1.37, "learning_rate": 2.7122826054861665e-05, "loss": 0.668, "step": 123965 }, { "epoch": 1.37, "learning_rate": 2.7121903327723153e-05, "loss": 0.7005, "step": 123970 }, { "epoch": 1.37, "learning_rate": 2.7120980600584637e-05, "loss": 0.6478, "step": 123975 }, { "epoch": 1.37, "learning_rate": 2.7120057873446132e-05, "loss": 0.6978, "step": 123980 }, { "epoch": 1.37, "learning_rate": 2.7119135146307616e-05, "loss": 0.6194, "step": 123985 }, { "epoch": 1.37, "learning_rate": 2.7118212419169104e-05, "loss": 0.5837, "step": 123990 }, { "epoch": 1.37, "learning_rate": 2.711728969203059e-05, "loss": 0.6013, "step": 123995 }, { "epoch": 1.37, "learning_rate": 2.711636696489208e-05, "loss": 0.6754, "step": 124000 }, { "epoch": 1.37, "eval_loss": 0.6399930715560913, "eval_runtime": 69.2783, "eval_samples_per_second": 28.869, "eval_steps_per_second": 14.435, "step": 124000 }, { "epoch": 1.37, "learning_rate": 2.7115444237753568e-05, "loss": 0.682, "step": 124005 }, { "epoch": 1.37, "learning_rate": 2.7114521510615056e-05, "loss": 0.6441, "step": 124010 }, { "epoch": 1.37, "learning_rate": 2.711359878347654e-05, "loss": 0.6437, "step": 124015 }, { "epoch": 1.37, "learning_rate": 2.711267605633803e-05, "loss": 0.5886, "step": 124020 }, { "epoch": 1.37, "learning_rate": 2.711175332919952e-05, "loss": 0.6456, "step": 124025 }, { "epoch": 1.37, "learning_rate": 2.7110830602061004e-05, "loss": 0.6959, "step": 124030 }, { "epoch": 1.37, "learning_rate": 2.710990787492249e-05, "loss": 0.6495, "step": 124035 }, { "epoch": 1.37, "learning_rate": 2.7108985147783976e-05, "loss": 0.6727, "step": 124040 }, { "epoch": 1.37, "learning_rate": 2.710806242064547e-05, "loss": 0.671, "step": 124045 }, { "epoch": 1.37, "learning_rate": 2.7107139693506955e-05, "loss": 0.6393, "step": 124050 }, { "epoch": 1.37, "learning_rate": 2.7106216966368443e-05, "loss": 0.6717, "step": 124055 }, { "epoch": 1.37, "learning_rate": 2.7105294239229927e-05, "loss": 0.6551, "step": 124060 }, { "epoch": 1.37, "learning_rate": 2.710437151209142e-05, "loss": 0.6337, "step": 124065 }, { "epoch": 1.37, "learning_rate": 2.7103448784952906e-05, "loss": 0.6809, "step": 124070 }, { "epoch": 1.37, "learning_rate": 2.710252605781439e-05, "loss": 0.6446, "step": 124075 }, { "epoch": 1.37, "learning_rate": 2.710160333067588e-05, "loss": 0.6515, "step": 124080 }, { "epoch": 1.37, "learning_rate": 2.710068060353737e-05, "loss": 0.7188, "step": 124085 }, { "epoch": 1.37, "learning_rate": 2.7099757876398858e-05, "loss": 0.6325, "step": 124090 }, { "epoch": 1.37, "learning_rate": 2.7098835149260342e-05, "loss": 0.6467, "step": 124095 }, { "epoch": 1.37, "learning_rate": 2.709791242212183e-05, "loss": 0.7062, "step": 124100 }, { "epoch": 1.37, "learning_rate": 2.7096989694983315e-05, "loss": 0.6843, "step": 124105 }, { "epoch": 1.37, "learning_rate": 2.709606696784481e-05, "loss": 0.6392, "step": 124110 }, { "epoch": 1.37, "learning_rate": 2.7095144240706294e-05, "loss": 0.6517, "step": 124115 }, { "epoch": 1.37, "learning_rate": 2.709422151356778e-05, "loss": 0.6612, "step": 124120 }, { "epoch": 1.37, "learning_rate": 2.7093298786429266e-05, "loss": 0.6602, "step": 124125 }, { "epoch": 1.37, "learning_rate": 2.7092376059290757e-05, "loss": 0.6249, "step": 124130 }, { "epoch": 1.37, "learning_rate": 2.7091453332152245e-05, "loss": 0.6816, "step": 124135 }, { "epoch": 1.37, "learning_rate": 2.709053060501373e-05, "loss": 0.7091, "step": 124140 }, { "epoch": 1.37, "learning_rate": 2.7089607877875217e-05, "loss": 0.6378, "step": 124145 }, { "epoch": 1.37, "learning_rate": 2.708868515073671e-05, "loss": 0.6217, "step": 124150 }, { "epoch": 1.37, "learning_rate": 2.7087762423598197e-05, "loss": 0.6438, "step": 124155 }, { "epoch": 1.37, "learning_rate": 2.708683969645968e-05, "loss": 0.6605, "step": 124160 }, { "epoch": 1.37, "learning_rate": 2.708591696932117e-05, "loss": 0.6687, "step": 124165 }, { "epoch": 1.37, "learning_rate": 2.708499424218266e-05, "loss": 0.6811, "step": 124170 }, { "epoch": 1.37, "learning_rate": 2.7084071515044145e-05, "loss": 0.6487, "step": 124175 }, { "epoch": 1.38, "learning_rate": 2.7083148787905632e-05, "loss": 0.6981, "step": 124180 }, { "epoch": 1.38, "learning_rate": 2.708222606076712e-05, "loss": 0.6289, "step": 124185 }, { "epoch": 1.38, "learning_rate": 2.7081303333628605e-05, "loss": 0.6913, "step": 124190 }, { "epoch": 1.38, "learning_rate": 2.7080380606490096e-05, "loss": 0.6013, "step": 124195 }, { "epoch": 1.38, "learning_rate": 2.7079457879351584e-05, "loss": 0.6933, "step": 124200 }, { "epoch": 1.38, "learning_rate": 2.7078535152213068e-05, "loss": 0.7073, "step": 124205 }, { "epoch": 1.38, "learning_rate": 2.7077612425074556e-05, "loss": 0.7161, "step": 124210 }, { "epoch": 1.38, "learning_rate": 2.7076689697936047e-05, "loss": 0.6307, "step": 124215 }, { "epoch": 1.38, "learning_rate": 2.7075766970797535e-05, "loss": 0.6836, "step": 124220 }, { "epoch": 1.38, "learning_rate": 2.707484424365902e-05, "loss": 0.6776, "step": 124225 }, { "epoch": 1.38, "learning_rate": 2.7073921516520507e-05, "loss": 0.7234, "step": 124230 }, { "epoch": 1.38, "learning_rate": 2.7072998789382e-05, "loss": 0.6072, "step": 124235 }, { "epoch": 1.38, "learning_rate": 2.7072076062243483e-05, "loss": 0.6294, "step": 124240 }, { "epoch": 1.38, "learning_rate": 2.707115333510497e-05, "loss": 0.6177, "step": 124245 }, { "epoch": 1.38, "learning_rate": 2.7070230607966455e-05, "loss": 0.6541, "step": 124250 }, { "epoch": 1.38, "learning_rate": 2.7069307880827943e-05, "loss": 0.6209, "step": 124255 }, { "epoch": 1.38, "learning_rate": 2.7068385153689435e-05, "loss": 0.6456, "step": 124260 }, { "epoch": 1.38, "learning_rate": 2.7067462426550922e-05, "loss": 0.6845, "step": 124265 }, { "epoch": 1.38, "learning_rate": 2.7066539699412407e-05, "loss": 0.5818, "step": 124270 }, { "epoch": 1.38, "learning_rate": 2.7065616972273895e-05, "loss": 0.6588, "step": 124275 }, { "epoch": 1.38, "learning_rate": 2.7064694245135386e-05, "loss": 0.6584, "step": 124280 }, { "epoch": 1.38, "learning_rate": 2.706377151799687e-05, "loss": 0.6412, "step": 124285 }, { "epoch": 1.38, "learning_rate": 2.7062848790858358e-05, "loss": 0.6118, "step": 124290 }, { "epoch": 1.38, "learning_rate": 2.7061926063719846e-05, "loss": 0.6109, "step": 124295 }, { "epoch": 1.38, "learning_rate": 2.7061003336581337e-05, "loss": 0.6059, "step": 124300 }, { "epoch": 1.38, "learning_rate": 2.7060080609442822e-05, "loss": 0.6351, "step": 124305 }, { "epoch": 1.38, "learning_rate": 2.705915788230431e-05, "loss": 0.6086, "step": 124310 }, { "epoch": 1.38, "learning_rate": 2.7058235155165794e-05, "loss": 0.6923, "step": 124315 }, { "epoch": 1.38, "learning_rate": 2.705731242802729e-05, "loss": 0.6666, "step": 124320 }, { "epoch": 1.38, "learning_rate": 2.7056389700888773e-05, "loss": 0.639, "step": 124325 }, { "epoch": 1.38, "learning_rate": 2.705546697375026e-05, "loss": 0.6647, "step": 124330 }, { "epoch": 1.38, "learning_rate": 2.7054544246611746e-05, "loss": 0.6704, "step": 124335 }, { "epoch": 1.38, "learning_rate": 2.7053621519473233e-05, "loss": 0.6403, "step": 124340 }, { "epoch": 1.38, "learning_rate": 2.7052698792334725e-05, "loss": 0.6418, "step": 124345 }, { "epoch": 1.38, "learning_rate": 2.705177606519621e-05, "loss": 0.6238, "step": 124350 }, { "epoch": 1.38, "learning_rate": 2.7050853338057697e-05, "loss": 0.6537, "step": 124355 }, { "epoch": 1.38, "learning_rate": 2.704993061091918e-05, "loss": 0.7054, "step": 124360 }, { "epoch": 1.38, "learning_rate": 2.7049007883780676e-05, "loss": 0.6217, "step": 124365 }, { "epoch": 1.38, "learning_rate": 2.704808515664216e-05, "loss": 0.6846, "step": 124370 }, { "epoch": 1.38, "learning_rate": 2.7047162429503648e-05, "loss": 0.6381, "step": 124375 }, { "epoch": 1.38, "learning_rate": 2.7046239702365133e-05, "loss": 0.6735, "step": 124380 }, { "epoch": 1.38, "learning_rate": 2.7045316975226624e-05, "loss": 0.6637, "step": 124385 }, { "epoch": 1.38, "learning_rate": 2.7044394248088112e-05, "loss": 0.6213, "step": 124390 }, { "epoch": 1.38, "learning_rate": 2.70434715209496e-05, "loss": 0.5771, "step": 124395 }, { "epoch": 1.38, "learning_rate": 2.7042548793811084e-05, "loss": 0.6556, "step": 124400 }, { "epoch": 1.38, "learning_rate": 2.7041626066672572e-05, "loss": 0.6631, "step": 124405 }, { "epoch": 1.38, "learning_rate": 2.7040703339534063e-05, "loss": 0.6546, "step": 124410 }, { "epoch": 1.38, "learning_rate": 2.7039780612395548e-05, "loss": 0.6405, "step": 124415 }, { "epoch": 1.38, "learning_rate": 2.7038857885257036e-05, "loss": 0.6421, "step": 124420 }, { "epoch": 1.38, "learning_rate": 2.703793515811852e-05, "loss": 0.6484, "step": 124425 }, { "epoch": 1.38, "learning_rate": 2.7037012430980015e-05, "loss": 0.6794, "step": 124430 }, { "epoch": 1.38, "learning_rate": 2.70360897038415e-05, "loss": 0.6041, "step": 124435 }, { "epoch": 1.38, "learning_rate": 2.7035166976702987e-05, "loss": 0.6171, "step": 124440 }, { "epoch": 1.38, "learning_rate": 2.703424424956447e-05, "loss": 0.6533, "step": 124445 }, { "epoch": 1.38, "learning_rate": 2.7033321522425963e-05, "loss": 0.6251, "step": 124450 }, { "epoch": 1.38, "learning_rate": 2.703239879528745e-05, "loss": 0.6465, "step": 124455 }, { "epoch": 1.38, "learning_rate": 2.7031476068148935e-05, "loss": 0.6313, "step": 124460 }, { "epoch": 1.38, "learning_rate": 2.7030553341010423e-05, "loss": 0.6707, "step": 124465 }, { "epoch": 1.38, "learning_rate": 2.7029630613871914e-05, "loss": 0.7033, "step": 124470 }, { "epoch": 1.38, "learning_rate": 2.7028707886733402e-05, "loss": 0.6427, "step": 124475 }, { "epoch": 1.38, "learning_rate": 2.7027785159594886e-05, "loss": 0.5864, "step": 124480 }, { "epoch": 1.38, "learning_rate": 2.7026862432456374e-05, "loss": 0.6109, "step": 124485 }, { "epoch": 1.38, "learning_rate": 2.702593970531786e-05, "loss": 0.6638, "step": 124490 }, { "epoch": 1.38, "learning_rate": 2.7025016978179353e-05, "loss": 0.6004, "step": 124495 }, { "epoch": 1.38, "learning_rate": 2.7024094251040838e-05, "loss": 0.6782, "step": 124500 }, { "epoch": 1.38, "learning_rate": 2.7023171523902326e-05, "loss": 0.637, "step": 124505 }, { "epoch": 1.38, "learning_rate": 2.702224879676381e-05, "loss": 0.6639, "step": 124510 }, { "epoch": 1.38, "learning_rate": 2.70213260696253e-05, "loss": 0.6516, "step": 124515 }, { "epoch": 1.38, "learning_rate": 2.702040334248679e-05, "loss": 0.6388, "step": 124520 }, { "epoch": 1.38, "learning_rate": 2.7019480615348274e-05, "loss": 0.6755, "step": 124525 }, { "epoch": 1.38, "learning_rate": 2.701855788820976e-05, "loss": 0.6352, "step": 124530 }, { "epoch": 1.38, "learning_rate": 2.7017635161071253e-05, "loss": 0.6297, "step": 124535 }, { "epoch": 1.38, "learning_rate": 2.701671243393274e-05, "loss": 0.6398, "step": 124540 }, { "epoch": 1.38, "learning_rate": 2.7015789706794225e-05, "loss": 0.6286, "step": 124545 }, { "epoch": 1.38, "learning_rate": 2.7014866979655713e-05, "loss": 0.6863, "step": 124550 }, { "epoch": 1.38, "learning_rate": 2.7013944252517197e-05, "loss": 0.6446, "step": 124555 }, { "epoch": 1.38, "learning_rate": 2.701302152537869e-05, "loss": 0.6445, "step": 124560 }, { "epoch": 1.38, "learning_rate": 2.7012098798240176e-05, "loss": 0.7135, "step": 124565 }, { "epoch": 1.38, "learning_rate": 2.7011176071101664e-05, "loss": 0.6044, "step": 124570 }, { "epoch": 1.38, "learning_rate": 2.701025334396315e-05, "loss": 0.6652, "step": 124575 }, { "epoch": 1.38, "learning_rate": 2.700933061682464e-05, "loss": 0.7104, "step": 124580 }, { "epoch": 1.38, "learning_rate": 2.7008407889686128e-05, "loss": 0.6758, "step": 124585 }, { "epoch": 1.38, "learning_rate": 2.7007485162547612e-05, "loss": 0.6313, "step": 124590 }, { "epoch": 1.38, "learning_rate": 2.70065624354091e-05, "loss": 0.5931, "step": 124595 }, { "epoch": 1.38, "learning_rate": 2.700563970827059e-05, "loss": 0.6783, "step": 124600 }, { "epoch": 1.38, "learning_rate": 2.700471698113208e-05, "loss": 0.651, "step": 124605 }, { "epoch": 1.38, "learning_rate": 2.7003794253993564e-05, "loss": 0.6368, "step": 124610 }, { "epoch": 1.38, "learning_rate": 2.700287152685505e-05, "loss": 0.6477, "step": 124615 }, { "epoch": 1.38, "learning_rate": 2.7001948799716543e-05, "loss": 0.6065, "step": 124620 }, { "epoch": 1.38, "learning_rate": 2.7001026072578027e-05, "loss": 0.6302, "step": 124625 }, { "epoch": 1.38, "learning_rate": 2.7000103345439515e-05, "loss": 0.6338, "step": 124630 }, { "epoch": 1.38, "learning_rate": 2.6999180618301e-05, "loss": 0.6212, "step": 124635 }, { "epoch": 1.38, "learning_rate": 2.6998257891162487e-05, "loss": 0.6179, "step": 124640 }, { "epoch": 1.38, "learning_rate": 2.699733516402398e-05, "loss": 0.6752, "step": 124645 }, { "epoch": 1.38, "learning_rate": 2.6996412436885466e-05, "loss": 0.6269, "step": 124650 }, { "epoch": 1.38, "learning_rate": 2.699548970974695e-05, "loss": 0.6295, "step": 124655 }, { "epoch": 1.38, "learning_rate": 2.699456698260844e-05, "loss": 0.6616, "step": 124660 }, { "epoch": 1.38, "learning_rate": 2.699364425546993e-05, "loss": 0.7017, "step": 124665 }, { "epoch": 1.38, "learning_rate": 2.6992721528331414e-05, "loss": 0.6355, "step": 124670 }, { "epoch": 1.38, "learning_rate": 2.6991798801192902e-05, "loss": 0.6704, "step": 124675 }, { "epoch": 1.38, "learning_rate": 2.699087607405439e-05, "loss": 0.6298, "step": 124680 }, { "epoch": 1.38, "learning_rate": 2.698995334691588e-05, "loss": 0.6248, "step": 124685 }, { "epoch": 1.38, "learning_rate": 2.6989030619777366e-05, "loss": 0.6378, "step": 124690 }, { "epoch": 1.38, "learning_rate": 2.6988107892638854e-05, "loss": 0.6407, "step": 124695 }, { "epoch": 1.38, "learning_rate": 2.6987185165500338e-05, "loss": 0.6303, "step": 124700 }, { "epoch": 1.38, "learning_rate": 2.6986262438361833e-05, "loss": 0.6727, "step": 124705 }, { "epoch": 1.38, "learning_rate": 2.6985339711223317e-05, "loss": 0.7092, "step": 124710 }, { "epoch": 1.38, "learning_rate": 2.6984416984084805e-05, "loss": 0.6475, "step": 124715 }, { "epoch": 1.38, "learning_rate": 2.698349425694629e-05, "loss": 0.6979, "step": 124720 }, { "epoch": 1.38, "learning_rate": 2.6982571529807777e-05, "loss": 0.6476, "step": 124725 }, { "epoch": 1.38, "learning_rate": 2.698164880266927e-05, "loss": 0.6339, "step": 124730 }, { "epoch": 1.38, "learning_rate": 2.6980726075530753e-05, "loss": 0.6112, "step": 124735 }, { "epoch": 1.38, "learning_rate": 2.697980334839224e-05, "loss": 0.626, "step": 124740 }, { "epoch": 1.38, "learning_rate": 2.6978880621253725e-05, "loss": 0.6611, "step": 124745 }, { "epoch": 1.38, "learning_rate": 2.697795789411522e-05, "loss": 0.6296, "step": 124750 }, { "epoch": 1.38, "learning_rate": 2.6977035166976704e-05, "loss": 0.6247, "step": 124755 }, { "epoch": 1.38, "learning_rate": 2.6976112439838192e-05, "loss": 0.6559, "step": 124760 }, { "epoch": 1.38, "learning_rate": 2.6975189712699677e-05, "loss": 0.6379, "step": 124765 }, { "epoch": 1.38, "learning_rate": 2.6974266985561168e-05, "loss": 0.655, "step": 124770 }, { "epoch": 1.38, "learning_rate": 2.6973344258422656e-05, "loss": 0.6647, "step": 124775 }, { "epoch": 1.38, "learning_rate": 2.6972421531284144e-05, "loss": 0.6146, "step": 124780 }, { "epoch": 1.38, "learning_rate": 2.6971498804145628e-05, "loss": 0.7039, "step": 124785 }, { "epoch": 1.38, "learning_rate": 2.6970576077007116e-05, "loss": 0.5882, "step": 124790 }, { "epoch": 1.38, "learning_rate": 2.6969653349868607e-05, "loss": 0.6458, "step": 124795 }, { "epoch": 1.38, "learning_rate": 2.6968730622730092e-05, "loss": 0.5937, "step": 124800 }, { "epoch": 1.38, "learning_rate": 2.696780789559158e-05, "loss": 0.6269, "step": 124805 }, { "epoch": 1.38, "learning_rate": 2.6966885168453064e-05, "loss": 0.633, "step": 124810 }, { "epoch": 1.38, "learning_rate": 2.696596244131456e-05, "loss": 0.613, "step": 124815 }, { "epoch": 1.38, "learning_rate": 2.6965039714176043e-05, "loss": 0.6142, "step": 124820 }, { "epoch": 1.38, "learning_rate": 2.696411698703753e-05, "loss": 0.6783, "step": 124825 }, { "epoch": 1.38, "learning_rate": 2.6963194259899015e-05, "loss": 0.6484, "step": 124830 }, { "epoch": 1.38, "learning_rate": 2.6962271532760507e-05, "loss": 0.5958, "step": 124835 }, { "epoch": 1.38, "learning_rate": 2.6961348805621995e-05, "loss": 0.6887, "step": 124840 }, { "epoch": 1.38, "learning_rate": 2.696042607848348e-05, "loss": 0.5936, "step": 124845 }, { "epoch": 1.38, "learning_rate": 2.6959503351344967e-05, "loss": 0.7188, "step": 124850 }, { "epoch": 1.38, "learning_rate": 2.6958580624206458e-05, "loss": 0.6753, "step": 124855 }, { "epoch": 1.38, "learning_rate": 2.6957657897067946e-05, "loss": 0.6351, "step": 124860 }, { "epoch": 1.38, "learning_rate": 2.695673516992943e-05, "loss": 0.6244, "step": 124865 }, { "epoch": 1.38, "learning_rate": 2.6955812442790918e-05, "loss": 0.6995, "step": 124870 }, { "epoch": 1.38, "learning_rate": 2.6954889715652403e-05, "loss": 0.7114, "step": 124875 }, { "epoch": 1.38, "learning_rate": 2.6953966988513897e-05, "loss": 0.7, "step": 124880 }, { "epoch": 1.38, "learning_rate": 2.6953044261375382e-05, "loss": 0.6258, "step": 124885 }, { "epoch": 1.38, "learning_rate": 2.695212153423687e-05, "loss": 0.6673, "step": 124890 }, { "epoch": 1.38, "learning_rate": 2.6951198807098354e-05, "loss": 0.6336, "step": 124895 }, { "epoch": 1.38, "learning_rate": 2.6950276079959845e-05, "loss": 0.6947, "step": 124900 }, { "epoch": 1.38, "learning_rate": 2.6949353352821333e-05, "loss": 0.6189, "step": 124905 }, { "epoch": 1.38, "learning_rate": 2.6948430625682818e-05, "loss": 0.6304, "step": 124910 }, { "epoch": 1.38, "learning_rate": 2.6947507898544305e-05, "loss": 0.6051, "step": 124915 }, { "epoch": 1.38, "learning_rate": 2.6946585171405797e-05, "loss": 0.5878, "step": 124920 }, { "epoch": 1.38, "learning_rate": 2.6945662444267285e-05, "loss": 0.6447, "step": 124925 }, { "epoch": 1.38, "learning_rate": 2.694473971712877e-05, "loss": 0.6241, "step": 124930 }, { "epoch": 1.38, "learning_rate": 2.6943816989990257e-05, "loss": 0.6602, "step": 124935 }, { "epoch": 1.38, "learning_rate": 2.694289426285174e-05, "loss": 0.6242, "step": 124940 }, { "epoch": 1.38, "learning_rate": 2.6941971535713233e-05, "loss": 0.7064, "step": 124945 }, { "epoch": 1.38, "learning_rate": 2.694104880857472e-05, "loss": 0.6591, "step": 124950 }, { "epoch": 1.38, "learning_rate": 2.6940126081436208e-05, "loss": 0.6325, "step": 124955 }, { "epoch": 1.38, "learning_rate": 2.6939203354297693e-05, "loss": 0.6554, "step": 124960 }, { "epoch": 1.38, "learning_rate": 2.6938280627159184e-05, "loss": 0.6863, "step": 124965 }, { "epoch": 1.38, "learning_rate": 2.6937357900020672e-05, "loss": 0.6866, "step": 124970 }, { "epoch": 1.38, "learning_rate": 2.6936435172882156e-05, "loss": 0.6105, "step": 124975 }, { "epoch": 1.38, "learning_rate": 2.6935512445743644e-05, "loss": 0.6808, "step": 124980 }, { "epoch": 1.38, "learning_rate": 2.6934589718605135e-05, "loss": 0.638, "step": 124985 }, { "epoch": 1.38, "learning_rate": 2.6933666991466623e-05, "loss": 0.66, "step": 124990 }, { "epoch": 1.38, "learning_rate": 2.6932744264328108e-05, "loss": 0.6818, "step": 124995 }, { "epoch": 1.38, "learning_rate": 2.6931821537189596e-05, "loss": 0.6286, "step": 125000 }, { "epoch": 1.38, "eval_loss": 0.614825427532196, "eval_runtime": 69.4078, "eval_samples_per_second": 28.815, "eval_steps_per_second": 14.408, "step": 125000 }, { "epoch": 1.38, "learning_rate": 2.6930898810051087e-05, "loss": 0.6226, "step": 125005 }, { "epoch": 1.38, "learning_rate": 2.692997608291257e-05, "loss": 0.6086, "step": 125010 }, { "epoch": 1.38, "learning_rate": 2.692905335577406e-05, "loss": 0.6696, "step": 125015 }, { "epoch": 1.38, "learning_rate": 2.6928130628635544e-05, "loss": 0.6615, "step": 125020 }, { "epoch": 1.38, "learning_rate": 2.692720790149703e-05, "loss": 0.6576, "step": 125025 }, { "epoch": 1.38, "learning_rate": 2.6926285174358523e-05, "loss": 0.6442, "step": 125030 }, { "epoch": 1.38, "learning_rate": 2.692536244722001e-05, "loss": 0.6708, "step": 125035 }, { "epoch": 1.38, "learning_rate": 2.6924439720081495e-05, "loss": 0.5753, "step": 125040 }, { "epoch": 1.38, "learning_rate": 2.6923516992942983e-05, "loss": 0.6695, "step": 125045 }, { "epoch": 1.38, "learning_rate": 2.6922594265804474e-05, "loss": 0.6526, "step": 125050 }, { "epoch": 1.38, "learning_rate": 2.692167153866596e-05, "loss": 0.7097, "step": 125055 }, { "epoch": 1.38, "learning_rate": 2.6920748811527446e-05, "loss": 0.61, "step": 125060 }, { "epoch": 1.38, "learning_rate": 2.6919826084388934e-05, "loss": 0.6514, "step": 125065 }, { "epoch": 1.38, "learning_rate": 2.6918903357250425e-05, "loss": 0.6535, "step": 125070 }, { "epoch": 1.38, "learning_rate": 2.691798063011191e-05, "loss": 0.7161, "step": 125075 }, { "epoch": 1.38, "learning_rate": 2.6917057902973398e-05, "loss": 0.6326, "step": 125080 }, { "epoch": 1.39, "learning_rate": 2.6916135175834882e-05, "loss": 0.6455, "step": 125085 }, { "epoch": 1.39, "learning_rate": 2.691521244869637e-05, "loss": 0.6772, "step": 125090 }, { "epoch": 1.39, "learning_rate": 2.691428972155786e-05, "loss": 0.5737, "step": 125095 }, { "epoch": 1.39, "learning_rate": 2.691336699441935e-05, "loss": 0.6443, "step": 125100 }, { "epoch": 1.39, "learning_rate": 2.6912444267280834e-05, "loss": 0.6368, "step": 125105 }, { "epoch": 1.39, "learning_rate": 2.691152154014232e-05, "loss": 0.6155, "step": 125110 }, { "epoch": 1.39, "learning_rate": 2.6910598813003813e-05, "loss": 0.6513, "step": 125115 }, { "epoch": 1.39, "learning_rate": 2.6909676085865297e-05, "loss": 0.6765, "step": 125120 }, { "epoch": 1.39, "learning_rate": 2.6908753358726785e-05, "loss": 0.7146, "step": 125125 }, { "epoch": 1.39, "learning_rate": 2.6907830631588273e-05, "loss": 0.6313, "step": 125130 }, { "epoch": 1.39, "learning_rate": 2.6906907904449764e-05, "loss": 0.6242, "step": 125135 }, { "epoch": 1.39, "learning_rate": 2.690598517731125e-05, "loss": 0.6627, "step": 125140 }, { "epoch": 1.39, "learning_rate": 2.6905062450172736e-05, "loss": 0.6237, "step": 125145 }, { "epoch": 1.39, "learning_rate": 2.690413972303422e-05, "loss": 0.6046, "step": 125150 }, { "epoch": 1.39, "learning_rate": 2.6903216995895712e-05, "loss": 0.6612, "step": 125155 }, { "epoch": 1.39, "learning_rate": 2.69022942687572e-05, "loss": 0.6754, "step": 125160 }, { "epoch": 1.39, "learning_rate": 2.6901371541618688e-05, "loss": 0.6495, "step": 125165 }, { "epoch": 1.39, "learning_rate": 2.6900448814480172e-05, "loss": 0.6462, "step": 125170 }, { "epoch": 1.39, "learning_rate": 2.689952608734166e-05, "loss": 0.6769, "step": 125175 }, { "epoch": 1.39, "learning_rate": 2.689860336020315e-05, "loss": 0.6496, "step": 125180 }, { "epoch": 1.39, "learning_rate": 2.6897680633064636e-05, "loss": 0.6526, "step": 125185 }, { "epoch": 1.39, "learning_rate": 2.6896757905926124e-05, "loss": 0.6775, "step": 125190 }, { "epoch": 1.39, "learning_rate": 2.6895835178787608e-05, "loss": 0.5988, "step": 125195 }, { "epoch": 1.39, "learning_rate": 2.6894912451649103e-05, "loss": 0.622, "step": 125200 }, { "epoch": 1.39, "learning_rate": 2.6893989724510587e-05, "loss": 0.6516, "step": 125205 }, { "epoch": 1.39, "learning_rate": 2.6893066997372075e-05, "loss": 0.6982, "step": 125210 }, { "epoch": 1.39, "learning_rate": 2.689214427023356e-05, "loss": 0.7058, "step": 125215 }, { "epoch": 1.39, "learning_rate": 2.689122154309505e-05, "loss": 0.6733, "step": 125220 }, { "epoch": 1.39, "learning_rate": 2.689029881595654e-05, "loss": 0.6834, "step": 125225 }, { "epoch": 1.39, "learning_rate": 2.6889376088818023e-05, "loss": 0.6429, "step": 125230 }, { "epoch": 1.39, "learning_rate": 2.688845336167951e-05, "loss": 0.6641, "step": 125235 }, { "epoch": 1.39, "learning_rate": 2.6887530634541e-05, "loss": 0.6326, "step": 125240 }, { "epoch": 1.39, "learning_rate": 2.688660790740249e-05, "loss": 0.6545, "step": 125245 }, { "epoch": 1.39, "learning_rate": 2.6885685180263974e-05, "loss": 0.8025, "step": 125250 }, { "epoch": 1.39, "learning_rate": 2.6884762453125462e-05, "loss": 0.6493, "step": 125255 }, { "epoch": 1.39, "learning_rate": 2.6883839725986947e-05, "loss": 0.5891, "step": 125260 }, { "epoch": 1.39, "learning_rate": 2.688291699884844e-05, "loss": 0.6366, "step": 125265 }, { "epoch": 1.39, "learning_rate": 2.6881994271709926e-05, "loss": 0.6839, "step": 125270 }, { "epoch": 1.39, "learning_rate": 2.6881071544571414e-05, "loss": 0.661, "step": 125275 }, { "epoch": 1.39, "learning_rate": 2.6880148817432898e-05, "loss": 0.6232, "step": 125280 }, { "epoch": 1.39, "learning_rate": 2.687922609029439e-05, "loss": 0.6711, "step": 125285 }, { "epoch": 1.39, "learning_rate": 2.6878303363155877e-05, "loss": 0.6577, "step": 125290 }, { "epoch": 1.39, "learning_rate": 2.687738063601736e-05, "loss": 0.6452, "step": 125295 }, { "epoch": 1.39, "learning_rate": 2.687645790887885e-05, "loss": 0.6358, "step": 125300 }, { "epoch": 1.39, "learning_rate": 2.687553518174034e-05, "loss": 0.6893, "step": 125305 }, { "epoch": 1.39, "learning_rate": 2.687461245460183e-05, "loss": 0.6368, "step": 125310 }, { "epoch": 1.39, "learning_rate": 2.6873689727463313e-05, "loss": 0.6392, "step": 125315 }, { "epoch": 1.39, "learning_rate": 2.68727670003248e-05, "loss": 0.6569, "step": 125320 }, { "epoch": 1.39, "learning_rate": 2.6871844273186285e-05, "loss": 0.6281, "step": 125325 }, { "epoch": 1.39, "learning_rate": 2.6870921546047777e-05, "loss": 0.6268, "step": 125330 }, { "epoch": 1.39, "learning_rate": 2.6869998818909264e-05, "loss": 0.6354, "step": 125335 }, { "epoch": 1.39, "learning_rate": 2.6869076091770752e-05, "loss": 0.6627, "step": 125340 }, { "epoch": 1.39, "learning_rate": 2.6868153364632237e-05, "loss": 0.649, "step": 125345 }, { "epoch": 1.39, "learning_rate": 2.6867230637493728e-05, "loss": 0.6284, "step": 125350 }, { "epoch": 1.39, "learning_rate": 2.6866307910355216e-05, "loss": 0.6365, "step": 125355 }, { "epoch": 1.39, "learning_rate": 2.68653851832167e-05, "loss": 0.6708, "step": 125360 }, { "epoch": 1.39, "learning_rate": 2.6864462456078188e-05, "loss": 0.67, "step": 125365 }, { "epoch": 1.39, "learning_rate": 2.686353972893968e-05, "loss": 0.6677, "step": 125370 }, { "epoch": 1.39, "learning_rate": 2.6862617001801167e-05, "loss": 0.7212, "step": 125375 }, { "epoch": 1.39, "learning_rate": 2.686169427466265e-05, "loss": 0.6192, "step": 125380 }, { "epoch": 1.39, "learning_rate": 2.686077154752414e-05, "loss": 0.6506, "step": 125385 }, { "epoch": 1.39, "learning_rate": 2.6859848820385624e-05, "loss": 0.6535, "step": 125390 }, { "epoch": 1.39, "learning_rate": 2.6858926093247115e-05, "loss": 0.6167, "step": 125395 }, { "epoch": 1.39, "learning_rate": 2.6858003366108603e-05, "loss": 0.5888, "step": 125400 }, { "epoch": 1.39, "learning_rate": 2.6857080638970088e-05, "loss": 0.6525, "step": 125405 }, { "epoch": 1.39, "learning_rate": 2.6856157911831575e-05, "loss": 0.7053, "step": 125410 }, { "epoch": 1.39, "learning_rate": 2.6855235184693067e-05, "loss": 0.6668, "step": 125415 }, { "epoch": 1.39, "learning_rate": 2.6854312457554554e-05, "loss": 0.641, "step": 125420 }, { "epoch": 1.39, "learning_rate": 2.685338973041604e-05, "loss": 0.6277, "step": 125425 }, { "epoch": 1.39, "learning_rate": 2.6852467003277527e-05, "loss": 0.7067, "step": 125430 }, { "epoch": 1.39, "learning_rate": 2.6851544276139018e-05, "loss": 0.5967, "step": 125435 }, { "epoch": 1.39, "learning_rate": 2.6850621549000506e-05, "loss": 0.629, "step": 125440 }, { "epoch": 1.39, "learning_rate": 2.684969882186199e-05, "loss": 0.6335, "step": 125445 }, { "epoch": 1.39, "learning_rate": 2.6848776094723478e-05, "loss": 0.6694, "step": 125450 }, { "epoch": 1.39, "learning_rate": 2.684785336758497e-05, "loss": 0.6575, "step": 125455 }, { "epoch": 1.39, "learning_rate": 2.6846930640446454e-05, "loss": 0.6392, "step": 125460 }, { "epoch": 1.39, "learning_rate": 2.6846007913307942e-05, "loss": 0.6703, "step": 125465 }, { "epoch": 1.39, "learning_rate": 2.6845085186169426e-05, "loss": 0.6823, "step": 125470 }, { "epoch": 1.39, "learning_rate": 2.6844162459030914e-05, "loss": 0.6505, "step": 125475 }, { "epoch": 1.39, "learning_rate": 2.6843239731892405e-05, "loss": 0.7314, "step": 125480 }, { "epoch": 1.39, "learning_rate": 2.6842317004753893e-05, "loss": 0.5877, "step": 125485 }, { "epoch": 1.39, "learning_rate": 2.6841394277615378e-05, "loss": 0.6125, "step": 125490 }, { "epoch": 1.39, "learning_rate": 2.6840471550476865e-05, "loss": 0.6566, "step": 125495 }, { "epoch": 1.39, "learning_rate": 2.6839548823338357e-05, "loss": 0.6373, "step": 125500 }, { "epoch": 1.39, "learning_rate": 2.683862609619984e-05, "loss": 0.6323, "step": 125505 }, { "epoch": 1.39, "learning_rate": 2.683770336906133e-05, "loss": 0.6639, "step": 125510 }, { "epoch": 1.39, "learning_rate": 2.6836780641922817e-05, "loss": 0.7306, "step": 125515 }, { "epoch": 1.39, "learning_rate": 2.6835857914784308e-05, "loss": 0.6571, "step": 125520 }, { "epoch": 1.39, "learning_rate": 2.6834935187645793e-05, "loss": 0.6581, "step": 125525 }, { "epoch": 1.39, "learning_rate": 2.683401246050728e-05, "loss": 0.6561, "step": 125530 }, { "epoch": 1.39, "learning_rate": 2.6833089733368765e-05, "loss": 0.5861, "step": 125535 }, { "epoch": 1.39, "learning_rate": 2.6832167006230256e-05, "loss": 0.6712, "step": 125540 }, { "epoch": 1.39, "learning_rate": 2.6831244279091744e-05, "loss": 0.6783, "step": 125545 }, { "epoch": 1.39, "learning_rate": 2.6830321551953232e-05, "loss": 0.6449, "step": 125550 }, { "epoch": 1.39, "learning_rate": 2.6829398824814716e-05, "loss": 0.6318, "step": 125555 }, { "epoch": 1.39, "learning_rate": 2.6828476097676204e-05, "loss": 0.7061, "step": 125560 }, { "epoch": 1.39, "learning_rate": 2.6827553370537695e-05, "loss": 0.6765, "step": 125565 }, { "epoch": 1.39, "learning_rate": 2.682663064339918e-05, "loss": 0.6218, "step": 125570 }, { "epoch": 1.39, "learning_rate": 2.6825707916260668e-05, "loss": 0.5853, "step": 125575 }, { "epoch": 1.39, "learning_rate": 2.6824785189122152e-05, "loss": 0.6308, "step": 125580 }, { "epoch": 1.39, "learning_rate": 2.6823862461983647e-05, "loss": 0.6557, "step": 125585 }, { "epoch": 1.39, "learning_rate": 2.682293973484513e-05, "loss": 0.6711, "step": 125590 }, { "epoch": 1.39, "learning_rate": 2.682201700770662e-05, "loss": 0.6374, "step": 125595 }, { "epoch": 1.39, "learning_rate": 2.6821094280568103e-05, "loss": 0.7144, "step": 125600 }, { "epoch": 1.39, "learning_rate": 2.6820171553429595e-05, "loss": 0.68, "step": 125605 }, { "epoch": 1.39, "learning_rate": 2.6819248826291083e-05, "loss": 0.7258, "step": 125610 }, { "epoch": 1.39, "learning_rate": 2.6818326099152567e-05, "loss": 0.6723, "step": 125615 }, { "epoch": 1.39, "learning_rate": 2.6817403372014055e-05, "loss": 0.6455, "step": 125620 }, { "epoch": 1.39, "learning_rate": 2.6816480644875543e-05, "loss": 0.6717, "step": 125625 }, { "epoch": 1.39, "learning_rate": 2.6815557917737034e-05, "loss": 0.6218, "step": 125630 }, { "epoch": 1.39, "learning_rate": 2.681463519059852e-05, "loss": 0.6635, "step": 125635 }, { "epoch": 1.39, "learning_rate": 2.6813712463460006e-05, "loss": 0.7507, "step": 125640 }, { "epoch": 1.39, "learning_rate": 2.681278973632149e-05, "loss": 0.6544, "step": 125645 }, { "epoch": 1.39, "learning_rate": 2.6811867009182985e-05, "loss": 0.686, "step": 125650 }, { "epoch": 1.39, "learning_rate": 2.681094428204447e-05, "loss": 0.6544, "step": 125655 }, { "epoch": 1.39, "learning_rate": 2.6810021554905958e-05, "loss": 0.6043, "step": 125660 }, { "epoch": 1.39, "learning_rate": 2.6809098827767442e-05, "loss": 0.6242, "step": 125665 }, { "epoch": 1.39, "learning_rate": 2.6808176100628933e-05, "loss": 0.624, "step": 125670 }, { "epoch": 1.39, "learning_rate": 2.680725337349042e-05, "loss": 0.6329, "step": 125675 }, { "epoch": 1.39, "learning_rate": 2.6806330646351906e-05, "loss": 0.6516, "step": 125680 }, { "epoch": 1.39, "learning_rate": 2.6805407919213394e-05, "loss": 0.6051, "step": 125685 }, { "epoch": 1.39, "learning_rate": 2.6804485192074885e-05, "loss": 0.6248, "step": 125690 }, { "epoch": 1.39, "learning_rate": 2.6803562464936373e-05, "loss": 0.6181, "step": 125695 }, { "epoch": 1.39, "learning_rate": 2.6802639737797857e-05, "loss": 0.6451, "step": 125700 }, { "epoch": 1.39, "learning_rate": 2.6801717010659345e-05, "loss": 0.6326, "step": 125705 }, { "epoch": 1.39, "learning_rate": 2.680079428352083e-05, "loss": 0.6003, "step": 125710 }, { "epoch": 1.39, "learning_rate": 2.679987155638232e-05, "loss": 0.6482, "step": 125715 }, { "epoch": 1.39, "learning_rate": 2.679894882924381e-05, "loss": 0.6273, "step": 125720 }, { "epoch": 1.39, "learning_rate": 2.6798026102105296e-05, "loss": 0.6604, "step": 125725 }, { "epoch": 1.39, "learning_rate": 2.679710337496678e-05, "loss": 0.6651, "step": 125730 }, { "epoch": 1.39, "learning_rate": 2.6796180647828272e-05, "loss": 0.6885, "step": 125735 }, { "epoch": 1.39, "learning_rate": 2.679525792068976e-05, "loss": 0.6455, "step": 125740 }, { "epoch": 1.39, "learning_rate": 2.6794335193551244e-05, "loss": 0.6508, "step": 125745 }, { "epoch": 1.39, "learning_rate": 2.6793412466412732e-05, "loss": 0.7128, "step": 125750 }, { "epoch": 1.39, "learning_rate": 2.6792489739274223e-05, "loss": 0.6509, "step": 125755 }, { "epoch": 1.39, "learning_rate": 2.679156701213571e-05, "loss": 0.6426, "step": 125760 }, { "epoch": 1.39, "learning_rate": 2.6790644284997196e-05, "loss": 0.6543, "step": 125765 }, { "epoch": 1.39, "learning_rate": 2.6789721557858684e-05, "loss": 0.647, "step": 125770 }, { "epoch": 1.39, "learning_rate": 2.6788798830720168e-05, "loss": 0.5921, "step": 125775 }, { "epoch": 1.39, "learning_rate": 2.678787610358166e-05, "loss": 0.6306, "step": 125780 }, { "epoch": 1.39, "learning_rate": 2.6786953376443147e-05, "loss": 0.6236, "step": 125785 }, { "epoch": 1.39, "learning_rate": 2.678603064930463e-05, "loss": 0.6153, "step": 125790 }, { "epoch": 1.39, "learning_rate": 2.678510792216612e-05, "loss": 0.6997, "step": 125795 }, { "epoch": 1.39, "learning_rate": 2.678418519502761e-05, "loss": 0.6501, "step": 125800 }, { "epoch": 1.39, "learning_rate": 2.67832624678891e-05, "loss": 0.6228, "step": 125805 }, { "epoch": 1.39, "learning_rate": 2.6782339740750583e-05, "loss": 0.5687, "step": 125810 }, { "epoch": 1.39, "learning_rate": 2.678141701361207e-05, "loss": 0.597, "step": 125815 }, { "epoch": 1.39, "learning_rate": 2.6780494286473562e-05, "loss": 0.6332, "step": 125820 }, { "epoch": 1.39, "learning_rate": 2.677957155933505e-05, "loss": 0.6653, "step": 125825 }, { "epoch": 1.39, "learning_rate": 2.6778648832196534e-05, "loss": 0.6992, "step": 125830 }, { "epoch": 1.39, "learning_rate": 2.6777726105058022e-05, "loss": 0.6742, "step": 125835 }, { "epoch": 1.39, "learning_rate": 2.6776803377919513e-05, "loss": 0.6153, "step": 125840 }, { "epoch": 1.39, "learning_rate": 2.6775880650780998e-05, "loss": 0.5904, "step": 125845 }, { "epoch": 1.39, "learning_rate": 2.6774957923642486e-05, "loss": 0.7048, "step": 125850 }, { "epoch": 1.39, "learning_rate": 2.677403519650397e-05, "loss": 0.6532, "step": 125855 }, { "epoch": 1.39, "learning_rate": 2.6773112469365458e-05, "loss": 0.5826, "step": 125860 }, { "epoch": 1.39, "learning_rate": 2.677218974222695e-05, "loss": 0.6546, "step": 125865 }, { "epoch": 1.39, "learning_rate": 2.6771267015088437e-05, "loss": 0.6637, "step": 125870 }, { "epoch": 1.39, "learning_rate": 2.677034428794992e-05, "loss": 0.6829, "step": 125875 }, { "epoch": 1.39, "learning_rate": 2.676942156081141e-05, "loss": 0.6678, "step": 125880 }, { "epoch": 1.39, "learning_rate": 2.67684988336729e-05, "loss": 0.6868, "step": 125885 }, { "epoch": 1.39, "learning_rate": 2.6767576106534385e-05, "loss": 0.6357, "step": 125890 }, { "epoch": 1.39, "learning_rate": 2.6766653379395873e-05, "loss": 0.6013, "step": 125895 }, { "epoch": 1.39, "learning_rate": 2.676573065225736e-05, "loss": 0.6324, "step": 125900 }, { "epoch": 1.39, "learning_rate": 2.6764807925118852e-05, "loss": 0.6377, "step": 125905 }, { "epoch": 1.39, "learning_rate": 2.6763885197980337e-05, "loss": 0.6415, "step": 125910 }, { "epoch": 1.39, "learning_rate": 2.6762962470841824e-05, "loss": 0.5954, "step": 125915 }, { "epoch": 1.39, "learning_rate": 2.676203974370331e-05, "loss": 0.6907, "step": 125920 }, { "epoch": 1.39, "learning_rate": 2.6761117016564797e-05, "loss": 0.6499, "step": 125925 }, { "epoch": 1.39, "learning_rate": 2.6760194289426288e-05, "loss": 0.6404, "step": 125930 }, { "epoch": 1.39, "learning_rate": 2.6759271562287776e-05, "loss": 0.624, "step": 125935 }, { "epoch": 1.39, "learning_rate": 2.675834883514926e-05, "loss": 0.6693, "step": 125940 }, { "epoch": 1.39, "learning_rate": 2.6757426108010748e-05, "loss": 0.6604, "step": 125945 }, { "epoch": 1.39, "learning_rate": 2.675650338087224e-05, "loss": 0.6288, "step": 125950 }, { "epoch": 1.39, "learning_rate": 2.6755580653733724e-05, "loss": 0.6681, "step": 125955 }, { "epoch": 1.39, "learning_rate": 2.675465792659521e-05, "loss": 0.6915, "step": 125960 }, { "epoch": 1.39, "learning_rate": 2.6753735199456696e-05, "loss": 0.6453, "step": 125965 }, { "epoch": 1.39, "learning_rate": 2.675281247231819e-05, "loss": 0.6787, "step": 125970 }, { "epoch": 1.39, "learning_rate": 2.6751889745179675e-05, "loss": 0.6242, "step": 125975 }, { "epoch": 1.39, "learning_rate": 2.6750967018041163e-05, "loss": 0.6557, "step": 125980 }, { "epoch": 1.39, "learning_rate": 2.6750044290902647e-05, "loss": 0.6412, "step": 125985 }, { "epoch": 1.4, "learning_rate": 2.674912156376414e-05, "loss": 0.6759, "step": 125990 }, { "epoch": 1.4, "learning_rate": 2.6748198836625627e-05, "loss": 0.648, "step": 125995 }, { "epoch": 1.4, "learning_rate": 2.674727610948711e-05, "loss": 0.7036, "step": 126000 }, { "epoch": 1.4, "eval_loss": 0.6032633185386658, "eval_runtime": 69.2673, "eval_samples_per_second": 28.874, "eval_steps_per_second": 14.437, "step": 126000 }, { "epoch": 1.4, "learning_rate": 2.67463533823486e-05, "loss": 0.6297, "step": 126005 }, { "epoch": 1.4, "learning_rate": 2.6745430655210087e-05, "loss": 0.6775, "step": 126010 }, { "epoch": 1.4, "learning_rate": 2.6744507928071578e-05, "loss": 0.6072, "step": 126015 }, { "epoch": 1.4, "learning_rate": 2.6743585200933062e-05, "loss": 0.6765, "step": 126020 }, { "epoch": 1.4, "learning_rate": 2.674266247379455e-05, "loss": 0.6843, "step": 126025 }, { "epoch": 1.4, "learning_rate": 2.6741739746656035e-05, "loss": 0.7146, "step": 126030 }, { "epoch": 1.4, "learning_rate": 2.674081701951753e-05, "loss": 0.656, "step": 126035 }, { "epoch": 1.4, "learning_rate": 2.6739894292379014e-05, "loss": 0.6728, "step": 126040 }, { "epoch": 1.4, "learning_rate": 2.67389715652405e-05, "loss": 0.6538, "step": 126045 }, { "epoch": 1.4, "learning_rate": 2.6738048838101986e-05, "loss": 0.6563, "step": 126050 }, { "epoch": 1.4, "learning_rate": 2.6737126110963477e-05, "loss": 0.6212, "step": 126055 }, { "epoch": 1.4, "learning_rate": 2.6736203383824965e-05, "loss": 0.6469, "step": 126060 }, { "epoch": 1.4, "learning_rate": 2.673528065668645e-05, "loss": 0.6254, "step": 126065 }, { "epoch": 1.4, "learning_rate": 2.6734357929547938e-05, "loss": 0.6438, "step": 126070 }, { "epoch": 1.4, "learning_rate": 2.6733435202409422e-05, "loss": 0.624, "step": 126075 }, { "epoch": 1.4, "learning_rate": 2.6732512475270917e-05, "loss": 0.6688, "step": 126080 }, { "epoch": 1.4, "learning_rate": 2.67315897481324e-05, "loss": 0.6509, "step": 126085 }, { "epoch": 1.4, "learning_rate": 2.673066702099389e-05, "loss": 0.737, "step": 126090 }, { "epoch": 1.4, "learning_rate": 2.6729744293855373e-05, "loss": 0.7096, "step": 126095 }, { "epoch": 1.4, "learning_rate": 2.6728821566716865e-05, "loss": 0.6625, "step": 126100 }, { "epoch": 1.4, "learning_rate": 2.6727898839578352e-05, "loss": 0.7054, "step": 126105 }, { "epoch": 1.4, "learning_rate": 2.672697611243984e-05, "loss": 0.6128, "step": 126110 }, { "epoch": 1.4, "learning_rate": 2.6726053385301325e-05, "loss": 0.6113, "step": 126115 }, { "epoch": 1.4, "learning_rate": 2.6725130658162816e-05, "loss": 0.651, "step": 126120 }, { "epoch": 1.4, "learning_rate": 2.6724207931024304e-05, "loss": 0.6611, "step": 126125 }, { "epoch": 1.4, "learning_rate": 2.672328520388579e-05, "loss": 0.6334, "step": 126130 }, { "epoch": 1.4, "learning_rate": 2.6722362476747276e-05, "loss": 0.6577, "step": 126135 }, { "epoch": 1.4, "learning_rate": 2.6721439749608767e-05, "loss": 0.6741, "step": 126140 }, { "epoch": 1.4, "learning_rate": 2.6720517022470255e-05, "loss": 0.6161, "step": 126145 }, { "epoch": 1.4, "learning_rate": 2.671959429533174e-05, "loss": 0.6185, "step": 126150 }, { "epoch": 1.4, "learning_rate": 2.6718671568193228e-05, "loss": 0.6638, "step": 126155 }, { "epoch": 1.4, "learning_rate": 2.6717748841054712e-05, "loss": 0.6908, "step": 126160 }, { "epoch": 1.4, "learning_rate": 2.6716826113916203e-05, "loss": 0.7009, "step": 126165 }, { "epoch": 1.4, "learning_rate": 2.671590338677769e-05, "loss": 0.6516, "step": 126170 }, { "epoch": 1.4, "learning_rate": 2.6714980659639176e-05, "loss": 0.684, "step": 126175 }, { "epoch": 1.4, "learning_rate": 2.6714057932500663e-05, "loss": 0.7088, "step": 126180 }, { "epoch": 1.4, "learning_rate": 2.6713135205362155e-05, "loss": 0.615, "step": 126185 }, { "epoch": 1.4, "learning_rate": 2.6712212478223643e-05, "loss": 0.6333, "step": 126190 }, { "epoch": 1.4, "learning_rate": 2.6711289751085127e-05, "loss": 0.595, "step": 126195 }, { "epoch": 1.4, "learning_rate": 2.6710367023946615e-05, "loss": 0.6209, "step": 126200 }, { "epoch": 1.4, "learning_rate": 2.6709444296808106e-05, "loss": 0.671, "step": 126205 }, { "epoch": 1.4, "learning_rate": 2.6708521569669594e-05, "loss": 0.6995, "step": 126210 }, { "epoch": 1.4, "learning_rate": 2.670759884253108e-05, "loss": 0.659, "step": 126215 }, { "epoch": 1.4, "learning_rate": 2.6706676115392566e-05, "loss": 0.6365, "step": 126220 }, { "epoch": 1.4, "learning_rate": 2.670575338825405e-05, "loss": 0.6606, "step": 126225 }, { "epoch": 1.4, "learning_rate": 2.6704830661115542e-05, "loss": 0.6351, "step": 126230 }, { "epoch": 1.4, "learning_rate": 2.670390793397703e-05, "loss": 0.6804, "step": 126235 }, { "epoch": 1.4, "learning_rate": 2.6702985206838514e-05, "loss": 0.6808, "step": 126240 }, { "epoch": 1.4, "learning_rate": 2.6702062479700002e-05, "loss": 0.6156, "step": 126245 }, { "epoch": 1.4, "learning_rate": 2.6701139752561493e-05, "loss": 0.6647, "step": 126250 }, { "epoch": 1.4, "learning_rate": 2.670021702542298e-05, "loss": 0.6858, "step": 126255 }, { "epoch": 1.4, "learning_rate": 2.6699294298284466e-05, "loss": 0.6222, "step": 126260 }, { "epoch": 1.4, "learning_rate": 2.6698371571145953e-05, "loss": 0.6943, "step": 126265 }, { "epoch": 1.4, "learning_rate": 2.6697448844007445e-05, "loss": 0.6551, "step": 126270 }, { "epoch": 1.4, "learning_rate": 2.669652611686893e-05, "loss": 0.6197, "step": 126275 }, { "epoch": 1.4, "learning_rate": 2.6695603389730417e-05, "loss": 0.6265, "step": 126280 }, { "epoch": 1.4, "learning_rate": 2.6694680662591905e-05, "loss": 0.6655, "step": 126285 }, { "epoch": 1.4, "learning_rate": 2.6693757935453396e-05, "loss": 0.6111, "step": 126290 }, { "epoch": 1.4, "learning_rate": 2.669283520831488e-05, "loss": 0.6459, "step": 126295 }, { "epoch": 1.4, "learning_rate": 2.669191248117637e-05, "loss": 0.6578, "step": 126300 }, { "epoch": 1.4, "learning_rate": 2.6690989754037853e-05, "loss": 0.6797, "step": 126305 }, { "epoch": 1.4, "learning_rate": 2.669006702689934e-05, "loss": 0.6873, "step": 126310 }, { "epoch": 1.4, "learning_rate": 2.6689144299760832e-05, "loss": 0.648, "step": 126315 }, { "epoch": 1.4, "learning_rate": 2.668822157262232e-05, "loss": 0.6627, "step": 126320 }, { "epoch": 1.4, "learning_rate": 2.6687298845483804e-05, "loss": 0.6551, "step": 126325 }, { "epoch": 1.4, "learning_rate": 2.6686376118345292e-05, "loss": 0.6438, "step": 126330 }, { "epoch": 1.4, "learning_rate": 2.6685453391206783e-05, "loss": 0.6835, "step": 126335 }, { "epoch": 1.4, "learning_rate": 2.6684530664068268e-05, "loss": 0.6854, "step": 126340 }, { "epoch": 1.4, "learning_rate": 2.6683607936929756e-05, "loss": 0.6419, "step": 126345 }, { "epoch": 1.4, "learning_rate": 2.668268520979124e-05, "loss": 0.654, "step": 126350 }, { "epoch": 1.4, "learning_rate": 2.6681762482652735e-05, "loss": 0.661, "step": 126355 }, { "epoch": 1.4, "learning_rate": 2.668083975551422e-05, "loss": 0.6374, "step": 126360 }, { "epoch": 1.4, "learning_rate": 2.6679917028375707e-05, "loss": 0.6102, "step": 126365 }, { "epoch": 1.4, "learning_rate": 2.667899430123719e-05, "loss": 0.6238, "step": 126370 }, { "epoch": 1.4, "learning_rate": 2.6678071574098683e-05, "loss": 0.6639, "step": 126375 }, { "epoch": 1.4, "learning_rate": 2.667714884696017e-05, "loss": 0.6117, "step": 126380 }, { "epoch": 1.4, "learning_rate": 2.667622611982166e-05, "loss": 0.6804, "step": 126385 }, { "epoch": 1.4, "learning_rate": 2.6675303392683143e-05, "loss": 0.6664, "step": 126390 }, { "epoch": 1.4, "learning_rate": 2.667438066554463e-05, "loss": 0.62, "step": 126395 }, { "epoch": 1.4, "learning_rate": 2.6673457938406122e-05, "loss": 0.6905, "step": 126400 }, { "epoch": 1.4, "learning_rate": 2.6672535211267606e-05, "loss": 0.6658, "step": 126405 }, { "epoch": 1.4, "learning_rate": 2.6671612484129094e-05, "loss": 0.6451, "step": 126410 }, { "epoch": 1.4, "learning_rate": 2.667068975699058e-05, "loss": 0.6799, "step": 126415 }, { "epoch": 1.4, "learning_rate": 2.6669767029852073e-05, "loss": 0.6512, "step": 126420 }, { "epoch": 1.4, "learning_rate": 2.6668844302713558e-05, "loss": 0.7106, "step": 126425 }, { "epoch": 1.4, "learning_rate": 2.6667921575575046e-05, "loss": 0.6174, "step": 126430 }, { "epoch": 1.4, "learning_rate": 2.666699884843653e-05, "loss": 0.6538, "step": 126435 }, { "epoch": 1.4, "learning_rate": 2.666607612129802e-05, "loss": 0.633, "step": 126440 }, { "epoch": 1.4, "learning_rate": 2.666515339415951e-05, "loss": 0.6145, "step": 126445 }, { "epoch": 1.4, "learning_rate": 2.6664230667020994e-05, "loss": 0.6742, "step": 126450 }, { "epoch": 1.4, "learning_rate": 2.666330793988248e-05, "loss": 0.6044, "step": 126455 }, { "epoch": 1.4, "learning_rate": 2.666238521274397e-05, "loss": 0.597, "step": 126460 }, { "epoch": 1.4, "learning_rate": 2.666146248560546e-05, "loss": 0.6346, "step": 126465 }, { "epoch": 1.4, "learning_rate": 2.6660539758466945e-05, "loss": 0.6383, "step": 126470 }, { "epoch": 1.4, "learning_rate": 2.6659617031328433e-05, "loss": 0.6594, "step": 126475 }, { "epoch": 1.4, "learning_rate": 2.6658694304189917e-05, "loss": 0.6663, "step": 126480 }, { "epoch": 1.4, "learning_rate": 2.665777157705141e-05, "loss": 0.6604, "step": 126485 }, { "epoch": 1.4, "learning_rate": 2.6656848849912896e-05, "loss": 0.6504, "step": 126490 }, { "epoch": 1.4, "learning_rate": 2.6655926122774384e-05, "loss": 0.6872, "step": 126495 }, { "epoch": 1.4, "learning_rate": 2.665500339563587e-05, "loss": 0.6581, "step": 126500 }, { "epoch": 1.4, "learning_rate": 2.665408066849736e-05, "loss": 0.661, "step": 126505 }, { "epoch": 1.4, "learning_rate": 2.6653157941358848e-05, "loss": 0.6281, "step": 126510 }, { "epoch": 1.4, "learning_rate": 2.6652235214220332e-05, "loss": 0.6967, "step": 126515 }, { "epoch": 1.4, "learning_rate": 2.665131248708182e-05, "loss": 0.6696, "step": 126520 }, { "epoch": 1.4, "learning_rate": 2.665038975994331e-05, "loss": 0.6426, "step": 126525 }, { "epoch": 1.4, "learning_rate": 2.66494670328048e-05, "loss": 0.6443, "step": 126530 }, { "epoch": 1.4, "learning_rate": 2.6648544305666284e-05, "loss": 0.5793, "step": 126535 }, { "epoch": 1.4, "learning_rate": 2.664762157852777e-05, "loss": 0.6298, "step": 126540 }, { "epoch": 1.4, "learning_rate": 2.6646698851389256e-05, "loss": 0.6377, "step": 126545 }, { "epoch": 1.4, "learning_rate": 2.6645776124250747e-05, "loss": 0.7322, "step": 126550 }, { "epoch": 1.4, "learning_rate": 2.6644853397112235e-05, "loss": 0.7465, "step": 126555 }, { "epoch": 1.4, "learning_rate": 2.664393066997372e-05, "loss": 0.6967, "step": 126560 }, { "epoch": 1.4, "learning_rate": 2.6643007942835207e-05, "loss": 0.6317, "step": 126565 }, { "epoch": 1.4, "learning_rate": 2.66420852156967e-05, "loss": 0.6716, "step": 126570 }, { "epoch": 1.4, "learning_rate": 2.6641162488558187e-05, "loss": 0.6315, "step": 126575 }, { "epoch": 1.4, "learning_rate": 2.664023976141967e-05, "loss": 0.6074, "step": 126580 }, { "epoch": 1.4, "learning_rate": 2.663931703428116e-05, "loss": 0.6253, "step": 126585 }, { "epoch": 1.4, "learning_rate": 2.663839430714265e-05, "loss": 0.6307, "step": 126590 }, { "epoch": 1.4, "learning_rate": 2.6637471580004138e-05, "loss": 0.6395, "step": 126595 }, { "epoch": 1.4, "learning_rate": 2.6636548852865622e-05, "loss": 0.6515, "step": 126600 }, { "epoch": 1.4, "learning_rate": 2.663562612572711e-05, "loss": 0.6503, "step": 126605 }, { "epoch": 1.4, "learning_rate": 2.6634703398588595e-05, "loss": 0.7026, "step": 126610 }, { "epoch": 1.4, "learning_rate": 2.6633780671450086e-05, "loss": 0.6525, "step": 126615 }, { "epoch": 1.4, "learning_rate": 2.6632857944311574e-05, "loss": 0.6478, "step": 126620 }, { "epoch": 1.4, "learning_rate": 2.6631935217173058e-05, "loss": 0.6512, "step": 126625 }, { "epoch": 1.4, "learning_rate": 2.6631012490034546e-05, "loss": 0.7201, "step": 126630 }, { "epoch": 1.4, "learning_rate": 2.6630089762896037e-05, "loss": 0.6806, "step": 126635 }, { "epoch": 1.4, "learning_rate": 2.6629167035757525e-05, "loss": 0.6712, "step": 126640 }, { "epoch": 1.4, "learning_rate": 2.662824430861901e-05, "loss": 0.5815, "step": 126645 }, { "epoch": 1.4, "learning_rate": 2.6627321581480497e-05, "loss": 0.6483, "step": 126650 }, { "epoch": 1.4, "learning_rate": 2.662639885434199e-05, "loss": 0.5928, "step": 126655 }, { "epoch": 1.4, "learning_rate": 2.6625476127203473e-05, "loss": 0.6902, "step": 126660 }, { "epoch": 1.4, "learning_rate": 2.662455340006496e-05, "loss": 0.6916, "step": 126665 }, { "epoch": 1.4, "learning_rate": 2.662363067292645e-05, "loss": 0.6578, "step": 126670 }, { "epoch": 1.4, "learning_rate": 2.662270794578794e-05, "loss": 0.6276, "step": 126675 }, { "epoch": 1.4, "learning_rate": 2.6621785218649425e-05, "loss": 0.6206, "step": 126680 }, { "epoch": 1.4, "learning_rate": 2.6620862491510912e-05, "loss": 0.6262, "step": 126685 }, { "epoch": 1.4, "learning_rate": 2.6619939764372397e-05, "loss": 0.6941, "step": 126690 }, { "epoch": 1.4, "learning_rate": 2.6619017037233885e-05, "loss": 0.6822, "step": 126695 }, { "epoch": 1.4, "learning_rate": 2.6618094310095376e-05, "loss": 0.6128, "step": 126700 }, { "epoch": 1.4, "learning_rate": 2.6617171582956864e-05, "loss": 0.6302, "step": 126705 }, { "epoch": 1.4, "learning_rate": 2.6616248855818348e-05, "loss": 0.6493, "step": 126710 }, { "epoch": 1.4, "learning_rate": 2.6615326128679836e-05, "loss": 0.684, "step": 126715 }, { "epoch": 1.4, "learning_rate": 2.6614403401541327e-05, "loss": 0.6301, "step": 126720 }, { "epoch": 1.4, "learning_rate": 2.6613480674402812e-05, "loss": 0.6839, "step": 126725 }, { "epoch": 1.4, "learning_rate": 2.66125579472643e-05, "loss": 0.6532, "step": 126730 }, { "epoch": 1.4, "learning_rate": 2.6611635220125784e-05, "loss": 0.6546, "step": 126735 }, { "epoch": 1.4, "learning_rate": 2.661071249298728e-05, "loss": 0.6126, "step": 126740 }, { "epoch": 1.4, "learning_rate": 2.6609789765848763e-05, "loss": 0.7258, "step": 126745 }, { "epoch": 1.4, "learning_rate": 2.660886703871025e-05, "loss": 0.6354, "step": 126750 }, { "epoch": 1.4, "learning_rate": 2.6607944311571736e-05, "loss": 0.6673, "step": 126755 }, { "epoch": 1.4, "learning_rate": 2.6607021584433223e-05, "loss": 0.6686, "step": 126760 }, { "epoch": 1.4, "learning_rate": 2.6606098857294715e-05, "loss": 0.6205, "step": 126765 }, { "epoch": 1.4, "learning_rate": 2.6605176130156202e-05, "loss": 0.6835, "step": 126770 }, { "epoch": 1.4, "learning_rate": 2.6604253403017687e-05, "loss": 0.6434, "step": 126775 }, { "epoch": 1.4, "learning_rate": 2.6603330675879175e-05, "loss": 0.6138, "step": 126780 }, { "epoch": 1.4, "learning_rate": 2.6602407948740666e-05, "loss": 0.6216, "step": 126785 }, { "epoch": 1.4, "learning_rate": 2.660148522160215e-05, "loss": 0.6433, "step": 126790 }, { "epoch": 1.4, "learning_rate": 2.660056249446364e-05, "loss": 0.6283, "step": 126795 }, { "epoch": 1.4, "learning_rate": 2.6599639767325123e-05, "loss": 0.6251, "step": 126800 }, { "epoch": 1.4, "learning_rate": 2.6598717040186617e-05, "loss": 0.611, "step": 126805 }, { "epoch": 1.4, "learning_rate": 2.6597794313048102e-05, "loss": 0.6434, "step": 126810 }, { "epoch": 1.4, "learning_rate": 2.659687158590959e-05, "loss": 0.6813, "step": 126815 }, { "epoch": 1.4, "learning_rate": 2.6595948858771074e-05, "loss": 0.6243, "step": 126820 }, { "epoch": 1.4, "learning_rate": 2.6595026131632565e-05, "loss": 0.648, "step": 126825 }, { "epoch": 1.4, "learning_rate": 2.6594103404494053e-05, "loss": 0.6771, "step": 126830 }, { "epoch": 1.4, "learning_rate": 2.6593180677355538e-05, "loss": 0.6816, "step": 126835 }, { "epoch": 1.4, "learning_rate": 2.6592257950217026e-05, "loss": 0.602, "step": 126840 }, { "epoch": 1.4, "learning_rate": 2.6591335223078513e-05, "loss": 0.6745, "step": 126845 }, { "epoch": 1.4, "learning_rate": 2.6590412495940005e-05, "loss": 0.6258, "step": 126850 }, { "epoch": 1.4, "learning_rate": 2.658948976880149e-05, "loss": 0.655, "step": 126855 }, { "epoch": 1.4, "learning_rate": 2.6588567041662977e-05, "loss": 0.606, "step": 126860 }, { "epoch": 1.4, "learning_rate": 2.658764431452446e-05, "loss": 0.7208, "step": 126865 }, { "epoch": 1.4, "learning_rate": 2.6586721587385953e-05, "loss": 0.6285, "step": 126870 }, { "epoch": 1.4, "learning_rate": 2.658579886024744e-05, "loss": 0.6366, "step": 126875 }, { "epoch": 1.4, "learning_rate": 2.658487613310893e-05, "loss": 0.6404, "step": 126880 }, { "epoch": 1.4, "learning_rate": 2.6583953405970413e-05, "loss": 0.6252, "step": 126885 }, { "epoch": 1.41, "learning_rate": 2.6583030678831904e-05, "loss": 0.6412, "step": 126890 }, { "epoch": 1.41, "learning_rate": 2.6582107951693392e-05, "loss": 0.6846, "step": 126895 }, { "epoch": 1.41, "learning_rate": 2.6581185224554876e-05, "loss": 0.6591, "step": 126900 }, { "epoch": 1.41, "learning_rate": 2.6580262497416364e-05, "loss": 0.6646, "step": 126905 }, { "epoch": 1.41, "learning_rate": 2.657933977027785e-05, "loss": 0.6673, "step": 126910 }, { "epoch": 1.41, "learning_rate": 2.6578417043139343e-05, "loss": 0.6554, "step": 126915 }, { "epoch": 1.41, "learning_rate": 2.6577494316000828e-05, "loss": 0.6138, "step": 126920 }, { "epoch": 1.41, "learning_rate": 2.6576571588862316e-05, "loss": 0.671, "step": 126925 }, { "epoch": 1.41, "learning_rate": 2.65756488617238e-05, "loss": 0.6343, "step": 126930 }, { "epoch": 1.41, "learning_rate": 2.657472613458529e-05, "loss": 0.6529, "step": 126935 }, { "epoch": 1.41, "learning_rate": 2.657380340744678e-05, "loss": 0.5799, "step": 126940 }, { "epoch": 1.41, "learning_rate": 2.6572880680308264e-05, "loss": 0.6308, "step": 126945 }, { "epoch": 1.41, "learning_rate": 2.657195795316975e-05, "loss": 0.7035, "step": 126950 }, { "epoch": 1.41, "learning_rate": 2.6571035226031243e-05, "loss": 0.6522, "step": 126955 }, { "epoch": 1.41, "learning_rate": 2.657011249889273e-05, "loss": 0.6902, "step": 126960 }, { "epoch": 1.41, "learning_rate": 2.6569189771754215e-05, "loss": 0.656, "step": 126965 }, { "epoch": 1.41, "learning_rate": 2.6568267044615703e-05, "loss": 0.6654, "step": 126970 }, { "epoch": 1.41, "learning_rate": 2.6567344317477194e-05, "loss": 0.6912, "step": 126975 }, { "epoch": 1.41, "learning_rate": 2.6566421590338682e-05, "loss": 0.6745, "step": 126980 }, { "epoch": 1.41, "learning_rate": 2.6565498863200166e-05, "loss": 0.6306, "step": 126985 }, { "epoch": 1.41, "learning_rate": 2.6564576136061654e-05, "loss": 0.61, "step": 126990 }, { "epoch": 1.41, "learning_rate": 2.656365340892314e-05, "loss": 0.6191, "step": 126995 }, { "epoch": 1.41, "learning_rate": 2.656273068178463e-05, "loss": 0.645, "step": 127000 }, { "epoch": 1.41, "eval_loss": 0.6295164823532104, "eval_runtime": 69.264, "eval_samples_per_second": 28.875, "eval_steps_per_second": 14.438, "step": 127000 }, { "epoch": 1.41, "learning_rate": 2.6561807954646118e-05, "loss": 0.7014, "step": 127005 }, { "epoch": 1.41, "learning_rate": 2.6560885227507602e-05, "loss": 0.6414, "step": 127010 }, { "epoch": 1.41, "learning_rate": 2.655996250036909e-05, "loss": 0.6754, "step": 127015 }, { "epoch": 1.41, "learning_rate": 2.655903977323058e-05, "loss": 0.6247, "step": 127020 }, { "epoch": 1.41, "learning_rate": 2.655811704609207e-05, "loss": 0.5831, "step": 127025 }, { "epoch": 1.41, "learning_rate": 2.6557194318953554e-05, "loss": 0.6848, "step": 127030 }, { "epoch": 1.41, "learning_rate": 2.655627159181504e-05, "loss": 0.6757, "step": 127035 }, { "epoch": 1.41, "learning_rate": 2.6555348864676533e-05, "loss": 0.5972, "step": 127040 }, { "epoch": 1.41, "learning_rate": 2.6554426137538017e-05, "loss": 0.6456, "step": 127045 }, { "epoch": 1.41, "learning_rate": 2.6553503410399505e-05, "loss": 0.6446, "step": 127050 }, { "epoch": 1.41, "learning_rate": 2.6552580683260993e-05, "loss": 0.6505, "step": 127055 }, { "epoch": 1.41, "learning_rate": 2.6551657956122477e-05, "loss": 0.633, "step": 127060 }, { "epoch": 1.41, "learning_rate": 2.655073522898397e-05, "loss": 0.6747, "step": 127065 }, { "epoch": 1.41, "learning_rate": 2.6549812501845456e-05, "loss": 0.644, "step": 127070 }, { "epoch": 1.41, "learning_rate": 2.654888977470694e-05, "loss": 0.6614, "step": 127075 }, { "epoch": 1.41, "learning_rate": 2.654796704756843e-05, "loss": 0.6941, "step": 127080 }, { "epoch": 1.41, "learning_rate": 2.654704432042992e-05, "loss": 0.6396, "step": 127085 }, { "epoch": 1.41, "learning_rate": 2.6546121593291408e-05, "loss": 0.6193, "step": 127090 }, { "epoch": 1.41, "learning_rate": 2.6545198866152892e-05, "loss": 0.6689, "step": 127095 }, { "epoch": 1.41, "learning_rate": 2.654427613901438e-05, "loss": 0.7247, "step": 127100 }, { "epoch": 1.41, "learning_rate": 2.654335341187587e-05, "loss": 0.6796, "step": 127105 }, { "epoch": 1.41, "learning_rate": 2.6542430684737356e-05, "loss": 0.6264, "step": 127110 }, { "epoch": 1.41, "learning_rate": 2.6541507957598844e-05, "loss": 0.6168, "step": 127115 }, { "epoch": 1.41, "learning_rate": 2.6540585230460328e-05, "loss": 0.6628, "step": 127120 }, { "epoch": 1.41, "learning_rate": 2.6539662503321823e-05, "loss": 0.6112, "step": 127125 }, { "epoch": 1.41, "learning_rate": 2.6538739776183307e-05, "loss": 0.6691, "step": 127130 }, { "epoch": 1.41, "learning_rate": 2.6537817049044795e-05, "loss": 0.6506, "step": 127135 }, { "epoch": 1.41, "learning_rate": 2.653689432190628e-05, "loss": 0.6931, "step": 127140 }, { "epoch": 1.41, "learning_rate": 2.6535971594767767e-05, "loss": 0.655, "step": 127145 }, { "epoch": 1.41, "learning_rate": 2.653504886762926e-05, "loss": 0.6156, "step": 127150 }, { "epoch": 1.41, "learning_rate": 2.6534126140490746e-05, "loss": 0.6602, "step": 127155 }, { "epoch": 1.41, "learning_rate": 2.653320341335223e-05, "loss": 0.6588, "step": 127160 }, { "epoch": 1.41, "learning_rate": 2.653228068621372e-05, "loss": 0.656, "step": 127165 }, { "epoch": 1.41, "learning_rate": 2.653135795907521e-05, "loss": 0.5928, "step": 127170 }, { "epoch": 1.41, "learning_rate": 2.6530435231936694e-05, "loss": 0.6796, "step": 127175 }, { "epoch": 1.41, "learning_rate": 2.6529512504798182e-05, "loss": 0.6253, "step": 127180 }, { "epoch": 1.41, "learning_rate": 2.6528589777659667e-05, "loss": 0.6355, "step": 127185 }, { "epoch": 1.41, "learning_rate": 2.652766705052116e-05, "loss": 0.6514, "step": 127190 }, { "epoch": 1.41, "learning_rate": 2.6526744323382646e-05, "loss": 0.6771, "step": 127195 }, { "epoch": 1.41, "learning_rate": 2.6525821596244134e-05, "loss": 0.5921, "step": 127200 }, { "epoch": 1.41, "learning_rate": 2.6524898869105618e-05, "loss": 0.6259, "step": 127205 }, { "epoch": 1.41, "learning_rate": 2.6523976141967106e-05, "loss": 0.6578, "step": 127210 }, { "epoch": 1.41, "learning_rate": 2.6523053414828597e-05, "loss": 0.6558, "step": 127215 }, { "epoch": 1.41, "learning_rate": 2.6522130687690082e-05, "loss": 0.6291, "step": 127220 }, { "epoch": 1.41, "learning_rate": 2.652120796055157e-05, "loss": 0.6831, "step": 127225 }, { "epoch": 1.41, "learning_rate": 2.6520285233413057e-05, "loss": 0.6478, "step": 127230 }, { "epoch": 1.41, "learning_rate": 2.651936250627455e-05, "loss": 0.6431, "step": 127235 }, { "epoch": 1.41, "learning_rate": 2.6518439779136033e-05, "loss": 0.6463, "step": 127240 }, { "epoch": 1.41, "learning_rate": 2.651751705199752e-05, "loss": 0.6961, "step": 127245 }, { "epoch": 1.41, "learning_rate": 2.6516594324859005e-05, "loss": 0.6388, "step": 127250 }, { "epoch": 1.41, "learning_rate": 2.6515671597720497e-05, "loss": 0.645, "step": 127255 }, { "epoch": 1.41, "learning_rate": 2.6514748870581985e-05, "loss": 0.6899, "step": 127260 }, { "epoch": 1.41, "learning_rate": 2.6513826143443472e-05, "loss": 0.6314, "step": 127265 }, { "epoch": 1.41, "learning_rate": 2.6512903416304957e-05, "loss": 0.6515, "step": 127270 }, { "epoch": 1.41, "learning_rate": 2.6511980689166448e-05, "loss": 0.6459, "step": 127275 }, { "epoch": 1.41, "learning_rate": 2.6511057962027936e-05, "loss": 0.6417, "step": 127280 }, { "epoch": 1.41, "learning_rate": 2.651013523488942e-05, "loss": 0.6494, "step": 127285 }, { "epoch": 1.41, "learning_rate": 2.6509212507750908e-05, "loss": 0.6943, "step": 127290 }, { "epoch": 1.41, "learning_rate": 2.6508289780612393e-05, "loss": 0.6486, "step": 127295 }, { "epoch": 1.41, "learning_rate": 2.6507367053473887e-05, "loss": 0.6457, "step": 127300 }, { "epoch": 1.41, "learning_rate": 2.6506444326335372e-05, "loss": 0.6638, "step": 127305 }, { "epoch": 1.41, "learning_rate": 2.650552159919686e-05, "loss": 0.6645, "step": 127310 }, { "epoch": 1.41, "learning_rate": 2.6504598872058344e-05, "loss": 0.623, "step": 127315 }, { "epoch": 1.41, "learning_rate": 2.6503676144919835e-05, "loss": 0.6183, "step": 127320 }, { "epoch": 1.41, "learning_rate": 2.6502753417781323e-05, "loss": 0.6878, "step": 127325 }, { "epoch": 1.41, "learning_rate": 2.6501830690642808e-05, "loss": 0.664, "step": 127330 }, { "epoch": 1.41, "learning_rate": 2.6500907963504295e-05, "loss": 0.6925, "step": 127335 }, { "epoch": 1.41, "learning_rate": 2.6499985236365787e-05, "loss": 0.63, "step": 127340 }, { "epoch": 1.41, "learning_rate": 2.6499062509227275e-05, "loss": 0.6558, "step": 127345 }, { "epoch": 1.41, "learning_rate": 2.649813978208876e-05, "loss": 0.6501, "step": 127350 }, { "epoch": 1.41, "learning_rate": 2.6497217054950247e-05, "loss": 0.6467, "step": 127355 }, { "epoch": 1.41, "learning_rate": 2.6496294327811738e-05, "loss": 0.5914, "step": 127360 }, { "epoch": 1.41, "learning_rate": 2.6495371600673226e-05, "loss": 0.6722, "step": 127365 }, { "epoch": 1.41, "learning_rate": 2.649444887353471e-05, "loss": 0.6073, "step": 127370 }, { "epoch": 1.41, "learning_rate": 2.6493526146396198e-05, "loss": 0.6703, "step": 127375 }, { "epoch": 1.41, "learning_rate": 2.6492603419257683e-05, "loss": 0.6187, "step": 127380 }, { "epoch": 1.41, "learning_rate": 2.6491680692119174e-05, "loss": 0.6883, "step": 127385 }, { "epoch": 1.41, "learning_rate": 2.6490757964980662e-05, "loss": 0.5811, "step": 127390 }, { "epoch": 1.41, "learning_rate": 2.6489835237842146e-05, "loss": 0.6246, "step": 127395 }, { "epoch": 1.41, "learning_rate": 2.6488912510703634e-05, "loss": 0.5976, "step": 127400 }, { "epoch": 1.41, "learning_rate": 2.6487989783565125e-05, "loss": 0.6429, "step": 127405 }, { "epoch": 1.41, "learning_rate": 2.6487067056426613e-05, "loss": 0.6478, "step": 127410 }, { "epoch": 1.41, "learning_rate": 2.6486144329288098e-05, "loss": 0.6085, "step": 127415 }, { "epoch": 1.41, "learning_rate": 2.6485221602149586e-05, "loss": 0.6689, "step": 127420 }, { "epoch": 1.41, "learning_rate": 2.6484298875011077e-05, "loss": 0.714, "step": 127425 }, { "epoch": 1.41, "learning_rate": 2.648337614787256e-05, "loss": 0.6383, "step": 127430 }, { "epoch": 1.41, "learning_rate": 2.648245342073405e-05, "loss": 0.702, "step": 127435 }, { "epoch": 1.41, "learning_rate": 2.6481530693595537e-05, "loss": 0.7118, "step": 127440 }, { "epoch": 1.41, "learning_rate": 2.648060796645702e-05, "loss": 0.6421, "step": 127445 }, { "epoch": 1.41, "learning_rate": 2.6479685239318513e-05, "loss": 0.6913, "step": 127450 }, { "epoch": 1.41, "learning_rate": 2.647876251218e-05, "loss": 0.6092, "step": 127455 }, { "epoch": 1.41, "learning_rate": 2.6477839785041485e-05, "loss": 0.6592, "step": 127460 }, { "epoch": 1.41, "learning_rate": 2.6476917057902973e-05, "loss": 0.5833, "step": 127465 }, { "epoch": 1.41, "learning_rate": 2.6475994330764464e-05, "loss": 0.6261, "step": 127470 }, { "epoch": 1.41, "learning_rate": 2.6475071603625952e-05, "loss": 0.6414, "step": 127475 }, { "epoch": 1.41, "learning_rate": 2.6474148876487436e-05, "loss": 0.7067, "step": 127480 }, { "epoch": 1.41, "learning_rate": 2.6473226149348924e-05, "loss": 0.6257, "step": 127485 }, { "epoch": 1.41, "learning_rate": 2.6472303422210415e-05, "loss": 0.6528, "step": 127490 }, { "epoch": 1.41, "learning_rate": 2.64713806950719e-05, "loss": 0.6593, "step": 127495 }, { "epoch": 1.41, "learning_rate": 2.6470457967933388e-05, "loss": 0.6629, "step": 127500 }, { "epoch": 1.41, "learning_rate": 2.6469535240794872e-05, "loss": 0.6842, "step": 127505 }, { "epoch": 1.41, "learning_rate": 2.6468612513656367e-05, "loss": 0.7235, "step": 127510 }, { "epoch": 1.41, "learning_rate": 2.646768978651785e-05, "loss": 0.6526, "step": 127515 }, { "epoch": 1.41, "learning_rate": 2.646676705937934e-05, "loss": 0.6611, "step": 127520 }, { "epoch": 1.41, "learning_rate": 2.6465844332240824e-05, "loss": 0.6721, "step": 127525 }, { "epoch": 1.41, "learning_rate": 2.646492160510231e-05, "loss": 0.6176, "step": 127530 }, { "epoch": 1.41, "learning_rate": 2.6463998877963803e-05, "loss": 0.5863, "step": 127535 }, { "epoch": 1.41, "learning_rate": 2.646307615082529e-05, "loss": 0.673, "step": 127540 }, { "epoch": 1.41, "learning_rate": 2.6462153423686775e-05, "loss": 0.63, "step": 127545 }, { "epoch": 1.41, "learning_rate": 2.6461230696548263e-05, "loss": 0.6396, "step": 127550 }, { "epoch": 1.41, "learning_rate": 2.6460307969409754e-05, "loss": 0.5884, "step": 127555 }, { "epoch": 1.41, "learning_rate": 2.645938524227124e-05, "loss": 0.7149, "step": 127560 }, { "epoch": 1.41, "learning_rate": 2.6458462515132726e-05, "loss": 0.6525, "step": 127565 }, { "epoch": 1.41, "learning_rate": 2.645753978799421e-05, "loss": 0.6901, "step": 127570 }, { "epoch": 1.41, "learning_rate": 2.6456617060855705e-05, "loss": 0.6623, "step": 127575 }, { "epoch": 1.41, "learning_rate": 2.645569433371719e-05, "loss": 0.6903, "step": 127580 }, { "epoch": 1.41, "learning_rate": 2.6454771606578678e-05, "loss": 0.6807, "step": 127585 }, { "epoch": 1.41, "learning_rate": 2.6453848879440162e-05, "loss": 0.6374, "step": 127590 }, { "epoch": 1.41, "learning_rate": 2.645292615230165e-05, "loss": 0.659, "step": 127595 }, { "epoch": 1.41, "learning_rate": 2.645200342516314e-05, "loss": 0.7234, "step": 127600 }, { "epoch": 1.41, "learning_rate": 2.6451080698024626e-05, "loss": 0.6167, "step": 127605 }, { "epoch": 1.41, "learning_rate": 2.6450157970886114e-05, "loss": 0.6074, "step": 127610 }, { "epoch": 1.41, "learning_rate": 2.64492352437476e-05, "loss": 0.6932, "step": 127615 }, { "epoch": 1.41, "learning_rate": 2.6448312516609093e-05, "loss": 0.6236, "step": 127620 }, { "epoch": 1.41, "learning_rate": 2.6447389789470577e-05, "loss": 0.6275, "step": 127625 }, { "epoch": 1.41, "learning_rate": 2.6446467062332065e-05, "loss": 0.6745, "step": 127630 }, { "epoch": 1.41, "learning_rate": 2.644554433519355e-05, "loss": 0.6421, "step": 127635 }, { "epoch": 1.41, "learning_rate": 2.6444621608055044e-05, "loss": 0.6381, "step": 127640 }, { "epoch": 1.41, "learning_rate": 2.644369888091653e-05, "loss": 0.6299, "step": 127645 }, { "epoch": 1.41, "learning_rate": 2.6442776153778016e-05, "loss": 0.6599, "step": 127650 }, { "epoch": 1.41, "learning_rate": 2.64418534266395e-05, "loss": 0.6316, "step": 127655 }, { "epoch": 1.41, "learning_rate": 2.6440930699500992e-05, "loss": 0.665, "step": 127660 }, { "epoch": 1.41, "learning_rate": 2.644000797236248e-05, "loss": 0.649, "step": 127665 }, { "epoch": 1.41, "learning_rate": 2.6439085245223964e-05, "loss": 0.6403, "step": 127670 }, { "epoch": 1.41, "learning_rate": 2.6438162518085452e-05, "loss": 0.6694, "step": 127675 }, { "epoch": 1.41, "learning_rate": 2.6437239790946937e-05, "loss": 0.6599, "step": 127680 }, { "epoch": 1.41, "learning_rate": 2.643631706380843e-05, "loss": 0.6097, "step": 127685 }, { "epoch": 1.41, "learning_rate": 2.6435394336669916e-05, "loss": 0.6616, "step": 127690 }, { "epoch": 1.41, "learning_rate": 2.6434471609531404e-05, "loss": 0.6098, "step": 127695 }, { "epoch": 1.41, "learning_rate": 2.6433548882392888e-05, "loss": 0.677, "step": 127700 }, { "epoch": 1.41, "learning_rate": 2.643262615525438e-05, "loss": 0.6654, "step": 127705 }, { "epoch": 1.41, "learning_rate": 2.6431703428115867e-05, "loss": 0.6814, "step": 127710 }, { "epoch": 1.41, "learning_rate": 2.6430780700977355e-05, "loss": 0.6207, "step": 127715 }, { "epoch": 1.41, "learning_rate": 2.642985797383884e-05, "loss": 0.6862, "step": 127720 }, { "epoch": 1.41, "learning_rate": 2.642893524670033e-05, "loss": 0.6599, "step": 127725 }, { "epoch": 1.41, "learning_rate": 2.642801251956182e-05, "loss": 0.6517, "step": 127730 }, { "epoch": 1.41, "learning_rate": 2.6427089792423303e-05, "loss": 0.6603, "step": 127735 }, { "epoch": 1.41, "learning_rate": 2.642616706528479e-05, "loss": 0.6789, "step": 127740 }, { "epoch": 1.41, "learning_rate": 2.6425244338146275e-05, "loss": 0.6038, "step": 127745 }, { "epoch": 1.41, "learning_rate": 2.642432161100777e-05, "loss": 0.6631, "step": 127750 }, { "epoch": 1.41, "learning_rate": 2.6423398883869254e-05, "loss": 0.6326, "step": 127755 }, { "epoch": 1.41, "learning_rate": 2.6422476156730742e-05, "loss": 0.6839, "step": 127760 }, { "epoch": 1.41, "learning_rate": 2.6421553429592227e-05, "loss": 0.6005, "step": 127765 }, { "epoch": 1.41, "learning_rate": 2.6420630702453718e-05, "loss": 0.6908, "step": 127770 }, { "epoch": 1.41, "learning_rate": 2.6419707975315206e-05, "loss": 0.6403, "step": 127775 }, { "epoch": 1.41, "learning_rate": 2.641878524817669e-05, "loss": 0.7009, "step": 127780 }, { "epoch": 1.41, "learning_rate": 2.6417862521038178e-05, "loss": 0.6435, "step": 127785 }, { "epoch": 1.41, "learning_rate": 2.641693979389967e-05, "loss": 0.6683, "step": 127790 }, { "epoch": 1.42, "learning_rate": 2.6416017066761157e-05, "loss": 0.6272, "step": 127795 }, { "epoch": 1.42, "learning_rate": 2.641509433962264e-05, "loss": 0.6512, "step": 127800 }, { "epoch": 1.42, "learning_rate": 2.641417161248413e-05, "loss": 0.6783, "step": 127805 }, { "epoch": 1.42, "learning_rate": 2.641324888534562e-05, "loss": 0.6388, "step": 127810 }, { "epoch": 1.42, "learning_rate": 2.6412326158207105e-05, "loss": 0.627, "step": 127815 }, { "epoch": 1.42, "learning_rate": 2.6411403431068593e-05, "loss": 0.6567, "step": 127820 }, { "epoch": 1.42, "learning_rate": 2.641048070393008e-05, "loss": 0.6051, "step": 127825 }, { "epoch": 1.42, "learning_rate": 2.6409557976791565e-05, "loss": 0.6816, "step": 127830 }, { "epoch": 1.42, "learning_rate": 2.6408635249653057e-05, "loss": 0.7235, "step": 127835 }, { "epoch": 1.42, "learning_rate": 2.6407712522514544e-05, "loss": 0.6714, "step": 127840 }, { "epoch": 1.42, "learning_rate": 2.640678979537603e-05, "loss": 0.6445, "step": 127845 }, { "epoch": 1.42, "learning_rate": 2.6405867068237517e-05, "loss": 0.6367, "step": 127850 }, { "epoch": 1.42, "learning_rate": 2.6404944341099008e-05, "loss": 0.6845, "step": 127855 }, { "epoch": 1.42, "learning_rate": 2.6404021613960496e-05, "loss": 0.6908, "step": 127860 }, { "epoch": 1.42, "learning_rate": 2.640309888682198e-05, "loss": 0.6511, "step": 127865 }, { "epoch": 1.42, "learning_rate": 2.6402176159683468e-05, "loss": 0.6438, "step": 127870 }, { "epoch": 1.42, "learning_rate": 2.640125343254496e-05, "loss": 0.6313, "step": 127875 }, { "epoch": 1.42, "learning_rate": 2.6400330705406444e-05, "loss": 0.6122, "step": 127880 }, { "epoch": 1.42, "learning_rate": 2.6399407978267932e-05, "loss": 0.6948, "step": 127885 }, { "epoch": 1.42, "learning_rate": 2.6398485251129416e-05, "loss": 0.6836, "step": 127890 }, { "epoch": 1.42, "learning_rate": 2.6397562523990904e-05, "loss": 0.6538, "step": 127895 }, { "epoch": 1.42, "learning_rate": 2.6396639796852395e-05, "loss": 0.6724, "step": 127900 }, { "epoch": 1.42, "learning_rate": 2.6395717069713883e-05, "loss": 0.628, "step": 127905 }, { "epoch": 1.42, "learning_rate": 2.6394794342575368e-05, "loss": 0.6088, "step": 127910 }, { "epoch": 1.42, "learning_rate": 2.6393871615436855e-05, "loss": 0.615, "step": 127915 }, { "epoch": 1.42, "learning_rate": 2.6392948888298347e-05, "loss": 0.6661, "step": 127920 }, { "epoch": 1.42, "learning_rate": 2.6392026161159835e-05, "loss": 0.6828, "step": 127925 }, { "epoch": 1.42, "learning_rate": 2.639110343402132e-05, "loss": 0.5937, "step": 127930 }, { "epoch": 1.42, "learning_rate": 2.6390180706882807e-05, "loss": 0.7266, "step": 127935 }, { "epoch": 1.42, "learning_rate": 2.6389257979744298e-05, "loss": 0.6294, "step": 127940 }, { "epoch": 1.42, "learning_rate": 2.6388335252605783e-05, "loss": 0.6761, "step": 127945 }, { "epoch": 1.42, "learning_rate": 2.638741252546727e-05, "loss": 0.6498, "step": 127950 }, { "epoch": 1.42, "learning_rate": 2.6386489798328755e-05, "loss": 0.6415, "step": 127955 }, { "epoch": 1.42, "learning_rate": 2.638556707119025e-05, "loss": 0.6654, "step": 127960 }, { "epoch": 1.42, "learning_rate": 2.6384644344051734e-05, "loss": 0.6383, "step": 127965 }, { "epoch": 1.42, "learning_rate": 2.6383721616913222e-05, "loss": 0.6677, "step": 127970 }, { "epoch": 1.42, "learning_rate": 2.6382798889774706e-05, "loss": 0.63, "step": 127975 }, { "epoch": 1.42, "learning_rate": 2.6381876162636194e-05, "loss": 0.6287, "step": 127980 }, { "epoch": 1.42, "learning_rate": 2.6380953435497685e-05, "loss": 0.6499, "step": 127985 }, { "epoch": 1.42, "learning_rate": 2.638003070835917e-05, "loss": 0.6716, "step": 127990 }, { "epoch": 1.42, "learning_rate": 2.6379107981220658e-05, "loss": 0.6258, "step": 127995 }, { "epoch": 1.42, "learning_rate": 2.6378185254082145e-05, "loss": 0.6578, "step": 128000 }, { "epoch": 1.42, "eval_loss": 0.6347951889038086, "eval_runtime": 69.315, "eval_samples_per_second": 28.854, "eval_steps_per_second": 14.427, "step": 128000 }, { "epoch": 1.42, "learning_rate": 2.6377262526943637e-05, "loss": 0.576, "step": 128005 }, { "epoch": 1.42, "learning_rate": 2.637633979980512e-05, "loss": 0.6945, "step": 128010 }, { "epoch": 1.42, "learning_rate": 2.637541707266661e-05, "loss": 0.6646, "step": 128015 }, { "epoch": 1.42, "learning_rate": 2.6374494345528093e-05, "loss": 0.6522, "step": 128020 }, { "epoch": 1.42, "learning_rate": 2.6373571618389588e-05, "loss": 0.6355, "step": 128025 }, { "epoch": 1.42, "learning_rate": 2.6372648891251073e-05, "loss": 0.6282, "step": 128030 }, { "epoch": 1.42, "learning_rate": 2.637172616411256e-05, "loss": 0.6635, "step": 128035 }, { "epoch": 1.42, "learning_rate": 2.6370803436974045e-05, "loss": 0.6223, "step": 128040 }, { "epoch": 1.42, "learning_rate": 2.6369880709835533e-05, "loss": 0.6724, "step": 128045 }, { "epoch": 1.42, "learning_rate": 2.6368957982697024e-05, "loss": 0.6427, "step": 128050 }, { "epoch": 1.42, "learning_rate": 2.636803525555851e-05, "loss": 0.6691, "step": 128055 }, { "epoch": 1.42, "learning_rate": 2.6367112528419996e-05, "loss": 0.6079, "step": 128060 }, { "epoch": 1.42, "learning_rate": 2.636618980128148e-05, "loss": 0.6365, "step": 128065 }, { "epoch": 1.42, "learning_rate": 2.6365267074142975e-05, "loss": 0.6977, "step": 128070 }, { "epoch": 1.42, "learning_rate": 2.636434434700446e-05, "loss": 0.6663, "step": 128075 }, { "epoch": 1.42, "learning_rate": 2.6363421619865948e-05, "loss": 0.709, "step": 128080 }, { "epoch": 1.42, "learning_rate": 2.6362498892727432e-05, "loss": 0.5968, "step": 128085 }, { "epoch": 1.42, "learning_rate": 2.6361576165588923e-05, "loss": 0.6712, "step": 128090 }, { "epoch": 1.42, "learning_rate": 2.636065343845041e-05, "loss": 0.6628, "step": 128095 }, { "epoch": 1.42, "learning_rate": 2.63597307113119e-05, "loss": 0.6478, "step": 128100 }, { "epoch": 1.42, "learning_rate": 2.6358807984173384e-05, "loss": 0.6459, "step": 128105 }, { "epoch": 1.42, "learning_rate": 2.6357885257034875e-05, "loss": 0.5969, "step": 128110 }, { "epoch": 1.42, "learning_rate": 2.6356962529896363e-05, "loss": 0.5946, "step": 128115 }, { "epoch": 1.42, "learning_rate": 2.6356039802757847e-05, "loss": 0.6662, "step": 128120 }, { "epoch": 1.42, "learning_rate": 2.6355117075619335e-05, "loss": 0.6126, "step": 128125 }, { "epoch": 1.42, "learning_rate": 2.635419434848082e-05, "loss": 0.693, "step": 128130 }, { "epoch": 1.42, "learning_rate": 2.6353271621342314e-05, "loss": 0.6844, "step": 128135 }, { "epoch": 1.42, "learning_rate": 2.63523488942038e-05, "loss": 0.6706, "step": 128140 }, { "epoch": 1.42, "learning_rate": 2.6351426167065286e-05, "loss": 0.6674, "step": 128145 }, { "epoch": 1.42, "learning_rate": 2.635050343992677e-05, "loss": 0.5756, "step": 128150 }, { "epoch": 1.42, "learning_rate": 2.6349580712788262e-05, "loss": 0.6538, "step": 128155 }, { "epoch": 1.42, "learning_rate": 2.634865798564975e-05, "loss": 0.6565, "step": 128160 }, { "epoch": 1.42, "learning_rate": 2.6347735258511234e-05, "loss": 0.608, "step": 128165 }, { "epoch": 1.42, "learning_rate": 2.6346812531372722e-05, "loss": 0.5955, "step": 128170 }, { "epoch": 1.42, "learning_rate": 2.6345889804234213e-05, "loss": 0.6148, "step": 128175 }, { "epoch": 1.42, "learning_rate": 2.63449670770957e-05, "loss": 0.6231, "step": 128180 }, { "epoch": 1.42, "learning_rate": 2.6344044349957186e-05, "loss": 0.641, "step": 128185 }, { "epoch": 1.42, "learning_rate": 2.6343121622818674e-05, "loss": 0.6706, "step": 128190 }, { "epoch": 1.42, "learning_rate": 2.6342198895680165e-05, "loss": 0.6678, "step": 128195 }, { "epoch": 1.42, "learning_rate": 2.634127616854165e-05, "loss": 0.5762, "step": 128200 }, { "epoch": 1.42, "learning_rate": 2.6340353441403137e-05, "loss": 0.6063, "step": 128205 }, { "epoch": 1.42, "learning_rate": 2.6339430714264625e-05, "loss": 0.6767, "step": 128210 }, { "epoch": 1.42, "learning_rate": 2.633850798712611e-05, "loss": 0.6346, "step": 128215 }, { "epoch": 1.42, "learning_rate": 2.63375852599876e-05, "loss": 0.6376, "step": 128220 }, { "epoch": 1.42, "learning_rate": 2.633666253284909e-05, "loss": 0.6464, "step": 128225 }, { "epoch": 1.42, "learning_rate": 2.6335739805710573e-05, "loss": 0.6145, "step": 128230 }, { "epoch": 1.42, "learning_rate": 2.633481707857206e-05, "loss": 0.6881, "step": 128235 }, { "epoch": 1.42, "learning_rate": 2.6333894351433552e-05, "loss": 0.6883, "step": 128240 }, { "epoch": 1.42, "learning_rate": 2.633297162429504e-05, "loss": 0.6793, "step": 128245 }, { "epoch": 1.42, "learning_rate": 2.6332048897156524e-05, "loss": 0.6556, "step": 128250 }, { "epoch": 1.42, "learning_rate": 2.6331126170018012e-05, "loss": 0.6376, "step": 128255 }, { "epoch": 1.42, "learning_rate": 2.6330203442879503e-05, "loss": 0.6604, "step": 128260 }, { "epoch": 1.42, "learning_rate": 2.6329280715740988e-05, "loss": 0.62, "step": 128265 }, { "epoch": 1.42, "learning_rate": 2.6328357988602476e-05, "loss": 0.6996, "step": 128270 }, { "epoch": 1.42, "learning_rate": 2.632743526146396e-05, "loss": 0.6343, "step": 128275 }, { "epoch": 1.42, "learning_rate": 2.6326512534325448e-05, "loss": 0.7229, "step": 128280 }, { "epoch": 1.42, "learning_rate": 2.632558980718694e-05, "loss": 0.6085, "step": 128285 }, { "epoch": 1.42, "learning_rate": 2.6324667080048427e-05, "loss": 0.6709, "step": 128290 }, { "epoch": 1.42, "learning_rate": 2.632374435290991e-05, "loss": 0.6179, "step": 128295 }, { "epoch": 1.42, "learning_rate": 2.63228216257714e-05, "loss": 0.6622, "step": 128300 }, { "epoch": 1.42, "learning_rate": 2.632189889863289e-05, "loss": 0.6521, "step": 128305 }, { "epoch": 1.42, "learning_rate": 2.632097617149438e-05, "loss": 0.5923, "step": 128310 }, { "epoch": 1.42, "learning_rate": 2.6320053444355863e-05, "loss": 0.6292, "step": 128315 }, { "epoch": 1.42, "learning_rate": 2.631913071721735e-05, "loss": 0.5938, "step": 128320 }, { "epoch": 1.42, "learning_rate": 2.6318207990078842e-05, "loss": 0.6356, "step": 128325 }, { "epoch": 1.42, "learning_rate": 2.6317285262940327e-05, "loss": 0.6302, "step": 128330 }, { "epoch": 1.42, "learning_rate": 2.6316362535801814e-05, "loss": 0.6921, "step": 128335 }, { "epoch": 1.42, "learning_rate": 2.63154398086633e-05, "loss": 0.6897, "step": 128340 }, { "epoch": 1.42, "learning_rate": 2.6314517081524793e-05, "loss": 0.6338, "step": 128345 }, { "epoch": 1.42, "learning_rate": 2.6313594354386278e-05, "loss": 0.6833, "step": 128350 }, { "epoch": 1.42, "learning_rate": 2.6312671627247766e-05, "loss": 0.6517, "step": 128355 }, { "epoch": 1.42, "learning_rate": 2.631174890010925e-05, "loss": 0.6623, "step": 128360 }, { "epoch": 1.42, "learning_rate": 2.6310826172970738e-05, "loss": 0.6355, "step": 128365 }, { "epoch": 1.42, "learning_rate": 2.630990344583223e-05, "loss": 0.6575, "step": 128370 }, { "epoch": 1.42, "learning_rate": 2.6308980718693714e-05, "loss": 0.6387, "step": 128375 }, { "epoch": 1.42, "learning_rate": 2.63080579915552e-05, "loss": 0.6736, "step": 128380 }, { "epoch": 1.42, "learning_rate": 2.630713526441669e-05, "loss": 0.6118, "step": 128385 }, { "epoch": 1.42, "learning_rate": 2.630621253727818e-05, "loss": 0.6721, "step": 128390 }, { "epoch": 1.42, "learning_rate": 2.6305289810139665e-05, "loss": 0.6383, "step": 128395 }, { "epoch": 1.42, "learning_rate": 2.6304367083001153e-05, "loss": 0.566, "step": 128400 }, { "epoch": 1.42, "learning_rate": 2.6303444355862637e-05, "loss": 0.6584, "step": 128405 }, { "epoch": 1.42, "learning_rate": 2.6302521628724132e-05, "loss": 0.6378, "step": 128410 }, { "epoch": 1.42, "learning_rate": 2.6301598901585617e-05, "loss": 0.6786, "step": 128415 }, { "epoch": 1.42, "learning_rate": 2.6300676174447104e-05, "loss": 0.6403, "step": 128420 }, { "epoch": 1.42, "learning_rate": 2.629975344730859e-05, "loss": 0.7081, "step": 128425 }, { "epoch": 1.42, "learning_rate": 2.6298830720170077e-05, "loss": 0.6454, "step": 128430 }, { "epoch": 1.42, "learning_rate": 2.6297907993031568e-05, "loss": 0.687, "step": 128435 }, { "epoch": 1.42, "learning_rate": 2.6296985265893052e-05, "loss": 0.6597, "step": 128440 }, { "epoch": 1.42, "learning_rate": 2.629606253875454e-05, "loss": 0.6661, "step": 128445 }, { "epoch": 1.42, "learning_rate": 2.6295139811616025e-05, "loss": 0.6732, "step": 128450 }, { "epoch": 1.42, "learning_rate": 2.629421708447752e-05, "loss": 0.6148, "step": 128455 }, { "epoch": 1.42, "learning_rate": 2.6293294357339004e-05, "loss": 0.6227, "step": 128460 }, { "epoch": 1.42, "learning_rate": 2.629237163020049e-05, "loss": 0.6576, "step": 128465 }, { "epoch": 1.42, "learning_rate": 2.6291448903061976e-05, "loss": 0.668, "step": 128470 }, { "epoch": 1.42, "learning_rate": 2.6290526175923467e-05, "loss": 0.6789, "step": 128475 }, { "epoch": 1.42, "learning_rate": 2.6289603448784955e-05, "loss": 0.6795, "step": 128480 }, { "epoch": 1.42, "learning_rate": 2.6288680721646443e-05, "loss": 0.6414, "step": 128485 }, { "epoch": 1.42, "learning_rate": 2.6287757994507928e-05, "loss": 0.653, "step": 128490 }, { "epoch": 1.42, "learning_rate": 2.628683526736942e-05, "loss": 0.6976, "step": 128495 }, { "epoch": 1.42, "learning_rate": 2.6285912540230907e-05, "loss": 0.6589, "step": 128500 }, { "epoch": 1.42, "learning_rate": 2.628498981309239e-05, "loss": 0.6304, "step": 128505 }, { "epoch": 1.42, "learning_rate": 2.628406708595388e-05, "loss": 0.641, "step": 128510 }, { "epoch": 1.42, "learning_rate": 2.6283144358815363e-05, "loss": 0.6369, "step": 128515 }, { "epoch": 1.42, "learning_rate": 2.6282221631676858e-05, "loss": 0.7094, "step": 128520 }, { "epoch": 1.42, "learning_rate": 2.6281298904538342e-05, "loss": 0.655, "step": 128525 }, { "epoch": 1.42, "learning_rate": 2.628037617739983e-05, "loss": 0.7138, "step": 128530 }, { "epoch": 1.42, "learning_rate": 2.6279453450261315e-05, "loss": 0.6439, "step": 128535 }, { "epoch": 1.42, "learning_rate": 2.6278530723122806e-05, "loss": 0.6844, "step": 128540 }, { "epoch": 1.42, "learning_rate": 2.6277607995984294e-05, "loss": 0.6386, "step": 128545 }, { "epoch": 1.42, "learning_rate": 2.627668526884578e-05, "loss": 0.6857, "step": 128550 }, { "epoch": 1.42, "learning_rate": 2.6275762541707266e-05, "loss": 0.6364, "step": 128555 }, { "epoch": 1.42, "learning_rate": 2.6274839814568757e-05, "loss": 0.5955, "step": 128560 }, { "epoch": 1.42, "learning_rate": 2.6273917087430245e-05, "loss": 0.6788, "step": 128565 }, { "epoch": 1.42, "learning_rate": 2.627299436029173e-05, "loss": 0.7275, "step": 128570 }, { "epoch": 1.42, "learning_rate": 2.6272071633153218e-05, "loss": 0.6502, "step": 128575 }, { "epoch": 1.42, "learning_rate": 2.6271148906014702e-05, "loss": 0.6525, "step": 128580 }, { "epoch": 1.42, "learning_rate": 2.6270226178876197e-05, "loss": 0.6126, "step": 128585 }, { "epoch": 1.42, "learning_rate": 2.626930345173768e-05, "loss": 0.6184, "step": 128590 }, { "epoch": 1.42, "learning_rate": 2.626838072459917e-05, "loss": 0.6609, "step": 128595 }, { "epoch": 1.42, "learning_rate": 2.6267457997460653e-05, "loss": 0.6369, "step": 128600 }, { "epoch": 1.42, "learning_rate": 2.6266535270322145e-05, "loss": 0.6207, "step": 128605 }, { "epoch": 1.42, "learning_rate": 2.6265612543183633e-05, "loss": 0.6449, "step": 128610 }, { "epoch": 1.42, "learning_rate": 2.6264689816045117e-05, "loss": 0.6515, "step": 128615 }, { "epoch": 1.42, "learning_rate": 2.6263767088906605e-05, "loss": 0.6419, "step": 128620 }, { "epoch": 1.42, "learning_rate": 2.6262844361768096e-05, "loss": 0.6415, "step": 128625 }, { "epoch": 1.42, "learning_rate": 2.6261921634629584e-05, "loss": 0.6772, "step": 128630 }, { "epoch": 1.42, "learning_rate": 2.626099890749107e-05, "loss": 0.673, "step": 128635 }, { "epoch": 1.42, "learning_rate": 2.6260076180352556e-05, "loss": 0.6635, "step": 128640 }, { "epoch": 1.42, "learning_rate": 2.6259153453214047e-05, "loss": 0.6672, "step": 128645 }, { "epoch": 1.42, "learning_rate": 2.6258230726075532e-05, "loss": 0.651, "step": 128650 }, { "epoch": 1.42, "learning_rate": 2.625730799893702e-05, "loss": 0.662, "step": 128655 }, { "epoch": 1.42, "learning_rate": 2.6256385271798508e-05, "loss": 0.6783, "step": 128660 }, { "epoch": 1.42, "learning_rate": 2.6255462544659992e-05, "loss": 0.6825, "step": 128665 }, { "epoch": 1.42, "learning_rate": 2.6254539817521483e-05, "loss": 0.698, "step": 128670 }, { "epoch": 1.42, "learning_rate": 2.625361709038297e-05, "loss": 0.6274, "step": 128675 }, { "epoch": 1.42, "learning_rate": 2.6252694363244456e-05, "loss": 0.725, "step": 128680 }, { "epoch": 1.42, "learning_rate": 2.6251771636105943e-05, "loss": 0.6572, "step": 128685 }, { "epoch": 1.42, "learning_rate": 2.6250848908967435e-05, "loss": 0.6228, "step": 128690 }, { "epoch": 1.43, "learning_rate": 2.6249926181828923e-05, "loss": 0.6606, "step": 128695 }, { "epoch": 1.43, "learning_rate": 2.6249003454690407e-05, "loss": 0.6323, "step": 128700 }, { "epoch": 1.43, "learning_rate": 2.6248080727551895e-05, "loss": 0.6609, "step": 128705 }, { "epoch": 1.43, "learning_rate": 2.6247158000413386e-05, "loss": 0.6211, "step": 128710 }, { "epoch": 1.43, "learning_rate": 2.624623527327487e-05, "loss": 0.6399, "step": 128715 }, { "epoch": 1.43, "learning_rate": 2.624531254613636e-05, "loss": 0.6465, "step": 128720 }, { "epoch": 1.43, "learning_rate": 2.6244389818997843e-05, "loss": 0.5854, "step": 128725 }, { "epoch": 1.43, "learning_rate": 2.624346709185933e-05, "loss": 0.7066, "step": 128730 }, { "epoch": 1.43, "learning_rate": 2.6242544364720822e-05, "loss": 0.6221, "step": 128735 }, { "epoch": 1.43, "learning_rate": 2.624162163758231e-05, "loss": 0.6695, "step": 128740 }, { "epoch": 1.43, "learning_rate": 2.6240698910443794e-05, "loss": 0.639, "step": 128745 }, { "epoch": 1.43, "learning_rate": 2.6239776183305282e-05, "loss": 0.6559, "step": 128750 }, { "epoch": 1.43, "learning_rate": 2.6238853456166773e-05, "loss": 0.6234, "step": 128755 }, { "epoch": 1.43, "learning_rate": 2.6237930729028258e-05, "loss": 0.6014, "step": 128760 }, { "epoch": 1.43, "learning_rate": 2.6237008001889746e-05, "loss": 0.663, "step": 128765 }, { "epoch": 1.43, "learning_rate": 2.6236085274751234e-05, "loss": 0.6193, "step": 128770 }, { "epoch": 1.43, "learning_rate": 2.6235162547612725e-05, "loss": 0.6345, "step": 128775 }, { "epoch": 1.43, "learning_rate": 2.623423982047421e-05, "loss": 0.6212, "step": 128780 }, { "epoch": 1.43, "learning_rate": 2.6233317093335697e-05, "loss": 0.6086, "step": 128785 }, { "epoch": 1.43, "learning_rate": 2.623239436619718e-05, "loss": 0.606, "step": 128790 }, { "epoch": 1.43, "learning_rate": 2.6231471639058676e-05, "loss": 0.632, "step": 128795 }, { "epoch": 1.43, "learning_rate": 2.623054891192016e-05, "loss": 0.6557, "step": 128800 }, { "epoch": 1.43, "learning_rate": 2.622962618478165e-05, "loss": 0.7096, "step": 128805 }, { "epoch": 1.43, "learning_rate": 2.6228703457643133e-05, "loss": 0.6228, "step": 128810 }, { "epoch": 1.43, "learning_rate": 2.622778073050462e-05, "loss": 0.6923, "step": 128815 }, { "epoch": 1.43, "learning_rate": 2.6226858003366112e-05, "loss": 0.652, "step": 128820 }, { "epoch": 1.43, "learning_rate": 2.6225935276227596e-05, "loss": 0.6694, "step": 128825 }, { "epoch": 1.43, "learning_rate": 2.6225012549089084e-05, "loss": 0.5834, "step": 128830 }, { "epoch": 1.43, "learning_rate": 2.622408982195057e-05, "loss": 0.6078, "step": 128835 }, { "epoch": 1.43, "learning_rate": 2.6223167094812063e-05, "loss": 0.685, "step": 128840 }, { "epoch": 1.43, "learning_rate": 2.6222244367673548e-05, "loss": 0.6987, "step": 128845 }, { "epoch": 1.43, "learning_rate": 2.6221321640535036e-05, "loss": 0.6736, "step": 128850 }, { "epoch": 1.43, "learning_rate": 2.622039891339652e-05, "loss": 0.652, "step": 128855 }, { "epoch": 1.43, "learning_rate": 2.621947618625801e-05, "loss": 0.6317, "step": 128860 }, { "epoch": 1.43, "learning_rate": 2.62185534591195e-05, "loss": 0.6811, "step": 128865 }, { "epoch": 1.43, "learning_rate": 2.6217630731980987e-05, "loss": 0.5877, "step": 128870 }, { "epoch": 1.43, "learning_rate": 2.621670800484247e-05, "loss": 0.6337, "step": 128875 }, { "epoch": 1.43, "learning_rate": 2.621578527770396e-05, "loss": 0.6568, "step": 128880 }, { "epoch": 1.43, "learning_rate": 2.621486255056545e-05, "loss": 0.6493, "step": 128885 }, { "epoch": 1.43, "learning_rate": 2.6213939823426935e-05, "loss": 0.6018, "step": 128890 }, { "epoch": 1.43, "learning_rate": 2.6213017096288423e-05, "loss": 0.6074, "step": 128895 }, { "epoch": 1.43, "learning_rate": 2.6212094369149907e-05, "loss": 0.6578, "step": 128900 }, { "epoch": 1.43, "learning_rate": 2.6211171642011402e-05, "loss": 0.6742, "step": 128905 }, { "epoch": 1.43, "learning_rate": 2.6210248914872886e-05, "loss": 0.6967, "step": 128910 }, { "epoch": 1.43, "learning_rate": 2.6209326187734374e-05, "loss": 0.559, "step": 128915 }, { "epoch": 1.43, "learning_rate": 2.620840346059586e-05, "loss": 0.6763, "step": 128920 }, { "epoch": 1.43, "learning_rate": 2.620748073345735e-05, "loss": 0.619, "step": 128925 }, { "epoch": 1.43, "learning_rate": 2.6206558006318838e-05, "loss": 0.6857, "step": 128930 }, { "epoch": 1.43, "learning_rate": 2.6205635279180322e-05, "loss": 0.6099, "step": 128935 }, { "epoch": 1.43, "learning_rate": 2.620471255204181e-05, "loss": 0.6391, "step": 128940 }, { "epoch": 1.43, "learning_rate": 2.62037898249033e-05, "loss": 0.6769, "step": 128945 }, { "epoch": 1.43, "learning_rate": 2.620286709776479e-05, "loss": 0.6669, "step": 128950 }, { "epoch": 1.43, "learning_rate": 2.6201944370626274e-05, "loss": 0.6712, "step": 128955 }, { "epoch": 1.43, "learning_rate": 2.620102164348776e-05, "loss": 0.6111, "step": 128960 }, { "epoch": 1.43, "learning_rate": 2.6200098916349246e-05, "loss": 0.6573, "step": 128965 }, { "epoch": 1.43, "learning_rate": 2.619917618921074e-05, "loss": 0.6395, "step": 128970 }, { "epoch": 1.43, "learning_rate": 2.6198253462072225e-05, "loss": 0.6234, "step": 128975 }, { "epoch": 1.43, "learning_rate": 2.6197330734933713e-05, "loss": 0.7196, "step": 128980 }, { "epoch": 1.43, "learning_rate": 2.6196408007795197e-05, "loss": 0.6217, "step": 128985 }, { "epoch": 1.43, "learning_rate": 2.619548528065669e-05, "loss": 0.6363, "step": 128990 }, { "epoch": 1.43, "learning_rate": 2.6194562553518177e-05, "loss": 0.6421, "step": 128995 }, { "epoch": 1.43, "learning_rate": 2.619363982637966e-05, "loss": 0.651, "step": 129000 }, { "epoch": 1.43, "eval_loss": 0.6222099661827087, "eval_runtime": 69.6151, "eval_samples_per_second": 28.729, "eval_steps_per_second": 14.365, "step": 129000 }, { "epoch": 1.43, "learning_rate": 2.619271709924115e-05, "loss": 0.6264, "step": 129005 }, { "epoch": 1.43, "learning_rate": 2.619179437210264e-05, "loss": 0.6153, "step": 129010 }, { "epoch": 1.43, "learning_rate": 2.6190871644964128e-05, "loss": 0.6553, "step": 129015 }, { "epoch": 1.43, "learning_rate": 2.6189948917825612e-05, "loss": 0.6741, "step": 129020 }, { "epoch": 1.43, "learning_rate": 2.61890261906871e-05, "loss": 0.5572, "step": 129025 }, { "epoch": 1.43, "learning_rate": 2.618810346354859e-05, "loss": 0.6855, "step": 129030 }, { "epoch": 1.43, "learning_rate": 2.6187180736410076e-05, "loss": 0.5804, "step": 129035 }, { "epoch": 1.43, "learning_rate": 2.6186258009271564e-05, "loss": 0.6234, "step": 129040 }, { "epoch": 1.43, "learning_rate": 2.618533528213305e-05, "loss": 0.6717, "step": 129045 }, { "epoch": 1.43, "learning_rate": 2.6184412554994536e-05, "loss": 0.6723, "step": 129050 }, { "epoch": 1.43, "learning_rate": 2.6183489827856027e-05, "loss": 0.6458, "step": 129055 }, { "epoch": 1.43, "learning_rate": 2.6182567100717515e-05, "loss": 0.6971, "step": 129060 }, { "epoch": 1.43, "learning_rate": 2.6181644373579e-05, "loss": 0.6715, "step": 129065 }, { "epoch": 1.43, "learning_rate": 2.6180721646440487e-05, "loss": 0.6613, "step": 129070 }, { "epoch": 1.43, "learning_rate": 2.617979891930198e-05, "loss": 0.5842, "step": 129075 }, { "epoch": 1.43, "learning_rate": 2.6178876192163467e-05, "loss": 0.7074, "step": 129080 }, { "epoch": 1.43, "learning_rate": 2.617795346502495e-05, "loss": 0.6768, "step": 129085 }, { "epoch": 1.43, "learning_rate": 2.617703073788644e-05, "loss": 0.646, "step": 129090 }, { "epoch": 1.43, "learning_rate": 2.617610801074793e-05, "loss": 0.6363, "step": 129095 }, { "epoch": 1.43, "learning_rate": 2.6175185283609415e-05, "loss": 0.6799, "step": 129100 }, { "epoch": 1.43, "learning_rate": 2.6174262556470902e-05, "loss": 0.6288, "step": 129105 }, { "epoch": 1.43, "learning_rate": 2.6173339829332387e-05, "loss": 0.6968, "step": 129110 }, { "epoch": 1.43, "learning_rate": 2.6172417102193875e-05, "loss": 0.6604, "step": 129115 }, { "epoch": 1.43, "learning_rate": 2.6171494375055366e-05, "loss": 0.6328, "step": 129120 }, { "epoch": 1.43, "learning_rate": 2.6170571647916854e-05, "loss": 0.6066, "step": 129125 }, { "epoch": 1.43, "learning_rate": 2.6169648920778338e-05, "loss": 0.6197, "step": 129130 }, { "epoch": 1.43, "learning_rate": 2.6168726193639826e-05, "loss": 0.6534, "step": 129135 }, { "epoch": 1.43, "learning_rate": 2.6167803466501317e-05, "loss": 0.6739, "step": 129140 }, { "epoch": 1.43, "learning_rate": 2.6166880739362802e-05, "loss": 0.6828, "step": 129145 }, { "epoch": 1.43, "learning_rate": 2.616595801222429e-05, "loss": 0.6713, "step": 129150 }, { "epoch": 1.43, "learning_rate": 2.6165035285085778e-05, "loss": 0.6848, "step": 129155 }, { "epoch": 1.43, "learning_rate": 2.616411255794727e-05, "loss": 0.614, "step": 129160 }, { "epoch": 1.43, "learning_rate": 2.6163189830808753e-05, "loss": 0.6323, "step": 129165 }, { "epoch": 1.43, "learning_rate": 2.616226710367024e-05, "loss": 0.6679, "step": 129170 }, { "epoch": 1.43, "learning_rate": 2.6161344376531726e-05, "loss": 0.6998, "step": 129175 }, { "epoch": 1.43, "learning_rate": 2.616042164939322e-05, "loss": 0.5452, "step": 129180 }, { "epoch": 1.43, "learning_rate": 2.6159498922254705e-05, "loss": 0.6422, "step": 129185 }, { "epoch": 1.43, "learning_rate": 2.6158576195116192e-05, "loss": 0.6477, "step": 129190 }, { "epoch": 1.43, "learning_rate": 2.6157653467977677e-05, "loss": 0.6314, "step": 129195 }, { "epoch": 1.43, "learning_rate": 2.6156730740839165e-05, "loss": 0.6547, "step": 129200 }, { "epoch": 1.43, "learning_rate": 2.6155808013700656e-05, "loss": 0.6328, "step": 129205 }, { "epoch": 1.43, "learning_rate": 2.615488528656214e-05, "loss": 0.6468, "step": 129210 }, { "epoch": 1.43, "learning_rate": 2.615396255942363e-05, "loss": 0.6318, "step": 129215 }, { "epoch": 1.43, "learning_rate": 2.6153039832285113e-05, "loss": 0.6305, "step": 129220 }, { "epoch": 1.43, "learning_rate": 2.6152117105146607e-05, "loss": 0.599, "step": 129225 }, { "epoch": 1.43, "learning_rate": 2.6151194378008092e-05, "loss": 0.7059, "step": 129230 }, { "epoch": 1.43, "learning_rate": 2.615027165086958e-05, "loss": 0.6556, "step": 129235 }, { "epoch": 1.43, "learning_rate": 2.6149348923731064e-05, "loss": 0.6373, "step": 129240 }, { "epoch": 1.43, "learning_rate": 2.6148426196592555e-05, "loss": 0.6567, "step": 129245 }, { "epoch": 1.43, "learning_rate": 2.6147503469454043e-05, "loss": 0.6412, "step": 129250 }, { "epoch": 1.43, "learning_rate": 2.614658074231553e-05, "loss": 0.6246, "step": 129255 }, { "epoch": 1.43, "learning_rate": 2.6145658015177016e-05, "loss": 0.6707, "step": 129260 }, { "epoch": 1.43, "learning_rate": 2.6144735288038503e-05, "loss": 0.6894, "step": 129265 }, { "epoch": 1.43, "learning_rate": 2.6143812560899995e-05, "loss": 0.615, "step": 129270 }, { "epoch": 1.43, "learning_rate": 2.614288983376148e-05, "loss": 0.6553, "step": 129275 }, { "epoch": 1.43, "learning_rate": 2.6141967106622967e-05, "loss": 0.6385, "step": 129280 }, { "epoch": 1.43, "learning_rate": 2.614104437948445e-05, "loss": 0.6315, "step": 129285 }, { "epoch": 1.43, "learning_rate": 2.6140121652345946e-05, "loss": 0.6515, "step": 129290 }, { "epoch": 1.43, "learning_rate": 2.613919892520743e-05, "loss": 0.6827, "step": 129295 }, { "epoch": 1.43, "learning_rate": 2.613827619806892e-05, "loss": 0.6512, "step": 129300 }, { "epoch": 1.43, "learning_rate": 2.6137353470930403e-05, "loss": 0.6646, "step": 129305 }, { "epoch": 1.43, "learning_rate": 2.6136430743791894e-05, "loss": 0.6418, "step": 129310 }, { "epoch": 1.43, "learning_rate": 2.6135508016653382e-05, "loss": 0.6111, "step": 129315 }, { "epoch": 1.43, "learning_rate": 2.6134585289514866e-05, "loss": 0.6374, "step": 129320 }, { "epoch": 1.43, "learning_rate": 2.6133662562376354e-05, "loss": 0.6328, "step": 129325 }, { "epoch": 1.43, "learning_rate": 2.6132739835237845e-05, "loss": 0.619, "step": 129330 }, { "epoch": 1.43, "learning_rate": 2.6131817108099333e-05, "loss": 0.658, "step": 129335 }, { "epoch": 1.43, "learning_rate": 2.6130894380960818e-05, "loss": 0.5962, "step": 129340 }, { "epoch": 1.43, "learning_rate": 2.6129971653822306e-05, "loss": 0.6564, "step": 129345 }, { "epoch": 1.43, "learning_rate": 2.612904892668379e-05, "loss": 0.6867, "step": 129350 }, { "epoch": 1.43, "learning_rate": 2.6128126199545285e-05, "loss": 0.643, "step": 129355 }, { "epoch": 1.43, "learning_rate": 2.612720347240677e-05, "loss": 0.6734, "step": 129360 }, { "epoch": 1.43, "learning_rate": 2.6126280745268257e-05, "loss": 0.6823, "step": 129365 }, { "epoch": 1.43, "learning_rate": 2.612535801812974e-05, "loss": 0.7117, "step": 129370 }, { "epoch": 1.43, "learning_rate": 2.6124435290991233e-05, "loss": 0.6834, "step": 129375 }, { "epoch": 1.43, "learning_rate": 2.612351256385272e-05, "loss": 0.6574, "step": 129380 }, { "epoch": 1.43, "learning_rate": 2.6122589836714205e-05, "loss": 0.6081, "step": 129385 }, { "epoch": 1.43, "learning_rate": 2.6121667109575693e-05, "loss": 0.6555, "step": 129390 }, { "epoch": 1.43, "learning_rate": 2.6120744382437184e-05, "loss": 0.6151, "step": 129395 }, { "epoch": 1.43, "learning_rate": 2.6119821655298672e-05, "loss": 0.6981, "step": 129400 }, { "epoch": 1.43, "learning_rate": 2.6118898928160156e-05, "loss": 0.6783, "step": 129405 }, { "epoch": 1.43, "learning_rate": 2.6117976201021644e-05, "loss": 0.6502, "step": 129410 }, { "epoch": 1.43, "learning_rate": 2.611705347388313e-05, "loss": 0.7555, "step": 129415 }, { "epoch": 1.43, "learning_rate": 2.611613074674462e-05, "loss": 0.6793, "step": 129420 }, { "epoch": 1.43, "learning_rate": 2.6115208019606108e-05, "loss": 0.6116, "step": 129425 }, { "epoch": 1.43, "learning_rate": 2.6114285292467596e-05, "loss": 0.6511, "step": 129430 }, { "epoch": 1.43, "learning_rate": 2.611336256532908e-05, "loss": 0.6361, "step": 129435 }, { "epoch": 1.43, "learning_rate": 2.611243983819057e-05, "loss": 0.6625, "step": 129440 }, { "epoch": 1.43, "learning_rate": 2.611151711105206e-05, "loss": 0.6982, "step": 129445 }, { "epoch": 1.43, "learning_rate": 2.6110594383913544e-05, "loss": 0.6385, "step": 129450 }, { "epoch": 1.43, "learning_rate": 2.610967165677503e-05, "loss": 0.6984, "step": 129455 }, { "epoch": 1.43, "learning_rate": 2.6108748929636523e-05, "loss": 0.6497, "step": 129460 }, { "epoch": 1.43, "learning_rate": 2.610782620249801e-05, "loss": 0.6085, "step": 129465 }, { "epoch": 1.43, "learning_rate": 2.6106903475359495e-05, "loss": 0.6641, "step": 129470 }, { "epoch": 1.43, "learning_rate": 2.6105980748220983e-05, "loss": 0.6347, "step": 129475 }, { "epoch": 1.43, "learning_rate": 2.6105058021082474e-05, "loss": 0.603, "step": 129480 }, { "epoch": 1.43, "learning_rate": 2.610413529394396e-05, "loss": 0.6571, "step": 129485 }, { "epoch": 1.43, "learning_rate": 2.6103212566805446e-05, "loss": 0.6416, "step": 129490 }, { "epoch": 1.43, "learning_rate": 2.610228983966693e-05, "loss": 0.6242, "step": 129495 }, { "epoch": 1.43, "learning_rate": 2.610136711252842e-05, "loss": 0.6425, "step": 129500 }, { "epoch": 1.43, "learning_rate": 2.610044438538991e-05, "loss": 0.642, "step": 129505 }, { "epoch": 1.43, "learning_rate": 2.6099521658251398e-05, "loss": 0.6614, "step": 129510 }, { "epoch": 1.43, "learning_rate": 2.6098598931112882e-05, "loss": 0.5963, "step": 129515 }, { "epoch": 1.43, "learning_rate": 2.609767620397437e-05, "loss": 0.6613, "step": 129520 }, { "epoch": 1.43, "learning_rate": 2.609675347683586e-05, "loss": 0.6562, "step": 129525 }, { "epoch": 1.43, "learning_rate": 2.6095830749697346e-05, "loss": 0.6234, "step": 129530 }, { "epoch": 1.43, "learning_rate": 2.6094908022558834e-05, "loss": 0.6611, "step": 129535 }, { "epoch": 1.43, "learning_rate": 2.609398529542032e-05, "loss": 0.6822, "step": 129540 }, { "epoch": 1.43, "learning_rate": 2.6093062568281813e-05, "loss": 0.6772, "step": 129545 }, { "epoch": 1.43, "learning_rate": 2.6092139841143297e-05, "loss": 0.6517, "step": 129550 }, { "epoch": 1.43, "learning_rate": 2.6091217114004785e-05, "loss": 0.7052, "step": 129555 }, { "epoch": 1.43, "learning_rate": 2.609029438686627e-05, "loss": 0.699, "step": 129560 }, { "epoch": 1.43, "learning_rate": 2.6089371659727757e-05, "loss": 0.6629, "step": 129565 }, { "epoch": 1.43, "learning_rate": 2.608844893258925e-05, "loss": 0.6563, "step": 129570 }, { "epoch": 1.43, "learning_rate": 2.6087526205450736e-05, "loss": 0.6748, "step": 129575 }, { "epoch": 1.43, "learning_rate": 2.608660347831222e-05, "loss": 0.6209, "step": 129580 }, { "epoch": 1.43, "learning_rate": 2.608568075117371e-05, "loss": 0.6528, "step": 129585 }, { "epoch": 1.43, "learning_rate": 2.60847580240352e-05, "loss": 0.666, "step": 129590 }, { "epoch": 1.43, "learning_rate": 2.6083835296896684e-05, "loss": 0.6029, "step": 129595 }, { "epoch": 1.44, "learning_rate": 2.6082912569758172e-05, "loss": 0.6297, "step": 129600 }, { "epoch": 1.44, "learning_rate": 2.608198984261966e-05, "loss": 0.6229, "step": 129605 }, { "epoch": 1.44, "learning_rate": 2.608106711548115e-05, "loss": 0.629, "step": 129610 }, { "epoch": 1.44, "learning_rate": 2.6080144388342636e-05, "loss": 0.6687, "step": 129615 }, { "epoch": 1.44, "learning_rate": 2.6079221661204124e-05, "loss": 0.6544, "step": 129620 }, { "epoch": 1.44, "learning_rate": 2.6078298934065608e-05, "loss": 0.6388, "step": 129625 }, { "epoch": 1.44, "learning_rate": 2.60773762069271e-05, "loss": 0.6245, "step": 129630 }, { "epoch": 1.44, "learning_rate": 2.6076453479788587e-05, "loss": 0.6568, "step": 129635 }, { "epoch": 1.44, "learning_rate": 2.6075530752650075e-05, "loss": 0.6586, "step": 129640 }, { "epoch": 1.44, "learning_rate": 2.607460802551156e-05, "loss": 0.6288, "step": 129645 }, { "epoch": 1.44, "learning_rate": 2.6073685298373047e-05, "loss": 0.6438, "step": 129650 }, { "epoch": 1.44, "learning_rate": 2.607276257123454e-05, "loss": 0.6836, "step": 129655 }, { "epoch": 1.44, "learning_rate": 2.6071839844096023e-05, "loss": 0.6905, "step": 129660 }, { "epoch": 1.44, "learning_rate": 2.607091711695751e-05, "loss": 0.6363, "step": 129665 }, { "epoch": 1.44, "learning_rate": 2.6069994389818995e-05, "loss": 0.6032, "step": 129670 }, { "epoch": 1.44, "learning_rate": 2.606907166268049e-05, "loss": 0.6321, "step": 129675 }, { "epoch": 1.44, "learning_rate": 2.6068148935541975e-05, "loss": 0.6207, "step": 129680 }, { "epoch": 1.44, "learning_rate": 2.6067226208403462e-05, "loss": 0.6371, "step": 129685 }, { "epoch": 1.44, "learning_rate": 2.6066303481264947e-05, "loss": 0.6359, "step": 129690 }, { "epoch": 1.44, "learning_rate": 2.6065380754126438e-05, "loss": 0.6203, "step": 129695 }, { "epoch": 1.44, "learning_rate": 2.6064458026987926e-05, "loss": 0.6214, "step": 129700 }, { "epoch": 1.44, "learning_rate": 2.606353529984941e-05, "loss": 0.6127, "step": 129705 }, { "epoch": 1.44, "learning_rate": 2.6062612572710898e-05, "loss": 0.5932, "step": 129710 }, { "epoch": 1.44, "learning_rate": 2.6061689845572386e-05, "loss": 0.6407, "step": 129715 }, { "epoch": 1.44, "learning_rate": 2.6060767118433877e-05, "loss": 0.6185, "step": 129720 }, { "epoch": 1.44, "learning_rate": 2.6059844391295362e-05, "loss": 0.6545, "step": 129725 }, { "epoch": 1.44, "learning_rate": 2.605892166415685e-05, "loss": 0.6247, "step": 129730 }, { "epoch": 1.44, "learning_rate": 2.6057998937018334e-05, "loss": 0.6725, "step": 129735 }, { "epoch": 1.44, "learning_rate": 2.605707620987983e-05, "loss": 0.6086, "step": 129740 }, { "epoch": 1.44, "learning_rate": 2.6056153482741313e-05, "loss": 0.6303, "step": 129745 }, { "epoch": 1.44, "learning_rate": 2.60552307556028e-05, "loss": 0.6598, "step": 129750 }, { "epoch": 1.44, "learning_rate": 2.6054308028464285e-05, "loss": 0.6424, "step": 129755 }, { "epoch": 1.44, "learning_rate": 2.6053385301325777e-05, "loss": 0.6357, "step": 129760 }, { "epoch": 1.44, "learning_rate": 2.6052462574187265e-05, "loss": 0.6554, "step": 129765 }, { "epoch": 1.44, "learning_rate": 2.605153984704875e-05, "loss": 0.6317, "step": 129770 }, { "epoch": 1.44, "learning_rate": 2.6050617119910237e-05, "loss": 0.6454, "step": 129775 }, { "epoch": 1.44, "learning_rate": 2.6049694392771728e-05, "loss": 0.6396, "step": 129780 }, { "epoch": 1.44, "learning_rate": 2.6048771665633216e-05, "loss": 0.6341, "step": 129785 }, { "epoch": 1.44, "learning_rate": 2.60478489384947e-05, "loss": 0.6267, "step": 129790 }, { "epoch": 1.44, "learning_rate": 2.6046926211356188e-05, "loss": 0.6569, "step": 129795 }, { "epoch": 1.44, "learning_rate": 2.6046003484217673e-05, "loss": 0.6838, "step": 129800 }, { "epoch": 1.44, "learning_rate": 2.6045080757079164e-05, "loss": 0.6137, "step": 129805 }, { "epoch": 1.44, "learning_rate": 2.6044158029940652e-05, "loss": 0.6876, "step": 129810 }, { "epoch": 1.44, "learning_rate": 2.604323530280214e-05, "loss": 0.6611, "step": 129815 }, { "epoch": 1.44, "learning_rate": 2.6042312575663624e-05, "loss": 0.6369, "step": 129820 }, { "epoch": 1.44, "learning_rate": 2.6041389848525115e-05, "loss": 0.6685, "step": 129825 }, { "epoch": 1.44, "learning_rate": 2.6040467121386603e-05, "loss": 0.677, "step": 129830 }, { "epoch": 1.44, "learning_rate": 2.6039544394248088e-05, "loss": 0.666, "step": 129835 }, { "epoch": 1.44, "learning_rate": 2.6038621667109576e-05, "loss": 0.6656, "step": 129840 }, { "epoch": 1.44, "learning_rate": 2.6037698939971067e-05, "loss": 0.6268, "step": 129845 }, { "epoch": 1.44, "learning_rate": 2.6036776212832555e-05, "loss": 0.6279, "step": 129850 }, { "epoch": 1.44, "learning_rate": 2.603585348569404e-05, "loss": 0.6448, "step": 129855 }, { "epoch": 1.44, "learning_rate": 2.6034930758555527e-05, "loss": 0.6127, "step": 129860 }, { "epoch": 1.44, "learning_rate": 2.6034008031417018e-05, "loss": 0.636, "step": 129865 }, { "epoch": 1.44, "learning_rate": 2.6033085304278503e-05, "loss": 0.6351, "step": 129870 }, { "epoch": 1.44, "learning_rate": 2.603216257713999e-05, "loss": 0.6388, "step": 129875 }, { "epoch": 1.44, "learning_rate": 2.6031239850001475e-05, "loss": 0.6537, "step": 129880 }, { "epoch": 1.44, "learning_rate": 2.6030317122862963e-05, "loss": 0.6127, "step": 129885 }, { "epoch": 1.44, "learning_rate": 2.6029394395724454e-05, "loss": 0.6961, "step": 129890 }, { "epoch": 1.44, "learning_rate": 2.6028471668585942e-05, "loss": 0.6496, "step": 129895 }, { "epoch": 1.44, "learning_rate": 2.6027548941447426e-05, "loss": 0.6623, "step": 129900 }, { "epoch": 1.44, "learning_rate": 2.6026626214308914e-05, "loss": 0.6422, "step": 129905 }, { "epoch": 1.44, "learning_rate": 2.6025703487170405e-05, "loss": 0.6584, "step": 129910 }, { "epoch": 1.44, "learning_rate": 2.6024780760031893e-05, "loss": 0.7142, "step": 129915 }, { "epoch": 1.44, "learning_rate": 2.6023858032893378e-05, "loss": 0.6385, "step": 129920 }, { "epoch": 1.44, "learning_rate": 2.6022935305754866e-05, "loss": 0.6122, "step": 129925 }, { "epoch": 1.44, "learning_rate": 2.6022012578616357e-05, "loss": 0.6151, "step": 129930 }, { "epoch": 1.44, "learning_rate": 2.602108985147784e-05, "loss": 0.644, "step": 129935 }, { "epoch": 1.44, "learning_rate": 2.602016712433933e-05, "loss": 0.6803, "step": 129940 }, { "epoch": 1.44, "learning_rate": 2.6019244397200814e-05, "loss": 0.6215, "step": 129945 }, { "epoch": 1.44, "learning_rate": 2.60183216700623e-05, "loss": 0.6508, "step": 129950 }, { "epoch": 1.44, "learning_rate": 2.6017398942923793e-05, "loss": 0.618, "step": 129955 }, { "epoch": 1.44, "learning_rate": 2.601647621578528e-05, "loss": 0.6343, "step": 129960 }, { "epoch": 1.44, "learning_rate": 2.6015553488646765e-05, "loss": 0.6283, "step": 129965 }, { "epoch": 1.44, "learning_rate": 2.6014630761508253e-05, "loss": 0.6438, "step": 129970 }, { "epoch": 1.44, "learning_rate": 2.6013708034369744e-05, "loss": 0.6856, "step": 129975 }, { "epoch": 1.44, "learning_rate": 2.601278530723123e-05, "loss": 0.6298, "step": 129980 }, { "epoch": 1.44, "learning_rate": 2.6011862580092716e-05, "loss": 0.6127, "step": 129985 }, { "epoch": 1.44, "learning_rate": 2.6010939852954204e-05, "loss": 0.6128, "step": 129990 }, { "epoch": 1.44, "learning_rate": 2.6010017125815695e-05, "loss": 0.6533, "step": 129995 }, { "epoch": 1.44, "learning_rate": 2.600909439867718e-05, "loss": 0.5558, "step": 130000 }, { "epoch": 1.44, "eval_loss": 0.6231483817100525, "eval_runtime": 69.6222, "eval_samples_per_second": 28.726, "eval_steps_per_second": 14.363, "step": 130000 }, { "epoch": 1.44, "learning_rate": 2.6008171671538668e-05, "loss": 0.6772, "step": 130005 }, { "epoch": 1.44, "learning_rate": 2.6007248944400152e-05, "loss": 0.6522, "step": 130010 }, { "epoch": 1.44, "learning_rate": 2.6006326217261643e-05, "loss": 0.5927, "step": 130015 }, { "epoch": 1.44, "learning_rate": 2.600540349012313e-05, "loss": 0.6395, "step": 130020 }, { "epoch": 1.44, "learning_rate": 2.600448076298462e-05, "loss": 0.6549, "step": 130025 }, { "epoch": 1.44, "learning_rate": 2.6003558035846104e-05, "loss": 0.6193, "step": 130030 }, { "epoch": 1.44, "learning_rate": 2.600263530870759e-05, "loss": 0.6854, "step": 130035 }, { "epoch": 1.44, "learning_rate": 2.6001712581569083e-05, "loss": 0.7071, "step": 130040 }, { "epoch": 1.44, "learning_rate": 2.6000789854430567e-05, "loss": 0.6969, "step": 130045 }, { "epoch": 1.44, "learning_rate": 2.5999867127292055e-05, "loss": 0.6351, "step": 130050 }, { "epoch": 1.44, "learning_rate": 2.599894440015354e-05, "loss": 0.6967, "step": 130055 }, { "epoch": 1.44, "learning_rate": 2.5998021673015034e-05, "loss": 0.6228, "step": 130060 }, { "epoch": 1.44, "learning_rate": 2.599709894587652e-05, "loss": 0.6367, "step": 130065 }, { "epoch": 1.44, "learning_rate": 2.5996176218738006e-05, "loss": 0.6548, "step": 130070 }, { "epoch": 1.44, "learning_rate": 2.599525349159949e-05, "loss": 0.6916, "step": 130075 }, { "epoch": 1.44, "learning_rate": 2.5994330764460982e-05, "loss": 0.6948, "step": 130080 }, { "epoch": 1.44, "learning_rate": 2.599340803732247e-05, "loss": 0.5952, "step": 130085 }, { "epoch": 1.44, "learning_rate": 2.5992485310183954e-05, "loss": 0.6261, "step": 130090 }, { "epoch": 1.44, "learning_rate": 2.5991562583045442e-05, "loss": 0.6641, "step": 130095 }, { "epoch": 1.44, "learning_rate": 2.599063985590693e-05, "loss": 0.6409, "step": 130100 }, { "epoch": 1.44, "learning_rate": 2.598971712876842e-05, "loss": 0.6469, "step": 130105 }, { "epoch": 1.44, "learning_rate": 2.5988794401629906e-05, "loss": 0.6913, "step": 130110 }, { "epoch": 1.44, "learning_rate": 2.5987871674491394e-05, "loss": 0.6461, "step": 130115 }, { "epoch": 1.44, "learning_rate": 2.5986948947352878e-05, "loss": 0.5941, "step": 130120 }, { "epoch": 1.44, "learning_rate": 2.5986026220214373e-05, "loss": 0.6309, "step": 130125 }, { "epoch": 1.44, "learning_rate": 2.5985103493075857e-05, "loss": 0.6662, "step": 130130 }, { "epoch": 1.44, "learning_rate": 2.5984180765937345e-05, "loss": 0.6125, "step": 130135 }, { "epoch": 1.44, "learning_rate": 2.598325803879883e-05, "loss": 0.6174, "step": 130140 }, { "epoch": 1.44, "learning_rate": 2.598233531166032e-05, "loss": 0.645, "step": 130145 }, { "epoch": 1.44, "learning_rate": 2.598141258452181e-05, "loss": 0.6097, "step": 130150 }, { "epoch": 1.44, "learning_rate": 2.5980489857383293e-05, "loss": 0.6985, "step": 130155 }, { "epoch": 1.44, "learning_rate": 2.597956713024478e-05, "loss": 0.6662, "step": 130160 }, { "epoch": 1.44, "learning_rate": 2.5978644403106272e-05, "loss": 0.7141, "step": 130165 }, { "epoch": 1.44, "learning_rate": 2.597772167596776e-05, "loss": 0.6833, "step": 130170 }, { "epoch": 1.44, "learning_rate": 2.5976798948829244e-05, "loss": 0.6831, "step": 130175 }, { "epoch": 1.44, "learning_rate": 2.5975876221690732e-05, "loss": 0.6696, "step": 130180 }, { "epoch": 1.44, "learning_rate": 2.5974953494552217e-05, "loss": 0.7075, "step": 130185 }, { "epoch": 1.44, "learning_rate": 2.5974030767413708e-05, "loss": 0.6424, "step": 130190 }, { "epoch": 1.44, "learning_rate": 2.5973108040275196e-05, "loss": 0.7418, "step": 130195 }, { "epoch": 1.44, "learning_rate": 2.5972185313136684e-05, "loss": 0.7001, "step": 130200 }, { "epoch": 1.44, "learning_rate": 2.5971262585998168e-05, "loss": 0.6831, "step": 130205 }, { "epoch": 1.44, "learning_rate": 2.597033985885966e-05, "loss": 0.6463, "step": 130210 }, { "epoch": 1.44, "learning_rate": 2.5969417131721147e-05, "loss": 0.6254, "step": 130215 }, { "epoch": 1.44, "learning_rate": 2.5968494404582632e-05, "loss": 0.6661, "step": 130220 }, { "epoch": 1.44, "learning_rate": 2.596757167744412e-05, "loss": 0.7049, "step": 130225 }, { "epoch": 1.44, "learning_rate": 2.596664895030561e-05, "loss": 0.6071, "step": 130230 }, { "epoch": 1.44, "learning_rate": 2.59657262231671e-05, "loss": 0.593, "step": 130235 }, { "epoch": 1.44, "learning_rate": 2.5964803496028583e-05, "loss": 0.6638, "step": 130240 }, { "epoch": 1.44, "learning_rate": 2.596388076889007e-05, "loss": 0.7101, "step": 130245 }, { "epoch": 1.44, "learning_rate": 2.5962958041751555e-05, "loss": 0.626, "step": 130250 }, { "epoch": 1.44, "learning_rate": 2.5962035314613047e-05, "loss": 0.6998, "step": 130255 }, { "epoch": 1.44, "learning_rate": 2.5961112587474534e-05, "loss": 0.6789, "step": 130260 }, { "epoch": 1.44, "learning_rate": 2.596018986033602e-05, "loss": 0.7222, "step": 130265 }, { "epoch": 1.44, "learning_rate": 2.5959267133197507e-05, "loss": 0.6498, "step": 130270 }, { "epoch": 1.44, "learning_rate": 2.5958344406058998e-05, "loss": 0.6816, "step": 130275 }, { "epoch": 1.44, "learning_rate": 2.5957421678920486e-05, "loss": 0.6734, "step": 130280 }, { "epoch": 1.44, "learning_rate": 2.595649895178197e-05, "loss": 0.6312, "step": 130285 }, { "epoch": 1.44, "learning_rate": 2.5955576224643458e-05, "loss": 0.6351, "step": 130290 }, { "epoch": 1.44, "learning_rate": 2.595465349750495e-05, "loss": 0.6775, "step": 130295 }, { "epoch": 1.44, "learning_rate": 2.5953730770366437e-05, "loss": 0.6622, "step": 130300 }, { "epoch": 1.44, "learning_rate": 2.5952808043227922e-05, "loss": 0.6911, "step": 130305 }, { "epoch": 1.44, "learning_rate": 2.595188531608941e-05, "loss": 0.6569, "step": 130310 }, { "epoch": 1.44, "learning_rate": 2.59509625889509e-05, "loss": 0.6488, "step": 130315 }, { "epoch": 1.44, "learning_rate": 2.5950039861812385e-05, "loss": 0.6594, "step": 130320 }, { "epoch": 1.44, "learning_rate": 2.5949117134673873e-05, "loss": 0.628, "step": 130325 }, { "epoch": 1.44, "learning_rate": 2.5948194407535358e-05, "loss": 0.6651, "step": 130330 }, { "epoch": 1.44, "learning_rate": 2.5947271680396845e-05, "loss": 0.6085, "step": 130335 }, { "epoch": 1.44, "learning_rate": 2.5946348953258337e-05, "loss": 0.6378, "step": 130340 }, { "epoch": 1.44, "learning_rate": 2.5945426226119825e-05, "loss": 0.6911, "step": 130345 }, { "epoch": 1.44, "learning_rate": 2.594450349898131e-05, "loss": 0.6652, "step": 130350 }, { "epoch": 1.44, "learning_rate": 2.5943580771842797e-05, "loss": 0.6506, "step": 130355 }, { "epoch": 1.44, "learning_rate": 2.5942658044704288e-05, "loss": 0.6353, "step": 130360 }, { "epoch": 1.44, "learning_rate": 2.5941735317565773e-05, "loss": 0.679, "step": 130365 }, { "epoch": 1.44, "learning_rate": 2.594081259042726e-05, "loss": 0.6497, "step": 130370 }, { "epoch": 1.44, "learning_rate": 2.5939889863288748e-05, "loss": 0.6475, "step": 130375 }, { "epoch": 1.44, "learning_rate": 2.593896713615024e-05, "loss": 0.6883, "step": 130380 }, { "epoch": 1.44, "learning_rate": 2.5938044409011724e-05, "loss": 0.6554, "step": 130385 }, { "epoch": 1.44, "learning_rate": 2.5937121681873212e-05, "loss": 0.6383, "step": 130390 }, { "epoch": 1.44, "learning_rate": 2.5936198954734696e-05, "loss": 0.616, "step": 130395 }, { "epoch": 1.44, "learning_rate": 2.5935276227596184e-05, "loss": 0.653, "step": 130400 }, { "epoch": 1.44, "learning_rate": 2.5934353500457675e-05, "loss": 0.6848, "step": 130405 }, { "epoch": 1.44, "learning_rate": 2.5933430773319163e-05, "loss": 0.664, "step": 130410 }, { "epoch": 1.44, "learning_rate": 2.5932508046180648e-05, "loss": 0.6467, "step": 130415 }, { "epoch": 1.44, "learning_rate": 2.5931585319042135e-05, "loss": 0.5949, "step": 130420 }, { "epoch": 1.44, "learning_rate": 2.5930662591903627e-05, "loss": 0.6242, "step": 130425 }, { "epoch": 1.44, "learning_rate": 2.592973986476511e-05, "loss": 0.6287, "step": 130430 }, { "epoch": 1.44, "learning_rate": 2.59288171376266e-05, "loss": 0.6342, "step": 130435 }, { "epoch": 1.44, "learning_rate": 2.5927894410488083e-05, "loss": 0.672, "step": 130440 }, { "epoch": 1.44, "learning_rate": 2.5926971683349578e-05, "loss": 0.6232, "step": 130445 }, { "epoch": 1.44, "learning_rate": 2.5926048956211063e-05, "loss": 0.5614, "step": 130450 }, { "epoch": 1.44, "learning_rate": 2.592512622907255e-05, "loss": 0.6809, "step": 130455 }, { "epoch": 1.44, "learning_rate": 2.5924203501934035e-05, "loss": 0.6103, "step": 130460 }, { "epoch": 1.44, "learning_rate": 2.5923280774795526e-05, "loss": 0.6054, "step": 130465 }, { "epoch": 1.44, "learning_rate": 2.5922358047657014e-05, "loss": 0.6918, "step": 130470 }, { "epoch": 1.44, "learning_rate": 2.59214353205185e-05, "loss": 0.6534, "step": 130475 }, { "epoch": 1.44, "learning_rate": 2.5920512593379986e-05, "loss": 0.613, "step": 130480 }, { "epoch": 1.44, "learning_rate": 2.5919589866241474e-05, "loss": 0.661, "step": 130485 }, { "epoch": 1.44, "learning_rate": 2.5918667139102965e-05, "loss": 0.6297, "step": 130490 }, { "epoch": 1.44, "learning_rate": 2.591774441196445e-05, "loss": 0.6575, "step": 130495 }, { "epoch": 1.44, "learning_rate": 2.5916821684825938e-05, "loss": 0.5882, "step": 130500 }, { "epoch": 1.45, "learning_rate": 2.5915898957687422e-05, "loss": 0.6158, "step": 130505 }, { "epoch": 1.45, "learning_rate": 2.5914976230548917e-05, "loss": 0.6577, "step": 130510 }, { "epoch": 1.45, "learning_rate": 2.59140535034104e-05, "loss": 0.6364, "step": 130515 }, { "epoch": 1.45, "learning_rate": 2.591313077627189e-05, "loss": 0.7122, "step": 130520 }, { "epoch": 1.45, "learning_rate": 2.5912208049133374e-05, "loss": 0.646, "step": 130525 }, { "epoch": 1.45, "learning_rate": 2.5911285321994865e-05, "loss": 0.641, "step": 130530 }, { "epoch": 1.45, "learning_rate": 2.5910362594856353e-05, "loss": 0.6625, "step": 130535 }, { "epoch": 1.45, "learning_rate": 2.5909439867717837e-05, "loss": 0.66, "step": 130540 }, { "epoch": 1.45, "learning_rate": 2.5908517140579325e-05, "loss": 0.6309, "step": 130545 }, { "epoch": 1.45, "learning_rate": 2.590759441344081e-05, "loss": 0.6188, "step": 130550 }, { "epoch": 1.45, "learning_rate": 2.5906671686302304e-05, "loss": 0.7016, "step": 130555 }, { "epoch": 1.45, "learning_rate": 2.590574895916379e-05, "loss": 0.7022, "step": 130560 }, { "epoch": 1.45, "learning_rate": 2.5904826232025276e-05, "loss": 0.642, "step": 130565 }, { "epoch": 1.45, "learning_rate": 2.590390350488676e-05, "loss": 0.6363, "step": 130570 }, { "epoch": 1.45, "learning_rate": 2.5902980777748252e-05, "loss": 0.5954, "step": 130575 }, { "epoch": 1.45, "learning_rate": 2.590205805060974e-05, "loss": 0.6026, "step": 130580 }, { "epoch": 1.45, "learning_rate": 2.5901135323471228e-05, "loss": 0.6649, "step": 130585 }, { "epoch": 1.45, "learning_rate": 2.5900212596332712e-05, "loss": 0.6146, "step": 130590 }, { "epoch": 1.45, "learning_rate": 2.5899289869194203e-05, "loss": 0.6586, "step": 130595 }, { "epoch": 1.45, "learning_rate": 2.589836714205569e-05, "loss": 0.6166, "step": 130600 }, { "epoch": 1.45, "learning_rate": 2.5897444414917176e-05, "loss": 0.6101, "step": 130605 }, { "epoch": 1.45, "learning_rate": 2.5896521687778664e-05, "loss": 0.6756, "step": 130610 }, { "epoch": 1.45, "learning_rate": 2.5895598960640155e-05, "loss": 0.5968, "step": 130615 }, { "epoch": 1.45, "learning_rate": 2.5894676233501643e-05, "loss": 0.634, "step": 130620 }, { "epoch": 1.45, "learning_rate": 2.5893753506363127e-05, "loss": 0.6182, "step": 130625 }, { "epoch": 1.45, "learning_rate": 2.5892830779224615e-05, "loss": 0.5951, "step": 130630 }, { "epoch": 1.45, "learning_rate": 2.58919080520861e-05, "loss": 0.6432, "step": 130635 }, { "epoch": 1.45, "learning_rate": 2.589098532494759e-05, "loss": 0.6736, "step": 130640 }, { "epoch": 1.45, "learning_rate": 2.589006259780908e-05, "loss": 0.6399, "step": 130645 }, { "epoch": 1.45, "learning_rate": 2.5889139870670563e-05, "loss": 0.6788, "step": 130650 }, { "epoch": 1.45, "learning_rate": 2.588821714353205e-05, "loss": 0.6201, "step": 130655 }, { "epoch": 1.45, "learning_rate": 2.5887294416393542e-05, "loss": 0.6439, "step": 130660 }, { "epoch": 1.45, "learning_rate": 2.588637168925503e-05, "loss": 0.6701, "step": 130665 }, { "epoch": 1.45, "learning_rate": 2.5885448962116514e-05, "loss": 0.6324, "step": 130670 }, { "epoch": 1.45, "learning_rate": 2.5884526234978002e-05, "loss": 0.6579, "step": 130675 }, { "epoch": 1.45, "learning_rate": 2.5883603507839493e-05, "loss": 0.684, "step": 130680 }, { "epoch": 1.45, "learning_rate": 2.588268078070098e-05, "loss": 0.6797, "step": 130685 }, { "epoch": 1.45, "learning_rate": 2.5881758053562466e-05, "loss": 0.6614, "step": 130690 }, { "epoch": 1.45, "learning_rate": 2.5880835326423954e-05, "loss": 0.6579, "step": 130695 }, { "epoch": 1.45, "learning_rate": 2.5879912599285445e-05, "loss": 0.6747, "step": 130700 }, { "epoch": 1.45, "learning_rate": 2.587898987214693e-05, "loss": 0.68, "step": 130705 }, { "epoch": 1.45, "learning_rate": 2.5878067145008417e-05, "loss": 0.6475, "step": 130710 }, { "epoch": 1.45, "learning_rate": 2.58771444178699e-05, "loss": 0.6442, "step": 130715 }, { "epoch": 1.45, "learning_rate": 2.587622169073139e-05, "loss": 0.6514, "step": 130720 }, { "epoch": 1.45, "learning_rate": 2.587529896359288e-05, "loss": 0.6203, "step": 130725 }, { "epoch": 1.45, "learning_rate": 2.587437623645437e-05, "loss": 0.6135, "step": 130730 }, { "epoch": 1.45, "learning_rate": 2.5873453509315853e-05, "loss": 0.6812, "step": 130735 }, { "epoch": 1.45, "learning_rate": 2.587253078217734e-05, "loss": 0.6564, "step": 130740 }, { "epoch": 1.45, "learning_rate": 2.5871608055038832e-05, "loss": 0.6299, "step": 130745 }, { "epoch": 1.45, "learning_rate": 2.5870685327900317e-05, "loss": 0.7055, "step": 130750 }, { "epoch": 1.45, "learning_rate": 2.5869762600761804e-05, "loss": 0.6303, "step": 130755 }, { "epoch": 1.45, "learning_rate": 2.5868839873623292e-05, "loss": 0.666, "step": 130760 }, { "epoch": 1.45, "learning_rate": 2.5867917146484783e-05, "loss": 0.6784, "step": 130765 }, { "epoch": 1.45, "learning_rate": 2.5866994419346268e-05, "loss": 0.6667, "step": 130770 }, { "epoch": 1.45, "learning_rate": 2.5866071692207756e-05, "loss": 0.6663, "step": 130775 }, { "epoch": 1.45, "learning_rate": 2.586514896506924e-05, "loss": 0.6512, "step": 130780 }, { "epoch": 1.45, "learning_rate": 2.5864226237930728e-05, "loss": 0.6536, "step": 130785 }, { "epoch": 1.45, "learning_rate": 2.586330351079222e-05, "loss": 0.5704, "step": 130790 }, { "epoch": 1.45, "learning_rate": 2.5862380783653707e-05, "loss": 0.6382, "step": 130795 }, { "epoch": 1.45, "learning_rate": 2.586145805651519e-05, "loss": 0.6277, "step": 130800 }, { "epoch": 1.45, "learning_rate": 2.586053532937668e-05, "loss": 0.6799, "step": 130805 }, { "epoch": 1.45, "learning_rate": 2.585961260223817e-05, "loss": 0.5977, "step": 130810 }, { "epoch": 1.45, "learning_rate": 2.5858689875099655e-05, "loss": 0.6547, "step": 130815 }, { "epoch": 1.45, "learning_rate": 2.5857767147961143e-05, "loss": 0.6413, "step": 130820 }, { "epoch": 1.45, "learning_rate": 2.5856844420822628e-05, "loss": 0.6767, "step": 130825 }, { "epoch": 1.45, "learning_rate": 2.5855921693684122e-05, "loss": 0.6502, "step": 130830 }, { "epoch": 1.45, "learning_rate": 2.5854998966545607e-05, "loss": 0.6225, "step": 130835 }, { "epoch": 1.45, "learning_rate": 2.5854076239407094e-05, "loss": 0.5804, "step": 130840 }, { "epoch": 1.45, "learning_rate": 2.585315351226858e-05, "loss": 0.6749, "step": 130845 }, { "epoch": 1.45, "learning_rate": 2.585223078513007e-05, "loss": 0.603, "step": 130850 }, { "epoch": 1.45, "learning_rate": 2.5851308057991558e-05, "loss": 0.644, "step": 130855 }, { "epoch": 1.45, "learning_rate": 2.5850385330853046e-05, "loss": 0.6868, "step": 130860 }, { "epoch": 1.45, "learning_rate": 2.584946260371453e-05, "loss": 0.6164, "step": 130865 }, { "epoch": 1.45, "learning_rate": 2.5848539876576018e-05, "loss": 0.6459, "step": 130870 }, { "epoch": 1.45, "learning_rate": 2.584761714943751e-05, "loss": 0.6174, "step": 130875 }, { "epoch": 1.45, "learning_rate": 2.5846694422298994e-05, "loss": 0.6366, "step": 130880 }, { "epoch": 1.45, "learning_rate": 2.584577169516048e-05, "loss": 0.6783, "step": 130885 }, { "epoch": 1.45, "learning_rate": 2.5844848968021966e-05, "loss": 0.6535, "step": 130890 }, { "epoch": 1.45, "learning_rate": 2.584392624088346e-05, "loss": 0.5942, "step": 130895 }, { "epoch": 1.45, "learning_rate": 2.5843003513744945e-05, "loss": 0.5773, "step": 130900 }, { "epoch": 1.45, "learning_rate": 2.5842080786606433e-05, "loss": 0.6762, "step": 130905 }, { "epoch": 1.45, "learning_rate": 2.5841158059467918e-05, "loss": 0.6897, "step": 130910 }, { "epoch": 1.45, "learning_rate": 2.584023533232941e-05, "loss": 0.6696, "step": 130915 }, { "epoch": 1.45, "learning_rate": 2.5839312605190897e-05, "loss": 0.6094, "step": 130920 }, { "epoch": 1.45, "learning_rate": 2.583838987805238e-05, "loss": 0.6584, "step": 130925 }, { "epoch": 1.45, "learning_rate": 2.583746715091387e-05, "loss": 0.6456, "step": 130930 }, { "epoch": 1.45, "learning_rate": 2.5836544423775357e-05, "loss": 0.6982, "step": 130935 }, { "epoch": 1.45, "learning_rate": 2.5835621696636848e-05, "loss": 0.624, "step": 130940 }, { "epoch": 1.45, "learning_rate": 2.5834698969498332e-05, "loss": 0.6586, "step": 130945 }, { "epoch": 1.45, "learning_rate": 2.583377624235982e-05, "loss": 0.6766, "step": 130950 }, { "epoch": 1.45, "learning_rate": 2.5832853515221305e-05, "loss": 0.6918, "step": 130955 }, { "epoch": 1.45, "learning_rate": 2.5831930788082796e-05, "loss": 0.6807, "step": 130960 }, { "epoch": 1.45, "learning_rate": 2.5831008060944284e-05, "loss": 0.6325, "step": 130965 }, { "epoch": 1.45, "learning_rate": 2.5830085333805772e-05, "loss": 0.6306, "step": 130970 }, { "epoch": 1.45, "learning_rate": 2.5829162606667256e-05, "loss": 0.6594, "step": 130975 }, { "epoch": 1.45, "learning_rate": 2.5828239879528747e-05, "loss": 0.6096, "step": 130980 }, { "epoch": 1.45, "learning_rate": 2.5827317152390235e-05, "loss": 0.6871, "step": 130985 }, { "epoch": 1.45, "learning_rate": 2.582639442525172e-05, "loss": 0.6343, "step": 130990 }, { "epoch": 1.45, "learning_rate": 2.5825471698113208e-05, "loss": 0.6162, "step": 130995 }, { "epoch": 1.45, "learning_rate": 2.58245489709747e-05, "loss": 0.6601, "step": 131000 }, { "epoch": 1.45, "eval_loss": 0.6302406787872314, "eval_runtime": 69.8276, "eval_samples_per_second": 28.642, "eval_steps_per_second": 14.321, "step": 131000 }, { "epoch": 1.45, "learning_rate": 2.5823626243836187e-05, "loss": 0.5748, "step": 131005 }, { "epoch": 1.45, "learning_rate": 2.582270351669767e-05, "loss": 0.7045, "step": 131010 }, { "epoch": 1.45, "learning_rate": 2.582178078955916e-05, "loss": 0.6493, "step": 131015 }, { "epoch": 1.45, "learning_rate": 2.5820858062420643e-05, "loss": 0.6211, "step": 131020 }, { "epoch": 1.45, "learning_rate": 2.5819935335282135e-05, "loss": 0.693, "step": 131025 }, { "epoch": 1.45, "learning_rate": 2.5819012608143623e-05, "loss": 0.609, "step": 131030 }, { "epoch": 1.45, "learning_rate": 2.5818089881005107e-05, "loss": 0.6624, "step": 131035 }, { "epoch": 1.45, "learning_rate": 2.5817167153866595e-05, "loss": 0.6038, "step": 131040 }, { "epoch": 1.45, "learning_rate": 2.5816244426728086e-05, "loss": 0.7107, "step": 131045 }, { "epoch": 1.45, "learning_rate": 2.5815321699589574e-05, "loss": 0.6653, "step": 131050 }, { "epoch": 1.45, "learning_rate": 2.581439897245106e-05, "loss": 0.5982, "step": 131055 }, { "epoch": 1.45, "learning_rate": 2.5813476245312546e-05, "loss": 0.6328, "step": 131060 }, { "epoch": 1.45, "learning_rate": 2.5812553518174037e-05, "loss": 0.702, "step": 131065 }, { "epoch": 1.45, "learning_rate": 2.5811630791035525e-05, "loss": 0.6568, "step": 131070 }, { "epoch": 1.45, "learning_rate": 2.581070806389701e-05, "loss": 0.6774, "step": 131075 }, { "epoch": 1.45, "learning_rate": 2.5809785336758498e-05, "loss": 0.6889, "step": 131080 }, { "epoch": 1.45, "learning_rate": 2.5808862609619982e-05, "loss": 0.6152, "step": 131085 }, { "epoch": 1.45, "learning_rate": 2.5807939882481473e-05, "loss": 0.6113, "step": 131090 }, { "epoch": 1.45, "learning_rate": 2.580701715534296e-05, "loss": 0.6907, "step": 131095 }, { "epoch": 1.45, "learning_rate": 2.5806094428204446e-05, "loss": 0.7065, "step": 131100 }, { "epoch": 1.45, "learning_rate": 2.5805171701065933e-05, "loss": 0.627, "step": 131105 }, { "epoch": 1.45, "learning_rate": 2.5804248973927425e-05, "loss": 0.6413, "step": 131110 }, { "epoch": 1.45, "learning_rate": 2.5803326246788913e-05, "loss": 0.7064, "step": 131115 }, { "epoch": 1.45, "learning_rate": 2.5802403519650397e-05, "loss": 0.7265, "step": 131120 }, { "epoch": 1.45, "learning_rate": 2.5801480792511885e-05, "loss": 0.612, "step": 131125 }, { "epoch": 1.45, "learning_rate": 2.5800558065373376e-05, "loss": 0.5795, "step": 131130 }, { "epoch": 1.45, "learning_rate": 2.579963533823486e-05, "loss": 0.6759, "step": 131135 }, { "epoch": 1.45, "learning_rate": 2.579871261109635e-05, "loss": 0.6048, "step": 131140 }, { "epoch": 1.45, "learning_rate": 2.5797789883957836e-05, "loss": 0.6857, "step": 131145 }, { "epoch": 1.45, "learning_rate": 2.5796867156819328e-05, "loss": 0.6625, "step": 131150 }, { "epoch": 1.45, "learning_rate": 2.5795944429680812e-05, "loss": 0.7, "step": 131155 }, { "epoch": 1.45, "learning_rate": 2.57950217025423e-05, "loss": 0.6352, "step": 131160 }, { "epoch": 1.45, "learning_rate": 2.5794098975403784e-05, "loss": 0.6358, "step": 131165 }, { "epoch": 1.45, "learning_rate": 2.5793176248265272e-05, "loss": 0.6164, "step": 131170 }, { "epoch": 1.45, "learning_rate": 2.5792253521126763e-05, "loss": 0.6445, "step": 131175 }, { "epoch": 1.45, "learning_rate": 2.579133079398825e-05, "loss": 0.6587, "step": 131180 }, { "epoch": 1.45, "learning_rate": 2.5790408066849736e-05, "loss": 0.6774, "step": 131185 }, { "epoch": 1.45, "learning_rate": 2.5789485339711224e-05, "loss": 0.588, "step": 131190 }, { "epoch": 1.45, "learning_rate": 2.5788562612572715e-05, "loss": 0.6855, "step": 131195 }, { "epoch": 1.45, "learning_rate": 2.57876398854342e-05, "loss": 0.6311, "step": 131200 }, { "epoch": 1.45, "learning_rate": 2.5786717158295687e-05, "loss": 0.7139, "step": 131205 }, { "epoch": 1.45, "learning_rate": 2.578579443115717e-05, "loss": 0.6731, "step": 131210 }, { "epoch": 1.45, "learning_rate": 2.5784871704018666e-05, "loss": 0.6838, "step": 131215 }, { "epoch": 1.45, "learning_rate": 2.578394897688015e-05, "loss": 0.6652, "step": 131220 }, { "epoch": 1.45, "learning_rate": 2.578302624974164e-05, "loss": 0.7075, "step": 131225 }, { "epoch": 1.45, "learning_rate": 2.5782103522603123e-05, "loss": 0.6786, "step": 131230 }, { "epoch": 1.45, "learning_rate": 2.578118079546461e-05, "loss": 0.6251, "step": 131235 }, { "epoch": 1.45, "learning_rate": 2.5780258068326102e-05, "loss": 0.6689, "step": 131240 }, { "epoch": 1.45, "learning_rate": 2.577933534118759e-05, "loss": 0.6767, "step": 131245 }, { "epoch": 1.45, "learning_rate": 2.5778412614049074e-05, "loss": 0.6439, "step": 131250 }, { "epoch": 1.45, "learning_rate": 2.5777489886910562e-05, "loss": 0.6296, "step": 131255 }, { "epoch": 1.45, "learning_rate": 2.5776567159772053e-05, "loss": 0.6186, "step": 131260 }, { "epoch": 1.45, "learning_rate": 2.5775644432633538e-05, "loss": 0.7045, "step": 131265 }, { "epoch": 1.45, "learning_rate": 2.5774721705495026e-05, "loss": 0.6239, "step": 131270 }, { "epoch": 1.45, "learning_rate": 2.577379897835651e-05, "loss": 0.6611, "step": 131275 }, { "epoch": 1.45, "learning_rate": 2.5772876251218005e-05, "loss": 0.6863, "step": 131280 }, { "epoch": 1.45, "learning_rate": 2.577195352407949e-05, "loss": 0.6312, "step": 131285 }, { "epoch": 1.45, "learning_rate": 2.5771030796940977e-05, "loss": 0.6624, "step": 131290 }, { "epoch": 1.45, "learning_rate": 2.577010806980246e-05, "loss": 0.664, "step": 131295 }, { "epoch": 1.45, "learning_rate": 2.5769185342663953e-05, "loss": 0.6415, "step": 131300 }, { "epoch": 1.45, "learning_rate": 2.576826261552544e-05, "loss": 0.6386, "step": 131305 }, { "epoch": 1.45, "learning_rate": 2.5767339888386925e-05, "loss": 0.6285, "step": 131310 }, { "epoch": 1.45, "learning_rate": 2.5766417161248413e-05, "loss": 0.6538, "step": 131315 }, { "epoch": 1.45, "learning_rate": 2.57654944341099e-05, "loss": 0.5683, "step": 131320 }, { "epoch": 1.45, "learning_rate": 2.5764571706971392e-05, "loss": 0.5822, "step": 131325 }, { "epoch": 1.45, "learning_rate": 2.5763648979832877e-05, "loss": 0.6279, "step": 131330 }, { "epoch": 1.45, "learning_rate": 2.5762726252694364e-05, "loss": 0.657, "step": 131335 }, { "epoch": 1.45, "learning_rate": 2.576180352555585e-05, "loss": 0.6104, "step": 131340 }, { "epoch": 1.45, "learning_rate": 2.576088079841734e-05, "loss": 0.6547, "step": 131345 }, { "epoch": 1.45, "learning_rate": 2.5759958071278828e-05, "loss": 0.6648, "step": 131350 }, { "epoch": 1.45, "learning_rate": 2.5759035344140316e-05, "loss": 0.7404, "step": 131355 }, { "epoch": 1.45, "learning_rate": 2.57581126170018e-05, "loss": 0.6618, "step": 131360 }, { "epoch": 1.45, "learning_rate": 2.575718988986329e-05, "loss": 0.65, "step": 131365 }, { "epoch": 1.45, "learning_rate": 2.575626716272478e-05, "loss": 0.643, "step": 131370 }, { "epoch": 1.45, "learning_rate": 2.5755344435586264e-05, "loss": 0.6944, "step": 131375 }, { "epoch": 1.45, "learning_rate": 2.575442170844775e-05, "loss": 0.5987, "step": 131380 }, { "epoch": 1.45, "learning_rate": 2.5753498981309236e-05, "loss": 0.6775, "step": 131385 }, { "epoch": 1.45, "learning_rate": 2.575257625417073e-05, "loss": 0.6364, "step": 131390 }, { "epoch": 1.45, "learning_rate": 2.5751653527032215e-05, "loss": 0.6395, "step": 131395 }, { "epoch": 1.45, "learning_rate": 2.5750730799893703e-05, "loss": 0.6432, "step": 131400 }, { "epoch": 1.46, "learning_rate": 2.5749808072755187e-05, "loss": 0.6372, "step": 131405 }, { "epoch": 1.46, "learning_rate": 2.574888534561668e-05, "loss": 0.6227, "step": 131410 }, { "epoch": 1.46, "learning_rate": 2.5747962618478167e-05, "loss": 0.6324, "step": 131415 }, { "epoch": 1.46, "learning_rate": 2.574703989133965e-05, "loss": 0.5863, "step": 131420 }, { "epoch": 1.46, "learning_rate": 2.574611716420114e-05, "loss": 0.6381, "step": 131425 }, { "epoch": 1.46, "learning_rate": 2.574519443706263e-05, "loss": 0.7061, "step": 131430 }, { "epoch": 1.46, "learning_rate": 2.5744271709924118e-05, "loss": 0.6735, "step": 131435 }, { "epoch": 1.46, "learning_rate": 2.5743348982785602e-05, "loss": 0.6043, "step": 131440 }, { "epoch": 1.46, "learning_rate": 2.574242625564709e-05, "loss": 0.7058, "step": 131445 }, { "epoch": 1.46, "learning_rate": 2.574150352850858e-05, "loss": 0.6255, "step": 131450 }, { "epoch": 1.46, "learning_rate": 2.574058080137007e-05, "loss": 0.6308, "step": 131455 }, { "epoch": 1.46, "learning_rate": 2.5739658074231554e-05, "loss": 0.6579, "step": 131460 }, { "epoch": 1.46, "learning_rate": 2.573873534709304e-05, "loss": 0.6171, "step": 131465 }, { "epoch": 1.46, "learning_rate": 2.5737812619954526e-05, "loss": 0.6557, "step": 131470 }, { "epoch": 1.46, "learning_rate": 2.5736889892816017e-05, "loss": 0.6541, "step": 131475 }, { "epoch": 1.46, "learning_rate": 2.5735967165677505e-05, "loss": 0.6882, "step": 131480 }, { "epoch": 1.46, "learning_rate": 2.573504443853899e-05, "loss": 0.6246, "step": 131485 }, { "epoch": 1.46, "learning_rate": 2.5734121711400478e-05, "loss": 0.6759, "step": 131490 }, { "epoch": 1.46, "learning_rate": 2.573319898426197e-05, "loss": 0.6412, "step": 131495 }, { "epoch": 1.46, "learning_rate": 2.5732276257123457e-05, "loss": 0.6344, "step": 131500 }, { "epoch": 1.46, "learning_rate": 2.573135352998494e-05, "loss": 0.6226, "step": 131505 }, { "epoch": 1.46, "learning_rate": 2.573043080284643e-05, "loss": 0.6826, "step": 131510 }, { "epoch": 1.46, "learning_rate": 2.572950807570792e-05, "loss": 0.6036, "step": 131515 }, { "epoch": 1.46, "learning_rate": 2.5728585348569405e-05, "loss": 0.5968, "step": 131520 }, { "epoch": 1.46, "learning_rate": 2.5727662621430892e-05, "loss": 0.677, "step": 131525 }, { "epoch": 1.46, "learning_rate": 2.572673989429238e-05, "loss": 0.6966, "step": 131530 }, { "epoch": 1.46, "learning_rate": 2.572581716715387e-05, "loss": 0.6754, "step": 131535 }, { "epoch": 1.46, "learning_rate": 2.5724894440015356e-05, "loss": 0.66, "step": 131540 }, { "epoch": 1.46, "learning_rate": 2.5723971712876844e-05, "loss": 0.6292, "step": 131545 }, { "epoch": 1.46, "learning_rate": 2.5723048985738328e-05, "loss": 0.621, "step": 131550 }, { "epoch": 1.46, "learning_rate": 2.5722126258599816e-05, "loss": 0.623, "step": 131555 }, { "epoch": 1.46, "learning_rate": 2.5721203531461307e-05, "loss": 0.6671, "step": 131560 }, { "epoch": 1.46, "learning_rate": 2.5720280804322795e-05, "loss": 0.6497, "step": 131565 }, { "epoch": 1.46, "learning_rate": 2.571935807718428e-05, "loss": 0.611, "step": 131570 }, { "epoch": 1.46, "learning_rate": 2.5718435350045768e-05, "loss": 0.683, "step": 131575 }, { "epoch": 1.46, "learning_rate": 2.571751262290726e-05, "loss": 0.689, "step": 131580 }, { "epoch": 1.46, "learning_rate": 2.5716589895768743e-05, "loss": 0.6406, "step": 131585 }, { "epoch": 1.46, "learning_rate": 2.571566716863023e-05, "loss": 0.6231, "step": 131590 }, { "epoch": 1.46, "learning_rate": 2.5714744441491716e-05, "loss": 0.6226, "step": 131595 }, { "epoch": 1.46, "learning_rate": 2.571382171435321e-05, "loss": 0.5991, "step": 131600 }, { "epoch": 1.46, "learning_rate": 2.5712898987214695e-05, "loss": 0.6848, "step": 131605 }, { "epoch": 1.46, "learning_rate": 2.5711976260076182e-05, "loss": 0.6473, "step": 131610 }, { "epoch": 1.46, "learning_rate": 2.5711053532937667e-05, "loss": 0.6669, "step": 131615 }, { "epoch": 1.46, "learning_rate": 2.5710130805799155e-05, "loss": 0.6572, "step": 131620 }, { "epoch": 1.46, "learning_rate": 2.5709208078660646e-05, "loss": 0.6638, "step": 131625 }, { "epoch": 1.46, "learning_rate": 2.5708285351522134e-05, "loss": 0.6749, "step": 131630 }, { "epoch": 1.46, "learning_rate": 2.570736262438362e-05, "loss": 0.659, "step": 131635 }, { "epoch": 1.46, "learning_rate": 2.5706439897245106e-05, "loss": 0.7077, "step": 131640 }, { "epoch": 1.46, "learning_rate": 2.5705517170106597e-05, "loss": 0.6419, "step": 131645 }, { "epoch": 1.46, "learning_rate": 2.5704594442968082e-05, "loss": 0.6694, "step": 131650 }, { "epoch": 1.46, "learning_rate": 2.570367171582957e-05, "loss": 0.6587, "step": 131655 }, { "epoch": 1.46, "learning_rate": 2.5702748988691054e-05, "loss": 0.6638, "step": 131660 }, { "epoch": 1.46, "learning_rate": 2.570182626155255e-05, "loss": 0.6772, "step": 131665 }, { "epoch": 1.46, "learning_rate": 2.5700903534414033e-05, "loss": 0.6137, "step": 131670 }, { "epoch": 1.46, "learning_rate": 2.569998080727552e-05, "loss": 0.648, "step": 131675 }, { "epoch": 1.46, "learning_rate": 2.5699058080137006e-05, "loss": 0.6738, "step": 131680 }, { "epoch": 1.46, "learning_rate": 2.5698135352998497e-05, "loss": 0.6081, "step": 131685 }, { "epoch": 1.46, "learning_rate": 2.5697212625859985e-05, "loss": 0.5971, "step": 131690 }, { "epoch": 1.46, "learning_rate": 2.569628989872147e-05, "loss": 0.6183, "step": 131695 }, { "epoch": 1.46, "learning_rate": 2.5695367171582957e-05, "loss": 0.6424, "step": 131700 }, { "epoch": 1.46, "learning_rate": 2.5694444444444445e-05, "loss": 0.6408, "step": 131705 }, { "epoch": 1.46, "learning_rate": 2.5693521717305936e-05, "loss": 0.6488, "step": 131710 }, { "epoch": 1.46, "learning_rate": 2.569259899016742e-05, "loss": 0.6433, "step": 131715 }, { "epoch": 1.46, "learning_rate": 2.569167626302891e-05, "loss": 0.6463, "step": 131720 }, { "epoch": 1.46, "learning_rate": 2.5690753535890393e-05, "loss": 0.658, "step": 131725 }, { "epoch": 1.46, "learning_rate": 2.5689830808751884e-05, "loss": 0.65, "step": 131730 }, { "epoch": 1.46, "learning_rate": 2.5688908081613372e-05, "loss": 0.6787, "step": 131735 }, { "epoch": 1.46, "learning_rate": 2.568798535447486e-05, "loss": 0.6346, "step": 131740 }, { "epoch": 1.46, "learning_rate": 2.5687062627336344e-05, "loss": 0.646, "step": 131745 }, { "epoch": 1.46, "learning_rate": 2.5686139900197835e-05, "loss": 0.6236, "step": 131750 }, { "epoch": 1.46, "learning_rate": 2.5685217173059323e-05, "loss": 0.6535, "step": 131755 }, { "epoch": 1.46, "learning_rate": 2.5684294445920808e-05, "loss": 0.6918, "step": 131760 }, { "epoch": 1.46, "learning_rate": 2.5683371718782296e-05, "loss": 0.6933, "step": 131765 }, { "epoch": 1.46, "learning_rate": 2.568244899164378e-05, "loss": 0.6538, "step": 131770 }, { "epoch": 1.46, "learning_rate": 2.5681526264505275e-05, "loss": 0.6247, "step": 131775 }, { "epoch": 1.46, "learning_rate": 2.568060353736676e-05, "loss": 0.6612, "step": 131780 }, { "epoch": 1.46, "learning_rate": 2.5679680810228247e-05, "loss": 0.6671, "step": 131785 }, { "epoch": 1.46, "learning_rate": 2.567875808308973e-05, "loss": 0.6403, "step": 131790 }, { "epoch": 1.46, "learning_rate": 2.5677835355951223e-05, "loss": 0.6622, "step": 131795 }, { "epoch": 1.46, "learning_rate": 2.567691262881271e-05, "loss": 0.6106, "step": 131800 }, { "epoch": 1.46, "learning_rate": 2.5675989901674195e-05, "loss": 0.6201, "step": 131805 }, { "epoch": 1.46, "learning_rate": 2.5675067174535683e-05, "loss": 0.6138, "step": 131810 }, { "epoch": 1.46, "learning_rate": 2.5674144447397174e-05, "loss": 0.6243, "step": 131815 }, { "epoch": 1.46, "learning_rate": 2.5673221720258662e-05, "loss": 0.6198, "step": 131820 }, { "epoch": 1.46, "learning_rate": 2.5672298993120146e-05, "loss": 0.6289, "step": 131825 }, { "epoch": 1.46, "learning_rate": 2.5671376265981634e-05, "loss": 0.6203, "step": 131830 }, { "epoch": 1.46, "learning_rate": 2.5670453538843126e-05, "loss": 0.6412, "step": 131835 }, { "epoch": 1.46, "learning_rate": 2.5669530811704613e-05, "loss": 0.6642, "step": 131840 }, { "epoch": 1.46, "learning_rate": 2.5668608084566098e-05, "loss": 0.6202, "step": 131845 }, { "epoch": 1.46, "learning_rate": 2.5667685357427586e-05, "loss": 0.6729, "step": 131850 }, { "epoch": 1.46, "learning_rate": 2.566676263028907e-05, "loss": 0.6888, "step": 131855 }, { "epoch": 1.46, "learning_rate": 2.566583990315056e-05, "loss": 0.6688, "step": 131860 }, { "epoch": 1.46, "learning_rate": 2.566491717601205e-05, "loss": 0.6975, "step": 131865 }, { "epoch": 1.46, "learning_rate": 2.5663994448873534e-05, "loss": 0.6361, "step": 131870 }, { "epoch": 1.46, "learning_rate": 2.566307172173502e-05, "loss": 0.6237, "step": 131875 }, { "epoch": 1.46, "learning_rate": 2.5662148994596513e-05, "loss": 0.6547, "step": 131880 }, { "epoch": 1.46, "learning_rate": 2.5661226267458e-05, "loss": 0.5947, "step": 131885 }, { "epoch": 1.46, "learning_rate": 2.5660303540319485e-05, "loss": 0.6412, "step": 131890 }, { "epoch": 1.46, "learning_rate": 2.5659380813180973e-05, "loss": 0.6114, "step": 131895 }, { "epoch": 1.46, "learning_rate": 2.5658458086042464e-05, "loss": 0.6032, "step": 131900 }, { "epoch": 1.46, "learning_rate": 2.565753535890395e-05, "loss": 0.6134, "step": 131905 }, { "epoch": 1.46, "learning_rate": 2.5656612631765436e-05, "loss": 0.6055, "step": 131910 }, { "epoch": 1.46, "learning_rate": 2.5655689904626924e-05, "loss": 0.6919, "step": 131915 }, { "epoch": 1.46, "learning_rate": 2.565476717748841e-05, "loss": 0.6635, "step": 131920 }, { "epoch": 1.46, "learning_rate": 2.56538444503499e-05, "loss": 0.6676, "step": 131925 }, { "epoch": 1.46, "learning_rate": 2.5652921723211388e-05, "loss": 0.6653, "step": 131930 }, { "epoch": 1.46, "learning_rate": 2.5651998996072872e-05, "loss": 0.6454, "step": 131935 }, { "epoch": 1.46, "learning_rate": 2.565107626893436e-05, "loss": 0.6943, "step": 131940 }, { "epoch": 1.46, "learning_rate": 2.565015354179585e-05, "loss": 0.6227, "step": 131945 }, { "epoch": 1.46, "learning_rate": 2.564923081465734e-05, "loss": 0.6386, "step": 131950 }, { "epoch": 1.46, "learning_rate": 2.5648308087518824e-05, "loss": 0.6755, "step": 131955 }, { "epoch": 1.46, "learning_rate": 2.564738536038031e-05, "loss": 0.664, "step": 131960 }, { "epoch": 1.46, "learning_rate": 2.5646462633241803e-05, "loss": 0.6089, "step": 131965 }, { "epoch": 1.46, "learning_rate": 2.5645539906103287e-05, "loss": 0.6995, "step": 131970 }, { "epoch": 1.46, "learning_rate": 2.5644617178964775e-05, "loss": 0.6669, "step": 131975 }, { "epoch": 1.46, "learning_rate": 2.564369445182626e-05, "loss": 0.6726, "step": 131980 }, { "epoch": 1.46, "learning_rate": 2.5642771724687754e-05, "loss": 0.6427, "step": 131985 }, { "epoch": 1.46, "learning_rate": 2.564184899754924e-05, "loss": 0.6633, "step": 131990 }, { "epoch": 1.46, "learning_rate": 2.5640926270410726e-05, "loss": 0.6589, "step": 131995 }, { "epoch": 1.46, "learning_rate": 2.564000354327221e-05, "loss": 0.6304, "step": 132000 }, { "epoch": 1.46, "eval_loss": 0.6127191781997681, "eval_runtime": 69.6267, "eval_samples_per_second": 28.725, "eval_steps_per_second": 14.362, "step": 132000 }, { "epoch": 1.46, "learning_rate": 2.56390808161337e-05, "loss": 0.6074, "step": 132005 }, { "epoch": 1.46, "learning_rate": 2.563815808899519e-05, "loss": 0.6422, "step": 132010 }, { "epoch": 1.46, "learning_rate": 2.5637235361856678e-05, "loss": 0.6502, "step": 132015 }, { "epoch": 1.46, "learning_rate": 2.5636312634718162e-05, "loss": 0.643, "step": 132020 }, { "epoch": 1.46, "learning_rate": 2.563538990757965e-05, "loss": 0.6208, "step": 132025 }, { "epoch": 1.46, "learning_rate": 2.563446718044114e-05, "loss": 0.6351, "step": 132030 }, { "epoch": 1.46, "learning_rate": 2.5633544453302626e-05, "loss": 0.6568, "step": 132035 }, { "epoch": 1.46, "learning_rate": 2.5632621726164114e-05, "loss": 0.6388, "step": 132040 }, { "epoch": 1.46, "learning_rate": 2.5631698999025598e-05, "loss": 0.6831, "step": 132045 }, { "epoch": 1.46, "learning_rate": 2.5630776271887093e-05, "loss": 0.6526, "step": 132050 }, { "epoch": 1.46, "learning_rate": 2.5629853544748577e-05, "loss": 0.6147, "step": 132055 }, { "epoch": 1.46, "learning_rate": 2.5628930817610065e-05, "loss": 0.6467, "step": 132060 }, { "epoch": 1.46, "learning_rate": 2.562800809047155e-05, "loss": 0.651, "step": 132065 }, { "epoch": 1.46, "learning_rate": 2.5627085363333037e-05, "loss": 0.6601, "step": 132070 }, { "epoch": 1.46, "learning_rate": 2.562616263619453e-05, "loss": 0.6369, "step": 132075 }, { "epoch": 1.46, "learning_rate": 2.5625239909056013e-05, "loss": 0.6365, "step": 132080 }, { "epoch": 1.46, "learning_rate": 2.56243171819175e-05, "loss": 0.6767, "step": 132085 }, { "epoch": 1.46, "learning_rate": 2.562339445477899e-05, "loss": 0.6096, "step": 132090 }, { "epoch": 1.46, "learning_rate": 2.562247172764048e-05, "loss": 0.693, "step": 132095 }, { "epoch": 1.46, "learning_rate": 2.5621549000501965e-05, "loss": 0.6067, "step": 132100 }, { "epoch": 1.46, "learning_rate": 2.5620626273363452e-05, "loss": 0.6822, "step": 132105 }, { "epoch": 1.46, "learning_rate": 2.5619703546224937e-05, "loss": 0.6615, "step": 132110 }, { "epoch": 1.46, "learning_rate": 2.561878081908643e-05, "loss": 0.6245, "step": 132115 }, { "epoch": 1.46, "learning_rate": 2.5617858091947916e-05, "loss": 0.6177, "step": 132120 }, { "epoch": 1.46, "learning_rate": 2.5616935364809404e-05, "loss": 0.6505, "step": 132125 }, { "epoch": 1.46, "learning_rate": 2.5616012637670888e-05, "loss": 0.6399, "step": 132130 }, { "epoch": 1.46, "learning_rate": 2.561508991053238e-05, "loss": 0.6209, "step": 132135 }, { "epoch": 1.46, "learning_rate": 2.5614167183393867e-05, "loss": 0.6175, "step": 132140 }, { "epoch": 1.46, "learning_rate": 2.5613244456255352e-05, "loss": 0.6318, "step": 132145 }, { "epoch": 1.46, "learning_rate": 2.561232172911684e-05, "loss": 0.7119, "step": 132150 }, { "epoch": 1.46, "learning_rate": 2.5611399001978324e-05, "loss": 0.6409, "step": 132155 }, { "epoch": 1.46, "learning_rate": 2.561047627483982e-05, "loss": 0.6596, "step": 132160 }, { "epoch": 1.46, "learning_rate": 2.5609553547701303e-05, "loss": 0.6538, "step": 132165 }, { "epoch": 1.46, "learning_rate": 2.560863082056279e-05, "loss": 0.6077, "step": 132170 }, { "epoch": 1.46, "learning_rate": 2.5607708093424275e-05, "loss": 0.6782, "step": 132175 }, { "epoch": 1.46, "learning_rate": 2.5606785366285767e-05, "loss": 0.6117, "step": 132180 }, { "epoch": 1.46, "learning_rate": 2.5605862639147255e-05, "loss": 0.6198, "step": 132185 }, { "epoch": 1.46, "learning_rate": 2.5604939912008742e-05, "loss": 0.6141, "step": 132190 }, { "epoch": 1.46, "learning_rate": 2.5604017184870227e-05, "loss": 0.6547, "step": 132195 }, { "epoch": 1.46, "learning_rate": 2.5603094457731718e-05, "loss": 0.6162, "step": 132200 }, { "epoch": 1.46, "learning_rate": 2.5602171730593206e-05, "loss": 0.6709, "step": 132205 }, { "epoch": 1.46, "learning_rate": 2.560124900345469e-05, "loss": 0.651, "step": 132210 }, { "epoch": 1.46, "learning_rate": 2.5600326276316178e-05, "loss": 0.6361, "step": 132215 }, { "epoch": 1.46, "learning_rate": 2.5599403549177663e-05, "loss": 0.6524, "step": 132220 }, { "epoch": 1.46, "learning_rate": 2.5598480822039157e-05, "loss": 0.6397, "step": 132225 }, { "epoch": 1.46, "learning_rate": 2.5597558094900642e-05, "loss": 0.6281, "step": 132230 }, { "epoch": 1.46, "learning_rate": 2.559663536776213e-05, "loss": 0.6162, "step": 132235 }, { "epoch": 1.46, "learning_rate": 2.5595712640623614e-05, "loss": 0.6431, "step": 132240 }, { "epoch": 1.46, "learning_rate": 2.5594789913485105e-05, "loss": 0.655, "step": 132245 }, { "epoch": 1.46, "learning_rate": 2.5593867186346593e-05, "loss": 0.6284, "step": 132250 }, { "epoch": 1.46, "learning_rate": 2.5592944459208078e-05, "loss": 0.6664, "step": 132255 }, { "epoch": 1.46, "learning_rate": 2.5592021732069566e-05, "loss": 0.5971, "step": 132260 }, { "epoch": 1.46, "learning_rate": 2.5591099004931057e-05, "loss": 0.6652, "step": 132265 }, { "epoch": 1.46, "learning_rate": 2.5590176277792545e-05, "loss": 0.6584, "step": 132270 }, { "epoch": 1.46, "learning_rate": 2.558925355065403e-05, "loss": 0.6627, "step": 132275 }, { "epoch": 1.46, "learning_rate": 2.5588330823515517e-05, "loss": 0.6213, "step": 132280 }, { "epoch": 1.46, "learning_rate": 2.5587408096377008e-05, "loss": 0.676, "step": 132285 }, { "epoch": 1.46, "learning_rate": 2.5586485369238493e-05, "loss": 0.592, "step": 132290 }, { "epoch": 1.46, "learning_rate": 2.558556264209998e-05, "loss": 0.6493, "step": 132295 }, { "epoch": 1.46, "learning_rate": 2.558463991496147e-05, "loss": 0.6702, "step": 132300 }, { "epoch": 1.46, "learning_rate": 2.5583717187822953e-05, "loss": 0.648, "step": 132305 }, { "epoch": 1.47, "learning_rate": 2.5582794460684444e-05, "loss": 0.6472, "step": 132310 }, { "epoch": 1.47, "learning_rate": 2.5581871733545932e-05, "loss": 0.613, "step": 132315 }, { "epoch": 1.47, "learning_rate": 2.5580949006407416e-05, "loss": 0.638, "step": 132320 }, { "epoch": 1.47, "learning_rate": 2.5580026279268904e-05, "loss": 0.6614, "step": 132325 }, { "epoch": 1.47, "learning_rate": 2.5579103552130395e-05, "loss": 0.6701, "step": 132330 }, { "epoch": 1.47, "learning_rate": 2.5578180824991883e-05, "loss": 0.6306, "step": 132335 }, { "epoch": 1.47, "learning_rate": 2.5577258097853368e-05, "loss": 0.6109, "step": 132340 }, { "epoch": 1.47, "learning_rate": 2.5576335370714856e-05, "loss": 0.601, "step": 132345 }, { "epoch": 1.47, "learning_rate": 2.5575412643576347e-05, "loss": 0.5989, "step": 132350 }, { "epoch": 1.47, "learning_rate": 2.557448991643783e-05, "loss": 0.6162, "step": 132355 }, { "epoch": 1.47, "learning_rate": 2.557356718929932e-05, "loss": 0.6191, "step": 132360 }, { "epoch": 1.47, "learning_rate": 2.5572644462160804e-05, "loss": 0.6241, "step": 132365 }, { "epoch": 1.47, "learning_rate": 2.5571721735022298e-05, "loss": 0.6712, "step": 132370 }, { "epoch": 1.47, "learning_rate": 2.5570799007883783e-05, "loss": 0.6273, "step": 132375 }, { "epoch": 1.47, "learning_rate": 2.556987628074527e-05, "loss": 0.6107, "step": 132380 }, { "epoch": 1.47, "learning_rate": 2.5568953553606755e-05, "loss": 0.6067, "step": 132385 }, { "epoch": 1.47, "learning_rate": 2.5568030826468243e-05, "loss": 0.6669, "step": 132390 }, { "epoch": 1.47, "learning_rate": 2.5567108099329734e-05, "loss": 0.6676, "step": 132395 }, { "epoch": 1.47, "learning_rate": 2.5566185372191222e-05, "loss": 0.6573, "step": 132400 }, { "epoch": 1.47, "learning_rate": 2.5565262645052706e-05, "loss": 0.6846, "step": 132405 }, { "epoch": 1.47, "learning_rate": 2.5564339917914194e-05, "loss": 0.5718, "step": 132410 }, { "epoch": 1.47, "learning_rate": 2.5563417190775685e-05, "loss": 0.6747, "step": 132415 }, { "epoch": 1.47, "learning_rate": 2.556249446363717e-05, "loss": 0.584, "step": 132420 }, { "epoch": 1.47, "learning_rate": 2.5561571736498658e-05, "loss": 0.6616, "step": 132425 }, { "epoch": 1.47, "learning_rate": 2.5560649009360142e-05, "loss": 0.6593, "step": 132430 }, { "epoch": 1.47, "learning_rate": 2.5559726282221637e-05, "loss": 0.5982, "step": 132435 }, { "epoch": 1.47, "learning_rate": 2.555880355508312e-05, "loss": 0.6603, "step": 132440 }, { "epoch": 1.47, "learning_rate": 2.555788082794461e-05, "loss": 0.6837, "step": 132445 }, { "epoch": 1.47, "learning_rate": 2.5556958100806094e-05, "loss": 0.6603, "step": 132450 }, { "epoch": 1.47, "learning_rate": 2.555603537366758e-05, "loss": 0.6252, "step": 132455 }, { "epoch": 1.47, "learning_rate": 2.5555112646529073e-05, "loss": 0.661, "step": 132460 }, { "epoch": 1.47, "learning_rate": 2.5554189919390557e-05, "loss": 0.646, "step": 132465 }, { "epoch": 1.47, "learning_rate": 2.5553267192252045e-05, "loss": 0.6137, "step": 132470 }, { "epoch": 1.47, "learning_rate": 2.5552344465113533e-05, "loss": 0.6664, "step": 132475 }, { "epoch": 1.47, "learning_rate": 2.5551421737975024e-05, "loss": 0.6513, "step": 132480 }, { "epoch": 1.47, "learning_rate": 2.555049901083651e-05, "loss": 0.6848, "step": 132485 }, { "epoch": 1.47, "learning_rate": 2.5549576283697996e-05, "loss": 0.6411, "step": 132490 }, { "epoch": 1.47, "learning_rate": 2.554865355655948e-05, "loss": 0.618, "step": 132495 }, { "epoch": 1.47, "learning_rate": 2.5547730829420975e-05, "loss": 0.6795, "step": 132500 }, { "epoch": 1.47, "learning_rate": 2.554680810228246e-05, "loss": 0.6248, "step": 132505 }, { "epoch": 1.47, "learning_rate": 2.5545885375143948e-05, "loss": 0.6708, "step": 132510 }, { "epoch": 1.47, "learning_rate": 2.5544962648005432e-05, "loss": 0.6361, "step": 132515 }, { "epoch": 1.47, "learning_rate": 2.5544039920866924e-05, "loss": 0.6747, "step": 132520 }, { "epoch": 1.47, "learning_rate": 2.554311719372841e-05, "loss": 0.6392, "step": 132525 }, { "epoch": 1.47, "learning_rate": 2.5542194466589896e-05, "loss": 0.6107, "step": 132530 }, { "epoch": 1.47, "learning_rate": 2.5541271739451384e-05, "loss": 0.6308, "step": 132535 }, { "epoch": 1.47, "learning_rate": 2.5540349012312868e-05, "loss": 0.6565, "step": 132540 }, { "epoch": 1.47, "learning_rate": 2.5539426285174363e-05, "loss": 0.6663, "step": 132545 }, { "epoch": 1.47, "learning_rate": 2.5538503558035847e-05, "loss": 0.6254, "step": 132550 }, { "epoch": 1.47, "learning_rate": 2.5537580830897335e-05, "loss": 0.5973, "step": 132555 }, { "epoch": 1.47, "learning_rate": 2.553665810375882e-05, "loss": 0.6447, "step": 132560 }, { "epoch": 1.47, "learning_rate": 2.553573537662031e-05, "loss": 0.6479, "step": 132565 }, { "epoch": 1.47, "learning_rate": 2.55348126494818e-05, "loss": 0.6414, "step": 132570 }, { "epoch": 1.47, "learning_rate": 2.5533889922343286e-05, "loss": 0.6233, "step": 132575 }, { "epoch": 1.47, "learning_rate": 2.553296719520477e-05, "loss": 0.6358, "step": 132580 }, { "epoch": 1.47, "learning_rate": 2.5532044468066262e-05, "loss": 0.6326, "step": 132585 }, { "epoch": 1.47, "learning_rate": 2.553112174092775e-05, "loss": 0.6372, "step": 132590 }, { "epoch": 1.47, "learning_rate": 2.5530199013789234e-05, "loss": 0.6418, "step": 132595 }, { "epoch": 1.47, "learning_rate": 2.5529276286650722e-05, "loss": 0.6811, "step": 132600 }, { "epoch": 1.47, "learning_rate": 2.5528353559512207e-05, "loss": 0.6175, "step": 132605 }, { "epoch": 1.47, "learning_rate": 2.55274308323737e-05, "loss": 0.6031, "step": 132610 }, { "epoch": 1.47, "learning_rate": 2.5526508105235186e-05, "loss": 0.641, "step": 132615 }, { "epoch": 1.47, "learning_rate": 2.5525585378096674e-05, "loss": 0.64, "step": 132620 }, { "epoch": 1.47, "learning_rate": 2.5524662650958158e-05, "loss": 0.6265, "step": 132625 }, { "epoch": 1.47, "learning_rate": 2.552373992381965e-05, "loss": 0.6424, "step": 132630 }, { "epoch": 1.47, "learning_rate": 2.5522817196681137e-05, "loss": 0.6824, "step": 132635 }, { "epoch": 1.47, "learning_rate": 2.5521894469542622e-05, "loss": 0.6776, "step": 132640 }, { "epoch": 1.47, "learning_rate": 2.552097174240411e-05, "loss": 0.6368, "step": 132645 }, { "epoch": 1.47, "learning_rate": 2.55200490152656e-05, "loss": 0.6235, "step": 132650 }, { "epoch": 1.47, "learning_rate": 2.551912628812709e-05, "loss": 0.6487, "step": 132655 }, { "epoch": 1.47, "learning_rate": 2.5518203560988573e-05, "loss": 0.7173, "step": 132660 }, { "epoch": 1.47, "learning_rate": 2.551728083385006e-05, "loss": 0.6233, "step": 132665 }, { "epoch": 1.47, "learning_rate": 2.5516358106711552e-05, "loss": 0.6373, "step": 132670 }, { "epoch": 1.47, "learning_rate": 2.5515435379573037e-05, "loss": 0.6295, "step": 132675 }, { "epoch": 1.47, "learning_rate": 2.5514512652434524e-05, "loss": 0.6317, "step": 132680 }, { "epoch": 1.47, "learning_rate": 2.5513589925296012e-05, "loss": 0.6584, "step": 132685 }, { "epoch": 1.47, "learning_rate": 2.5512667198157497e-05, "loss": 0.6819, "step": 132690 }, { "epoch": 1.47, "learning_rate": 2.5511744471018988e-05, "loss": 0.5832, "step": 132695 }, { "epoch": 1.47, "learning_rate": 2.5510821743880476e-05, "loss": 0.6348, "step": 132700 }, { "epoch": 1.47, "learning_rate": 2.550989901674196e-05, "loss": 0.6289, "step": 132705 }, { "epoch": 1.47, "learning_rate": 2.5508976289603448e-05, "loss": 0.6251, "step": 132710 }, { "epoch": 1.47, "learning_rate": 2.550805356246494e-05, "loss": 0.7035, "step": 132715 }, { "epoch": 1.47, "learning_rate": 2.5507130835326427e-05, "loss": 0.5932, "step": 132720 }, { "epoch": 1.47, "learning_rate": 2.5506208108187912e-05, "loss": 0.6551, "step": 132725 }, { "epoch": 1.47, "learning_rate": 2.55052853810494e-05, "loss": 0.5966, "step": 132730 }, { "epoch": 1.47, "learning_rate": 2.550436265391089e-05, "loss": 0.6469, "step": 132735 }, { "epoch": 1.47, "learning_rate": 2.5503439926772375e-05, "loss": 0.6654, "step": 132740 }, { "epoch": 1.47, "learning_rate": 2.5502517199633863e-05, "loss": 0.6198, "step": 132745 }, { "epoch": 1.47, "learning_rate": 2.5501594472495348e-05, "loss": 0.5788, "step": 132750 }, { "epoch": 1.47, "learning_rate": 2.5500671745356835e-05, "loss": 0.6868, "step": 132755 }, { "epoch": 1.47, "learning_rate": 2.5499749018218327e-05, "loss": 0.6117, "step": 132760 }, { "epoch": 1.47, "learning_rate": 2.5498826291079815e-05, "loss": 0.6554, "step": 132765 }, { "epoch": 1.47, "learning_rate": 2.54979035639413e-05, "loss": 0.6336, "step": 132770 }, { "epoch": 1.47, "learning_rate": 2.5496980836802787e-05, "loss": 0.6271, "step": 132775 }, { "epoch": 1.47, "learning_rate": 2.5496058109664278e-05, "loss": 0.6476, "step": 132780 }, { "epoch": 1.47, "learning_rate": 2.5495135382525766e-05, "loss": 0.599, "step": 132785 }, { "epoch": 1.47, "learning_rate": 2.549421265538725e-05, "loss": 0.6852, "step": 132790 }, { "epoch": 1.47, "learning_rate": 2.5493289928248738e-05, "loss": 0.626, "step": 132795 }, { "epoch": 1.47, "learning_rate": 2.549236720111023e-05, "loss": 0.6994, "step": 132800 }, { "epoch": 1.47, "learning_rate": 2.5491444473971714e-05, "loss": 0.6076, "step": 132805 }, { "epoch": 1.47, "learning_rate": 2.5490521746833202e-05, "loss": 0.6463, "step": 132810 }, { "epoch": 1.47, "learning_rate": 2.5489599019694686e-05, "loss": 0.6674, "step": 132815 }, { "epoch": 1.47, "learning_rate": 2.548867629255618e-05, "loss": 0.6556, "step": 132820 }, { "epoch": 1.47, "learning_rate": 2.5487753565417665e-05, "loss": 0.6514, "step": 132825 }, { "epoch": 1.47, "learning_rate": 2.5486830838279153e-05, "loss": 0.6316, "step": 132830 }, { "epoch": 1.47, "learning_rate": 2.5485908111140638e-05, "loss": 0.6584, "step": 132835 }, { "epoch": 1.47, "learning_rate": 2.5484985384002125e-05, "loss": 0.697, "step": 132840 }, { "epoch": 1.47, "learning_rate": 2.5484062656863617e-05, "loss": 0.6406, "step": 132845 }, { "epoch": 1.47, "learning_rate": 2.54831399297251e-05, "loss": 0.7123, "step": 132850 }, { "epoch": 1.47, "learning_rate": 2.548221720258659e-05, "loss": 0.6575, "step": 132855 }, { "epoch": 1.47, "learning_rate": 2.5481294475448077e-05, "loss": 0.6593, "step": 132860 }, { "epoch": 1.47, "learning_rate": 2.5480371748309568e-05, "loss": 0.597, "step": 132865 }, { "epoch": 1.47, "learning_rate": 2.5479449021171053e-05, "loss": 0.6553, "step": 132870 }, { "epoch": 1.47, "learning_rate": 2.547852629403254e-05, "loss": 0.6038, "step": 132875 }, { "epoch": 1.47, "learning_rate": 2.5477603566894025e-05, "loss": 0.6393, "step": 132880 }, { "epoch": 1.47, "learning_rate": 2.547668083975552e-05, "loss": 0.6415, "step": 132885 }, { "epoch": 1.47, "learning_rate": 2.5475758112617004e-05, "loss": 0.6333, "step": 132890 }, { "epoch": 1.47, "learning_rate": 2.5474835385478492e-05, "loss": 0.6677, "step": 132895 }, { "epoch": 1.47, "learning_rate": 2.5473912658339976e-05, "loss": 0.6565, "step": 132900 }, { "epoch": 1.47, "learning_rate": 2.5472989931201464e-05, "loss": 0.677, "step": 132905 }, { "epoch": 1.47, "learning_rate": 2.5472067204062955e-05, "loss": 0.6007, "step": 132910 }, { "epoch": 1.47, "learning_rate": 2.547114447692444e-05, "loss": 0.6374, "step": 132915 }, { "epoch": 1.47, "learning_rate": 2.5470221749785928e-05, "loss": 0.622, "step": 132920 }, { "epoch": 1.47, "learning_rate": 2.5469299022647412e-05, "loss": 0.6624, "step": 132925 }, { "epoch": 1.47, "learning_rate": 2.5468376295508907e-05, "loss": 0.6531, "step": 132930 }, { "epoch": 1.47, "learning_rate": 2.546745356837039e-05, "loss": 0.645, "step": 132935 }, { "epoch": 1.47, "learning_rate": 2.546653084123188e-05, "loss": 0.6875, "step": 132940 }, { "epoch": 1.47, "learning_rate": 2.5465608114093364e-05, "loss": 0.6265, "step": 132945 }, { "epoch": 1.47, "learning_rate": 2.5464685386954855e-05, "loss": 0.6131, "step": 132950 }, { "epoch": 1.47, "learning_rate": 2.5463762659816343e-05, "loss": 0.5951, "step": 132955 }, { "epoch": 1.47, "learning_rate": 2.546283993267783e-05, "loss": 0.6541, "step": 132960 }, { "epoch": 1.47, "learning_rate": 2.5461917205539315e-05, "loss": 0.5942, "step": 132965 }, { "epoch": 1.47, "learning_rate": 2.5460994478400806e-05, "loss": 0.6472, "step": 132970 }, { "epoch": 1.47, "learning_rate": 2.5460071751262294e-05, "loss": 0.6637, "step": 132975 }, { "epoch": 1.47, "learning_rate": 2.545914902412378e-05, "loss": 0.6022, "step": 132980 }, { "epoch": 1.47, "learning_rate": 2.5458226296985266e-05, "loss": 0.6477, "step": 132985 }, { "epoch": 1.47, "learning_rate": 2.545730356984675e-05, "loss": 0.6393, "step": 132990 }, { "epoch": 1.47, "learning_rate": 2.5456380842708245e-05, "loss": 0.6551, "step": 132995 }, { "epoch": 1.47, "learning_rate": 2.545545811556973e-05, "loss": 0.6177, "step": 133000 }, { "epoch": 1.47, "eval_loss": 0.6046963930130005, "eval_runtime": 69.1848, "eval_samples_per_second": 28.908, "eval_steps_per_second": 14.454, "step": 133000 }, { "epoch": 1.47, "learning_rate": 2.5454535388431218e-05, "loss": 0.6654, "step": 133005 }, { "epoch": 1.47, "learning_rate": 2.5453612661292702e-05, "loss": 0.5925, "step": 133010 }, { "epoch": 1.47, "learning_rate": 2.5452689934154193e-05, "loss": 0.5912, "step": 133015 }, { "epoch": 1.47, "learning_rate": 2.545176720701568e-05, "loss": 0.6238, "step": 133020 }, { "epoch": 1.47, "learning_rate": 2.5450844479877166e-05, "loss": 0.6604, "step": 133025 }, { "epoch": 1.47, "learning_rate": 2.5449921752738654e-05, "loss": 0.6522, "step": 133030 }, { "epoch": 1.47, "learning_rate": 2.5448999025600145e-05, "loss": 0.6023, "step": 133035 }, { "epoch": 1.47, "learning_rate": 2.5448076298461633e-05, "loss": 0.6267, "step": 133040 }, { "epoch": 1.47, "learning_rate": 2.5447153571323117e-05, "loss": 0.5913, "step": 133045 }, { "epoch": 1.47, "learning_rate": 2.5446230844184605e-05, "loss": 0.6536, "step": 133050 }, { "epoch": 1.47, "learning_rate": 2.544530811704609e-05, "loss": 0.6856, "step": 133055 }, { "epoch": 1.47, "learning_rate": 2.544438538990758e-05, "loss": 0.6802, "step": 133060 }, { "epoch": 1.47, "learning_rate": 2.544346266276907e-05, "loss": 0.7069, "step": 133065 }, { "epoch": 1.47, "learning_rate": 2.5442539935630556e-05, "loss": 0.6821, "step": 133070 }, { "epoch": 1.47, "learning_rate": 2.544161720849204e-05, "loss": 0.6643, "step": 133075 }, { "epoch": 1.47, "learning_rate": 2.5440694481353532e-05, "loss": 0.6529, "step": 133080 }, { "epoch": 1.47, "learning_rate": 2.543977175421502e-05, "loss": 0.7029, "step": 133085 }, { "epoch": 1.47, "learning_rate": 2.5438849027076504e-05, "loss": 0.5996, "step": 133090 }, { "epoch": 1.47, "learning_rate": 2.5437926299937992e-05, "loss": 0.6116, "step": 133095 }, { "epoch": 1.47, "learning_rate": 2.5437003572799483e-05, "loss": 0.6657, "step": 133100 }, { "epoch": 1.47, "learning_rate": 2.543608084566097e-05, "loss": 0.6973, "step": 133105 }, { "epoch": 1.47, "learning_rate": 2.5435158118522456e-05, "loss": 0.6116, "step": 133110 }, { "epoch": 1.47, "learning_rate": 2.5434235391383944e-05, "loss": 0.6549, "step": 133115 }, { "epoch": 1.47, "learning_rate": 2.5433312664245435e-05, "loss": 0.6835, "step": 133120 }, { "epoch": 1.47, "learning_rate": 2.543238993710692e-05, "loss": 0.643, "step": 133125 }, { "epoch": 1.47, "learning_rate": 2.5431467209968407e-05, "loss": 0.6033, "step": 133130 }, { "epoch": 1.47, "learning_rate": 2.5430544482829895e-05, "loss": 0.6067, "step": 133135 }, { "epoch": 1.47, "learning_rate": 2.542962175569138e-05, "loss": 0.5964, "step": 133140 }, { "epoch": 1.47, "learning_rate": 2.542869902855287e-05, "loss": 0.6669, "step": 133145 }, { "epoch": 1.47, "learning_rate": 2.542777630141436e-05, "loss": 0.671, "step": 133150 }, { "epoch": 1.47, "learning_rate": 2.5426853574275843e-05, "loss": 0.6135, "step": 133155 }, { "epoch": 1.47, "learning_rate": 2.542593084713733e-05, "loss": 0.6689, "step": 133160 }, { "epoch": 1.47, "learning_rate": 2.5425008119998822e-05, "loss": 0.6361, "step": 133165 }, { "epoch": 1.47, "learning_rate": 2.542408539286031e-05, "loss": 0.6102, "step": 133170 }, { "epoch": 1.47, "learning_rate": 2.5423162665721794e-05, "loss": 0.704, "step": 133175 }, { "epoch": 1.47, "learning_rate": 2.5422239938583282e-05, "loss": 0.6357, "step": 133180 }, { "epoch": 1.47, "learning_rate": 2.5421317211444773e-05, "loss": 0.6935, "step": 133185 }, { "epoch": 1.47, "learning_rate": 2.5420394484306258e-05, "loss": 0.6968, "step": 133190 }, { "epoch": 1.47, "learning_rate": 2.5419471757167746e-05, "loss": 0.6765, "step": 133195 }, { "epoch": 1.47, "learning_rate": 2.541854903002923e-05, "loss": 0.5889, "step": 133200 }, { "epoch": 1.47, "learning_rate": 2.5417626302890725e-05, "loss": 0.662, "step": 133205 }, { "epoch": 1.47, "learning_rate": 2.541670357575221e-05, "loss": 0.6483, "step": 133210 }, { "epoch": 1.48, "learning_rate": 2.5415780848613697e-05, "loss": 0.6516, "step": 133215 }, { "epoch": 1.48, "learning_rate": 2.541485812147518e-05, "loss": 0.6731, "step": 133220 }, { "epoch": 1.48, "learning_rate": 2.541393539433667e-05, "loss": 0.7032, "step": 133225 }, { "epoch": 1.48, "learning_rate": 2.541301266719816e-05, "loss": 0.6526, "step": 133230 }, { "epoch": 1.48, "learning_rate": 2.5412089940059645e-05, "loss": 0.672, "step": 133235 }, { "epoch": 1.48, "learning_rate": 2.5411167212921133e-05, "loss": 0.6737, "step": 133240 }, { "epoch": 1.48, "learning_rate": 2.541024448578262e-05, "loss": 0.5891, "step": 133245 }, { "epoch": 1.48, "learning_rate": 2.5409321758644112e-05, "loss": 0.6574, "step": 133250 }, { "epoch": 1.48, "learning_rate": 2.5408399031505597e-05, "loss": 0.6338, "step": 133255 }, { "epoch": 1.48, "learning_rate": 2.5407476304367084e-05, "loss": 0.648, "step": 133260 }, { "epoch": 1.48, "learning_rate": 2.540655357722857e-05, "loss": 0.6503, "step": 133265 }, { "epoch": 1.48, "learning_rate": 2.5405630850090064e-05, "loss": 0.677, "step": 133270 }, { "epoch": 1.48, "learning_rate": 2.5404708122951548e-05, "loss": 0.6523, "step": 133275 }, { "epoch": 1.48, "learning_rate": 2.5403785395813036e-05, "loss": 0.6636, "step": 133280 }, { "epoch": 1.48, "learning_rate": 2.540286266867452e-05, "loss": 0.6183, "step": 133285 }, { "epoch": 1.48, "learning_rate": 2.5401939941536008e-05, "loss": 0.6662, "step": 133290 }, { "epoch": 1.48, "learning_rate": 2.54010172143975e-05, "loss": 0.6552, "step": 133295 }, { "epoch": 1.48, "learning_rate": 2.5400094487258984e-05, "loss": 0.6307, "step": 133300 }, { "epoch": 1.48, "learning_rate": 2.5399171760120472e-05, "loss": 0.6802, "step": 133305 }, { "epoch": 1.48, "learning_rate": 2.5398249032981956e-05, "loss": 0.6721, "step": 133310 }, { "epoch": 1.48, "learning_rate": 2.539732630584345e-05, "loss": 0.6594, "step": 133315 }, { "epoch": 1.48, "learning_rate": 2.5396403578704935e-05, "loss": 0.6583, "step": 133320 }, { "epoch": 1.48, "learning_rate": 2.5395480851566423e-05, "loss": 0.6268, "step": 133325 }, { "epoch": 1.48, "learning_rate": 2.5394558124427908e-05, "loss": 0.6625, "step": 133330 }, { "epoch": 1.48, "learning_rate": 2.53936353972894e-05, "loss": 0.627, "step": 133335 }, { "epoch": 1.48, "learning_rate": 2.5392712670150887e-05, "loss": 0.6987, "step": 133340 }, { "epoch": 1.48, "learning_rate": 2.5391789943012374e-05, "loss": 0.6286, "step": 133345 }, { "epoch": 1.48, "learning_rate": 2.539086721587386e-05, "loss": 0.6374, "step": 133350 }, { "epoch": 1.48, "learning_rate": 2.538994448873535e-05, "loss": 0.606, "step": 133355 }, { "epoch": 1.48, "learning_rate": 2.5389021761596838e-05, "loss": 0.6033, "step": 133360 }, { "epoch": 1.48, "learning_rate": 2.5388099034458322e-05, "loss": 0.6403, "step": 133365 }, { "epoch": 1.48, "learning_rate": 2.538717630731981e-05, "loss": 0.6303, "step": 133370 }, { "epoch": 1.48, "learning_rate": 2.5386253580181295e-05, "loss": 0.6622, "step": 133375 }, { "epoch": 1.48, "learning_rate": 2.538533085304279e-05, "loss": 0.7025, "step": 133380 }, { "epoch": 1.48, "learning_rate": 2.5384408125904274e-05, "loss": 0.6757, "step": 133385 }, { "epoch": 1.48, "learning_rate": 2.5383485398765762e-05, "loss": 0.6286, "step": 133390 }, { "epoch": 1.48, "learning_rate": 2.5382562671627246e-05, "loss": 0.625, "step": 133395 }, { "epoch": 1.48, "learning_rate": 2.5381639944488737e-05, "loss": 0.623, "step": 133400 }, { "epoch": 1.48, "learning_rate": 2.5380717217350225e-05, "loss": 0.6895, "step": 133405 }, { "epoch": 1.48, "learning_rate": 2.537979449021171e-05, "loss": 0.6081, "step": 133410 }, { "epoch": 1.48, "learning_rate": 2.5378871763073198e-05, "loss": 0.5866, "step": 133415 }, { "epoch": 1.48, "learning_rate": 2.537794903593469e-05, "loss": 0.6083, "step": 133420 }, { "epoch": 1.48, "learning_rate": 2.5377026308796177e-05, "loss": 0.6461, "step": 133425 }, { "epoch": 1.48, "learning_rate": 2.537610358165766e-05, "loss": 0.5784, "step": 133430 }, { "epoch": 1.48, "learning_rate": 2.537518085451915e-05, "loss": 0.633, "step": 133435 }, { "epoch": 1.48, "learning_rate": 2.5374258127380633e-05, "loss": 0.7047, "step": 133440 }, { "epoch": 1.48, "learning_rate": 2.5373335400242128e-05, "loss": 0.6343, "step": 133445 }, { "epoch": 1.48, "learning_rate": 2.5372412673103613e-05, "loss": 0.6321, "step": 133450 }, { "epoch": 1.48, "learning_rate": 2.53714899459651e-05, "loss": 0.6054, "step": 133455 }, { "epoch": 1.48, "learning_rate": 2.5370567218826585e-05, "loss": 0.6297, "step": 133460 }, { "epoch": 1.48, "learning_rate": 2.5369644491688076e-05, "loss": 0.6328, "step": 133465 }, { "epoch": 1.48, "learning_rate": 2.5368721764549564e-05, "loss": 0.604, "step": 133470 }, { "epoch": 1.48, "learning_rate": 2.536779903741105e-05, "loss": 0.6431, "step": 133475 }, { "epoch": 1.48, "learning_rate": 2.5366876310272536e-05, "loss": 0.6472, "step": 133480 }, { "epoch": 1.48, "learning_rate": 2.5365953583134027e-05, "loss": 0.5879, "step": 133485 }, { "epoch": 1.48, "learning_rate": 2.5365030855995515e-05, "loss": 0.6767, "step": 133490 }, { "epoch": 1.48, "learning_rate": 2.5364108128857e-05, "loss": 0.6506, "step": 133495 }, { "epoch": 1.48, "learning_rate": 2.5363185401718488e-05, "loss": 0.6718, "step": 133500 }, { "epoch": 1.48, "learning_rate": 2.536226267457998e-05, "loss": 0.6507, "step": 133505 }, { "epoch": 1.48, "learning_rate": 2.5361339947441463e-05, "loss": 0.6273, "step": 133510 }, { "epoch": 1.48, "learning_rate": 2.536041722030295e-05, "loss": 0.6605, "step": 133515 }, { "epoch": 1.48, "learning_rate": 2.535949449316444e-05, "loss": 0.6096, "step": 133520 }, { "epoch": 1.48, "learning_rate": 2.5358571766025923e-05, "loss": 0.6208, "step": 133525 }, { "epoch": 1.48, "learning_rate": 2.5357649038887415e-05, "loss": 0.6328, "step": 133530 }, { "epoch": 1.48, "learning_rate": 2.5356726311748903e-05, "loss": 0.6436, "step": 133535 }, { "epoch": 1.48, "learning_rate": 2.5355803584610387e-05, "loss": 0.595, "step": 133540 }, { "epoch": 1.48, "learning_rate": 2.5354880857471875e-05, "loss": 0.5575, "step": 133545 }, { "epoch": 1.48, "learning_rate": 2.5353958130333366e-05, "loss": 0.6274, "step": 133550 }, { "epoch": 1.48, "learning_rate": 2.5353035403194854e-05, "loss": 0.6352, "step": 133555 }, { "epoch": 1.48, "learning_rate": 2.535211267605634e-05, "loss": 0.6819, "step": 133560 }, { "epoch": 1.48, "learning_rate": 2.5351189948917826e-05, "loss": 0.6477, "step": 133565 }, { "epoch": 1.48, "learning_rate": 2.5350267221779318e-05, "loss": 0.6192, "step": 133570 }, { "epoch": 1.48, "learning_rate": 2.5349344494640802e-05, "loss": 0.6123, "step": 133575 }, { "epoch": 1.48, "learning_rate": 2.534842176750229e-05, "loss": 0.6631, "step": 133580 }, { "epoch": 1.48, "learning_rate": 2.5347499040363774e-05, "loss": 0.626, "step": 133585 }, { "epoch": 1.48, "learning_rate": 2.5346576313225262e-05, "loss": 0.6301, "step": 133590 }, { "epoch": 1.48, "learning_rate": 2.5345653586086753e-05, "loss": 0.6664, "step": 133595 }, { "epoch": 1.48, "learning_rate": 2.534473085894824e-05, "loss": 0.6116, "step": 133600 }, { "epoch": 1.48, "learning_rate": 2.5343808131809726e-05, "loss": 0.6435, "step": 133605 }, { "epoch": 1.48, "learning_rate": 2.5342885404671214e-05, "loss": 0.6732, "step": 133610 }, { "epoch": 1.48, "learning_rate": 2.5341962677532705e-05, "loss": 0.6557, "step": 133615 }, { "epoch": 1.48, "learning_rate": 2.534103995039419e-05, "loss": 0.6365, "step": 133620 }, { "epoch": 1.48, "learning_rate": 2.5340117223255677e-05, "loss": 0.6349, "step": 133625 }, { "epoch": 1.48, "learning_rate": 2.5339194496117165e-05, "loss": 0.5768, "step": 133630 }, { "epoch": 1.48, "learning_rate": 2.5338271768978656e-05, "loss": 0.664, "step": 133635 }, { "epoch": 1.48, "learning_rate": 2.533734904184014e-05, "loss": 0.6754, "step": 133640 }, { "epoch": 1.48, "learning_rate": 2.533642631470163e-05, "loss": 0.6567, "step": 133645 }, { "epoch": 1.48, "learning_rate": 2.5335503587563113e-05, "loss": 0.6456, "step": 133650 }, { "epoch": 1.48, "learning_rate": 2.5334580860424608e-05, "loss": 0.6745, "step": 133655 }, { "epoch": 1.48, "learning_rate": 2.5333658133286092e-05, "loss": 0.6594, "step": 133660 }, { "epoch": 1.48, "learning_rate": 2.533273540614758e-05, "loss": 0.6389, "step": 133665 }, { "epoch": 1.48, "learning_rate": 2.5331812679009064e-05, "loss": 0.6926, "step": 133670 }, { "epoch": 1.48, "learning_rate": 2.5330889951870552e-05, "loss": 0.668, "step": 133675 }, { "epoch": 1.48, "learning_rate": 2.5329967224732043e-05, "loss": 0.5942, "step": 133680 }, { "epoch": 1.48, "learning_rate": 2.5329044497593528e-05, "loss": 0.6233, "step": 133685 }, { "epoch": 1.48, "learning_rate": 2.5328121770455016e-05, "loss": 0.6909, "step": 133690 }, { "epoch": 1.48, "learning_rate": 2.53271990433165e-05, "loss": 0.6764, "step": 133695 }, { "epoch": 1.48, "learning_rate": 2.5326276316177995e-05, "loss": 0.658, "step": 133700 }, { "epoch": 1.48, "learning_rate": 2.532535358903948e-05, "loss": 0.5819, "step": 133705 }, { "epoch": 1.48, "learning_rate": 2.5324430861900967e-05, "loss": 0.6409, "step": 133710 }, { "epoch": 1.48, "learning_rate": 2.532350813476245e-05, "loss": 0.6522, "step": 133715 }, { "epoch": 1.48, "learning_rate": 2.5322585407623943e-05, "loss": 0.7268, "step": 133720 }, { "epoch": 1.48, "learning_rate": 2.532166268048543e-05, "loss": 0.6189, "step": 133725 }, { "epoch": 1.48, "learning_rate": 2.532073995334692e-05, "loss": 0.6438, "step": 133730 }, { "epoch": 1.48, "learning_rate": 2.5319817226208403e-05, "loss": 0.6405, "step": 133735 }, { "epoch": 1.48, "learning_rate": 2.531889449906989e-05, "loss": 0.6826, "step": 133740 }, { "epoch": 1.48, "learning_rate": 2.5317971771931382e-05, "loss": 0.6694, "step": 133745 }, { "epoch": 1.48, "learning_rate": 2.5317049044792867e-05, "loss": 0.6723, "step": 133750 }, { "epoch": 1.48, "learning_rate": 2.5316126317654354e-05, "loss": 0.6614, "step": 133755 }, { "epoch": 1.48, "learning_rate": 2.531520359051584e-05, "loss": 0.6345, "step": 133760 }, { "epoch": 1.48, "learning_rate": 2.5314280863377333e-05, "loss": 0.6396, "step": 133765 }, { "epoch": 1.48, "learning_rate": 2.5313358136238818e-05, "loss": 0.6498, "step": 133770 }, { "epoch": 1.48, "learning_rate": 2.5312435409100306e-05, "loss": 0.6633, "step": 133775 }, { "epoch": 1.48, "learning_rate": 2.531151268196179e-05, "loss": 0.6551, "step": 133780 }, { "epoch": 1.48, "learning_rate": 2.531058995482328e-05, "loss": 0.6377, "step": 133785 }, { "epoch": 1.48, "learning_rate": 2.530966722768477e-05, "loss": 0.6227, "step": 133790 }, { "epoch": 1.48, "learning_rate": 2.5308744500546254e-05, "loss": 0.6447, "step": 133795 }, { "epoch": 1.48, "learning_rate": 2.530782177340774e-05, "loss": 0.5952, "step": 133800 }, { "epoch": 1.48, "learning_rate": 2.5306899046269233e-05, "loss": 0.6668, "step": 133805 }, { "epoch": 1.48, "learning_rate": 2.530597631913072e-05, "loss": 0.663, "step": 133810 }, { "epoch": 1.48, "learning_rate": 2.5305053591992205e-05, "loss": 0.6135, "step": 133815 }, { "epoch": 1.48, "learning_rate": 2.5304130864853693e-05, "loss": 0.5954, "step": 133820 }, { "epoch": 1.48, "learning_rate": 2.5303208137715177e-05, "loss": 0.7131, "step": 133825 }, { "epoch": 1.48, "learning_rate": 2.5302285410576672e-05, "loss": 0.6881, "step": 133830 }, { "epoch": 1.48, "learning_rate": 2.5301362683438157e-05, "loss": 0.6446, "step": 133835 }, { "epoch": 1.48, "learning_rate": 2.5300439956299644e-05, "loss": 0.682, "step": 133840 }, { "epoch": 1.48, "learning_rate": 2.529951722916113e-05, "loss": 0.6311, "step": 133845 }, { "epoch": 1.48, "learning_rate": 2.529859450202262e-05, "loss": 0.6357, "step": 133850 }, { "epoch": 1.48, "learning_rate": 2.5297671774884108e-05, "loss": 0.6062, "step": 133855 }, { "epoch": 1.48, "learning_rate": 2.5296749047745592e-05, "loss": 0.6049, "step": 133860 }, { "epoch": 1.48, "learning_rate": 2.529582632060708e-05, "loss": 0.5728, "step": 133865 }, { "epoch": 1.48, "learning_rate": 2.529490359346857e-05, "loss": 0.6411, "step": 133870 }, { "epoch": 1.48, "learning_rate": 2.529398086633006e-05, "loss": 0.6133, "step": 133875 }, { "epoch": 1.48, "learning_rate": 2.5293058139191544e-05, "loss": 0.6226, "step": 133880 }, { "epoch": 1.48, "learning_rate": 2.529213541205303e-05, "loss": 0.6408, "step": 133885 }, { "epoch": 1.48, "learning_rate": 2.5291212684914516e-05, "loss": 0.6126, "step": 133890 }, { "epoch": 1.48, "learning_rate": 2.5290289957776007e-05, "loss": 0.6718, "step": 133895 }, { "epoch": 1.48, "learning_rate": 2.5289367230637495e-05, "loss": 0.6397, "step": 133900 }, { "epoch": 1.48, "learning_rate": 2.5288444503498983e-05, "loss": 0.6147, "step": 133905 }, { "epoch": 1.48, "learning_rate": 2.5287521776360468e-05, "loss": 0.6871, "step": 133910 }, { "epoch": 1.48, "learning_rate": 2.528659904922196e-05, "loss": 0.6762, "step": 133915 }, { "epoch": 1.48, "learning_rate": 2.5285676322083447e-05, "loss": 0.6305, "step": 133920 }, { "epoch": 1.48, "learning_rate": 2.528475359494493e-05, "loss": 0.6701, "step": 133925 }, { "epoch": 1.48, "learning_rate": 2.528383086780642e-05, "loss": 0.6136, "step": 133930 }, { "epoch": 1.48, "learning_rate": 2.528290814066791e-05, "loss": 0.678, "step": 133935 }, { "epoch": 1.48, "learning_rate": 2.5281985413529398e-05, "loss": 0.6689, "step": 133940 }, { "epoch": 1.48, "learning_rate": 2.5281062686390882e-05, "loss": 0.5994, "step": 133945 }, { "epoch": 1.48, "learning_rate": 2.528013995925237e-05, "loss": 0.6416, "step": 133950 }, { "epoch": 1.48, "learning_rate": 2.527921723211386e-05, "loss": 0.5906, "step": 133955 }, { "epoch": 1.48, "learning_rate": 2.5278294504975346e-05, "loss": 0.6298, "step": 133960 }, { "epoch": 1.48, "learning_rate": 2.5277371777836834e-05, "loss": 0.6604, "step": 133965 }, { "epoch": 1.48, "learning_rate": 2.527644905069832e-05, "loss": 0.668, "step": 133970 }, { "epoch": 1.48, "learning_rate": 2.5275526323559806e-05, "loss": 0.6427, "step": 133975 }, { "epoch": 1.48, "learning_rate": 2.5274603596421297e-05, "loss": 0.6615, "step": 133980 }, { "epoch": 1.48, "learning_rate": 2.5273680869282785e-05, "loss": 0.6675, "step": 133985 }, { "epoch": 1.48, "learning_rate": 2.527275814214427e-05, "loss": 0.6228, "step": 133990 }, { "epoch": 1.48, "learning_rate": 2.5271835415005758e-05, "loss": 0.61, "step": 133995 }, { "epoch": 1.48, "learning_rate": 2.527091268786725e-05, "loss": 0.5933, "step": 134000 }, { "epoch": 1.48, "eval_loss": 0.6169156432151794, "eval_runtime": 69.3177, "eval_samples_per_second": 28.853, "eval_steps_per_second": 14.426, "step": 134000 }, { "epoch": 1.48, "learning_rate": 2.5269989960728733e-05, "loss": 0.6244, "step": 134005 }, { "epoch": 1.48, "learning_rate": 2.526906723359022e-05, "loss": 0.6649, "step": 134010 }, { "epoch": 1.48, "learning_rate": 2.526814450645171e-05, "loss": 0.6277, "step": 134015 }, { "epoch": 1.48, "learning_rate": 2.52672217793132e-05, "loss": 0.6423, "step": 134020 }, { "epoch": 1.48, "learning_rate": 2.5266299052174685e-05, "loss": 0.6175, "step": 134025 }, { "epoch": 1.48, "learning_rate": 2.5265376325036172e-05, "loss": 0.6419, "step": 134030 }, { "epoch": 1.48, "learning_rate": 2.5264453597897657e-05, "loss": 0.621, "step": 134035 }, { "epoch": 1.48, "learning_rate": 2.526353087075915e-05, "loss": 0.621, "step": 134040 }, { "epoch": 1.48, "learning_rate": 2.5262608143620636e-05, "loss": 0.6006, "step": 134045 }, { "epoch": 1.48, "learning_rate": 2.5261685416482124e-05, "loss": 0.605, "step": 134050 }, { "epoch": 1.48, "learning_rate": 2.526076268934361e-05, "loss": 0.6295, "step": 134055 }, { "epoch": 1.48, "learning_rate": 2.5259839962205096e-05, "loss": 0.6708, "step": 134060 }, { "epoch": 1.48, "learning_rate": 2.5258917235066587e-05, "loss": 0.6162, "step": 134065 }, { "epoch": 1.48, "learning_rate": 2.5257994507928072e-05, "loss": 0.5958, "step": 134070 }, { "epoch": 1.48, "learning_rate": 2.525707178078956e-05, "loss": 0.5987, "step": 134075 }, { "epoch": 1.48, "learning_rate": 2.5256149053651044e-05, "loss": 0.6303, "step": 134080 }, { "epoch": 1.48, "learning_rate": 2.525522632651254e-05, "loss": 0.6485, "step": 134085 }, { "epoch": 1.48, "learning_rate": 2.5254303599374023e-05, "loss": 0.6191, "step": 134090 }, { "epoch": 1.48, "learning_rate": 2.525338087223551e-05, "loss": 0.6144, "step": 134095 }, { "epoch": 1.48, "learning_rate": 2.5252458145096996e-05, "loss": 0.6592, "step": 134100 }, { "epoch": 1.48, "learning_rate": 2.5251535417958487e-05, "loss": 0.6646, "step": 134105 }, { "epoch": 1.48, "learning_rate": 2.5250612690819975e-05, "loss": 0.6545, "step": 134110 }, { "epoch": 1.49, "learning_rate": 2.5249689963681463e-05, "loss": 0.5865, "step": 134115 }, { "epoch": 1.49, "learning_rate": 2.5248767236542947e-05, "loss": 0.6823, "step": 134120 }, { "epoch": 1.49, "learning_rate": 2.5247844509404435e-05, "loss": 0.5819, "step": 134125 }, { "epoch": 1.49, "learning_rate": 2.5246921782265926e-05, "loss": 0.6843, "step": 134130 }, { "epoch": 1.49, "learning_rate": 2.524599905512741e-05, "loss": 0.6209, "step": 134135 }, { "epoch": 1.49, "learning_rate": 2.52450763279889e-05, "loss": 0.6224, "step": 134140 }, { "epoch": 1.49, "learning_rate": 2.5244153600850383e-05, "loss": 0.6316, "step": 134145 }, { "epoch": 1.49, "learning_rate": 2.5243230873711877e-05, "loss": 0.6076, "step": 134150 }, { "epoch": 1.49, "learning_rate": 2.5242308146573362e-05, "loss": 0.6456, "step": 134155 }, { "epoch": 1.49, "learning_rate": 2.524138541943485e-05, "loss": 0.6325, "step": 134160 }, { "epoch": 1.49, "learning_rate": 2.5240462692296334e-05, "loss": 0.632, "step": 134165 }, { "epoch": 1.49, "learning_rate": 2.5239539965157825e-05, "loss": 0.6579, "step": 134170 }, { "epoch": 1.49, "learning_rate": 2.5238617238019313e-05, "loss": 0.678, "step": 134175 }, { "epoch": 1.49, "learning_rate": 2.5237694510880798e-05, "loss": 0.627, "step": 134180 }, { "epoch": 1.49, "learning_rate": 2.5236771783742286e-05, "loss": 0.6349, "step": 134185 }, { "epoch": 1.49, "learning_rate": 2.5235849056603777e-05, "loss": 0.6996, "step": 134190 }, { "epoch": 1.49, "learning_rate": 2.5234926329465265e-05, "loss": 0.6474, "step": 134195 }, { "epoch": 1.49, "learning_rate": 2.523400360232675e-05, "loss": 0.638, "step": 134200 }, { "epoch": 1.49, "learning_rate": 2.5233080875188237e-05, "loss": 0.5977, "step": 134205 }, { "epoch": 1.49, "learning_rate": 2.523215814804972e-05, "loss": 0.6286, "step": 134210 }, { "epoch": 1.49, "learning_rate": 2.5231235420911216e-05, "loss": 0.654, "step": 134215 }, { "epoch": 1.49, "learning_rate": 2.52303126937727e-05, "loss": 0.6271, "step": 134220 }, { "epoch": 1.49, "learning_rate": 2.522938996663419e-05, "loss": 0.6274, "step": 134225 }, { "epoch": 1.49, "learning_rate": 2.5228467239495673e-05, "loss": 0.6202, "step": 134230 }, { "epoch": 1.49, "learning_rate": 2.5227544512357164e-05, "loss": 0.6417, "step": 134235 }, { "epoch": 1.49, "learning_rate": 2.5226621785218652e-05, "loss": 0.6699, "step": 134240 }, { "epoch": 1.49, "learning_rate": 2.5225699058080136e-05, "loss": 0.678, "step": 134245 }, { "epoch": 1.49, "learning_rate": 2.5224776330941624e-05, "loss": 0.6691, "step": 134250 }, { "epoch": 1.49, "learning_rate": 2.5223853603803116e-05, "loss": 0.5712, "step": 134255 }, { "epoch": 1.49, "learning_rate": 2.5222930876664603e-05, "loss": 0.6597, "step": 134260 }, { "epoch": 1.49, "learning_rate": 2.5222008149526088e-05, "loss": 0.7144, "step": 134265 }, { "epoch": 1.49, "learning_rate": 2.5221085422387576e-05, "loss": 0.6048, "step": 134270 }, { "epoch": 1.49, "learning_rate": 2.522016269524906e-05, "loss": 0.6869, "step": 134275 }, { "epoch": 1.49, "learning_rate": 2.521923996811055e-05, "loss": 0.6675, "step": 134280 }, { "epoch": 1.49, "learning_rate": 2.521831724097204e-05, "loss": 0.6407, "step": 134285 }, { "epoch": 1.49, "learning_rate": 2.5217394513833527e-05, "loss": 0.6541, "step": 134290 }, { "epoch": 1.49, "learning_rate": 2.521647178669501e-05, "loss": 0.6326, "step": 134295 }, { "epoch": 1.49, "learning_rate": 2.5215549059556503e-05, "loss": 0.6497, "step": 134300 }, { "epoch": 1.49, "learning_rate": 2.521462633241799e-05, "loss": 0.6436, "step": 134305 }, { "epoch": 1.49, "learning_rate": 2.5213703605279475e-05, "loss": 0.6394, "step": 134310 }, { "epoch": 1.49, "learning_rate": 2.5212780878140963e-05, "loss": 0.671, "step": 134315 }, { "epoch": 1.49, "learning_rate": 2.5211858151002454e-05, "loss": 0.6333, "step": 134320 }, { "epoch": 1.49, "learning_rate": 2.5210935423863942e-05, "loss": 0.6407, "step": 134325 }, { "epoch": 1.49, "learning_rate": 2.5210012696725426e-05, "loss": 0.6816, "step": 134330 }, { "epoch": 1.49, "learning_rate": 2.5209089969586914e-05, "loss": 0.6905, "step": 134335 }, { "epoch": 1.49, "learning_rate": 2.5208167242448406e-05, "loss": 0.6098, "step": 134340 }, { "epoch": 1.49, "learning_rate": 2.520724451530989e-05, "loss": 0.6041, "step": 134345 }, { "epoch": 1.49, "learning_rate": 2.5206321788171378e-05, "loss": 0.6276, "step": 134350 }, { "epoch": 1.49, "learning_rate": 2.5205399061032862e-05, "loss": 0.6086, "step": 134355 }, { "epoch": 1.49, "learning_rate": 2.520447633389435e-05, "loss": 0.6575, "step": 134360 }, { "epoch": 1.49, "learning_rate": 2.520355360675584e-05, "loss": 0.6852, "step": 134365 }, { "epoch": 1.49, "learning_rate": 2.520263087961733e-05, "loss": 0.6132, "step": 134370 }, { "epoch": 1.49, "learning_rate": 2.5201708152478814e-05, "loss": 0.6076, "step": 134375 }, { "epoch": 1.49, "learning_rate": 2.52007854253403e-05, "loss": 0.6456, "step": 134380 }, { "epoch": 1.49, "learning_rate": 2.5199862698201793e-05, "loss": 0.6465, "step": 134385 }, { "epoch": 1.49, "learning_rate": 2.519893997106328e-05, "loss": 0.6116, "step": 134390 }, { "epoch": 1.49, "learning_rate": 2.5198017243924765e-05, "loss": 0.6285, "step": 134395 }, { "epoch": 1.49, "learning_rate": 2.5197094516786253e-05, "loss": 0.6932, "step": 134400 }, { "epoch": 1.49, "learning_rate": 2.5196171789647744e-05, "loss": 0.6031, "step": 134405 }, { "epoch": 1.49, "learning_rate": 2.519524906250923e-05, "loss": 0.6424, "step": 134410 }, { "epoch": 1.49, "learning_rate": 2.5194326335370717e-05, "loss": 0.5982, "step": 134415 }, { "epoch": 1.49, "learning_rate": 2.51934036082322e-05, "loss": 0.6964, "step": 134420 }, { "epoch": 1.49, "learning_rate": 2.519248088109369e-05, "loss": 0.6791, "step": 134425 }, { "epoch": 1.49, "learning_rate": 2.519155815395518e-05, "loss": 0.612, "step": 134430 }, { "epoch": 1.49, "learning_rate": 2.5190635426816668e-05, "loss": 0.6663, "step": 134435 }, { "epoch": 1.49, "learning_rate": 2.5189712699678152e-05, "loss": 0.6537, "step": 134440 }, { "epoch": 1.49, "learning_rate": 2.518878997253964e-05, "loss": 0.6311, "step": 134445 }, { "epoch": 1.49, "learning_rate": 2.518786724540113e-05, "loss": 0.5915, "step": 134450 }, { "epoch": 1.49, "learning_rate": 2.5186944518262616e-05, "loss": 0.6415, "step": 134455 }, { "epoch": 1.49, "learning_rate": 2.5186021791124104e-05, "loss": 0.6771, "step": 134460 }, { "epoch": 1.49, "learning_rate": 2.518509906398559e-05, "loss": 0.6649, "step": 134465 }, { "epoch": 1.49, "learning_rate": 2.5184176336847083e-05, "loss": 0.6344, "step": 134470 }, { "epoch": 1.49, "learning_rate": 2.5183253609708567e-05, "loss": 0.6283, "step": 134475 }, { "epoch": 1.49, "learning_rate": 2.5182330882570055e-05, "loss": 0.6641, "step": 134480 }, { "epoch": 1.49, "learning_rate": 2.518140815543154e-05, "loss": 0.6451, "step": 134485 }, { "epoch": 1.49, "learning_rate": 2.518048542829303e-05, "loss": 0.6343, "step": 134490 }, { "epoch": 1.49, "learning_rate": 2.517956270115452e-05, "loss": 0.6775, "step": 134495 }, { "epoch": 1.49, "learning_rate": 2.5178639974016007e-05, "loss": 0.5879, "step": 134500 }, { "epoch": 1.49, "learning_rate": 2.517771724687749e-05, "loss": 0.6463, "step": 134505 }, { "epoch": 1.49, "learning_rate": 2.517679451973898e-05, "loss": 0.6583, "step": 134510 }, { "epoch": 1.49, "learning_rate": 2.517587179260047e-05, "loss": 0.604, "step": 134515 }, { "epoch": 1.49, "learning_rate": 2.5174949065461955e-05, "loss": 0.5897, "step": 134520 }, { "epoch": 1.49, "learning_rate": 2.5174026338323442e-05, "loss": 0.6444, "step": 134525 }, { "epoch": 1.49, "learning_rate": 2.5173103611184927e-05, "loss": 0.633, "step": 134530 }, { "epoch": 1.49, "learning_rate": 2.517218088404642e-05, "loss": 0.5729, "step": 134535 }, { "epoch": 1.49, "learning_rate": 2.5171258156907906e-05, "loss": 0.6646, "step": 134540 }, { "epoch": 1.49, "learning_rate": 2.5170335429769394e-05, "loss": 0.6242, "step": 134545 }, { "epoch": 1.49, "learning_rate": 2.5169412702630878e-05, "loss": 0.5992, "step": 134550 }, { "epoch": 1.49, "learning_rate": 2.516848997549237e-05, "loss": 0.6574, "step": 134555 }, { "epoch": 1.49, "learning_rate": 2.5167567248353857e-05, "loss": 0.6516, "step": 134560 }, { "epoch": 1.49, "learning_rate": 2.5166644521215342e-05, "loss": 0.6701, "step": 134565 }, { "epoch": 1.49, "learning_rate": 2.516572179407683e-05, "loss": 0.6354, "step": 134570 }, { "epoch": 1.49, "learning_rate": 2.5164799066938318e-05, "loss": 0.5886, "step": 134575 }, { "epoch": 1.49, "learning_rate": 2.516387633979981e-05, "loss": 0.64, "step": 134580 }, { "epoch": 1.49, "learning_rate": 2.5162953612661293e-05, "loss": 0.5414, "step": 134585 }, { "epoch": 1.49, "learning_rate": 2.516203088552278e-05, "loss": 0.6457, "step": 134590 }, { "epoch": 1.49, "learning_rate": 2.5161108158384266e-05, "loss": 0.6443, "step": 134595 }, { "epoch": 1.49, "learning_rate": 2.516018543124576e-05, "loss": 0.696, "step": 134600 }, { "epoch": 1.49, "learning_rate": 2.5159262704107245e-05, "loss": 0.6012, "step": 134605 }, { "epoch": 1.49, "learning_rate": 2.5158339976968732e-05, "loss": 0.62, "step": 134610 }, { "epoch": 1.49, "learning_rate": 2.5157417249830217e-05, "loss": 0.6618, "step": 134615 }, { "epoch": 1.49, "learning_rate": 2.5156494522691708e-05, "loss": 0.6411, "step": 134620 }, { "epoch": 1.49, "learning_rate": 2.5155571795553196e-05, "loss": 0.6491, "step": 134625 }, { "epoch": 1.49, "learning_rate": 2.515464906841468e-05, "loss": 0.6151, "step": 134630 }, { "epoch": 1.49, "learning_rate": 2.5153726341276168e-05, "loss": 0.6432, "step": 134635 }, { "epoch": 1.49, "learning_rate": 2.515280361413766e-05, "loss": 0.6713, "step": 134640 }, { "epoch": 1.49, "learning_rate": 2.5151880886999147e-05, "loss": 0.6054, "step": 134645 }, { "epoch": 1.49, "learning_rate": 2.5150958159860632e-05, "loss": 0.6698, "step": 134650 }, { "epoch": 1.49, "learning_rate": 2.515003543272212e-05, "loss": 0.6352, "step": 134655 }, { "epoch": 1.49, "learning_rate": 2.5149112705583604e-05, "loss": 0.7252, "step": 134660 }, { "epoch": 1.49, "learning_rate": 2.5148189978445095e-05, "loss": 0.6832, "step": 134665 }, { "epoch": 1.49, "learning_rate": 2.5147267251306583e-05, "loss": 0.638, "step": 134670 }, { "epoch": 1.49, "learning_rate": 2.514634452416807e-05, "loss": 0.59, "step": 134675 }, { "epoch": 1.49, "learning_rate": 2.5145421797029556e-05, "loss": 0.5617, "step": 134680 }, { "epoch": 1.49, "learning_rate": 2.5144499069891047e-05, "loss": 0.6494, "step": 134685 }, { "epoch": 1.49, "learning_rate": 2.5143576342752535e-05, "loss": 0.6348, "step": 134690 }, { "epoch": 1.49, "learning_rate": 2.514265361561402e-05, "loss": 0.6364, "step": 134695 }, { "epoch": 1.49, "learning_rate": 2.5141730888475507e-05, "loss": 0.6248, "step": 134700 }, { "epoch": 1.49, "learning_rate": 2.5140808161336998e-05, "loss": 0.6155, "step": 134705 }, { "epoch": 1.49, "learning_rate": 2.5139885434198486e-05, "loss": 0.6811, "step": 134710 }, { "epoch": 1.49, "learning_rate": 2.513896270705997e-05, "loss": 0.588, "step": 134715 }, { "epoch": 1.49, "learning_rate": 2.513803997992146e-05, "loss": 0.6068, "step": 134720 }, { "epoch": 1.49, "learning_rate": 2.5137117252782943e-05, "loss": 0.6436, "step": 134725 }, { "epoch": 1.49, "learning_rate": 2.5136194525644434e-05, "loss": 0.6913, "step": 134730 }, { "epoch": 1.49, "learning_rate": 2.5135271798505922e-05, "loss": 0.6661, "step": 134735 }, { "epoch": 1.49, "learning_rate": 2.5134349071367406e-05, "loss": 0.5958, "step": 134740 }, { "epoch": 1.49, "learning_rate": 2.5133426344228894e-05, "loss": 0.6168, "step": 134745 }, { "epoch": 1.49, "learning_rate": 2.5132503617090385e-05, "loss": 0.6233, "step": 134750 }, { "epoch": 1.49, "learning_rate": 2.5131580889951873e-05, "loss": 0.6394, "step": 134755 }, { "epoch": 1.49, "learning_rate": 2.5130658162813358e-05, "loss": 0.6282, "step": 134760 }, { "epoch": 1.49, "learning_rate": 2.5129735435674846e-05, "loss": 0.6799, "step": 134765 }, { "epoch": 1.49, "learning_rate": 2.5128812708536337e-05, "loss": 0.5995, "step": 134770 }, { "epoch": 1.49, "learning_rate": 2.5127889981397825e-05, "loss": 0.6049, "step": 134775 }, { "epoch": 1.49, "learning_rate": 2.512696725425931e-05, "loss": 0.5979, "step": 134780 }, { "epoch": 1.49, "learning_rate": 2.5126044527120797e-05, "loss": 0.6848, "step": 134785 }, { "epoch": 1.49, "learning_rate": 2.5125121799982288e-05, "loss": 0.6877, "step": 134790 }, { "epoch": 1.49, "learning_rate": 2.5124199072843773e-05, "loss": 0.6461, "step": 134795 }, { "epoch": 1.49, "learning_rate": 2.512327634570526e-05, "loss": 0.6549, "step": 134800 }, { "epoch": 1.49, "learning_rate": 2.5122353618566745e-05, "loss": 0.6423, "step": 134805 }, { "epoch": 1.49, "learning_rate": 2.5121430891428233e-05, "loss": 0.6554, "step": 134810 }, { "epoch": 1.49, "learning_rate": 2.5120508164289724e-05, "loss": 0.6215, "step": 134815 }, { "epoch": 1.49, "learning_rate": 2.5119585437151212e-05, "loss": 0.6612, "step": 134820 }, { "epoch": 1.49, "learning_rate": 2.5118662710012696e-05, "loss": 0.6231, "step": 134825 }, { "epoch": 1.49, "learning_rate": 2.5117739982874184e-05, "loss": 0.6496, "step": 134830 }, { "epoch": 1.49, "learning_rate": 2.5116817255735675e-05, "loss": 0.6688, "step": 134835 }, { "epoch": 1.49, "learning_rate": 2.511589452859716e-05, "loss": 0.668, "step": 134840 }, { "epoch": 1.49, "learning_rate": 2.5114971801458648e-05, "loss": 0.6589, "step": 134845 }, { "epoch": 1.49, "learning_rate": 2.5114049074320136e-05, "loss": 0.6557, "step": 134850 }, { "epoch": 1.49, "learning_rate": 2.5113126347181627e-05, "loss": 0.6278, "step": 134855 }, { "epoch": 1.49, "learning_rate": 2.511220362004311e-05, "loss": 0.6137, "step": 134860 }, { "epoch": 1.49, "learning_rate": 2.51112808929046e-05, "loss": 0.6088, "step": 134865 }, { "epoch": 1.49, "learning_rate": 2.5110358165766084e-05, "loss": 0.6325, "step": 134870 }, { "epoch": 1.49, "learning_rate": 2.5109435438627575e-05, "loss": 0.6812, "step": 134875 }, { "epoch": 1.49, "learning_rate": 2.5108512711489063e-05, "loss": 0.6529, "step": 134880 }, { "epoch": 1.49, "learning_rate": 2.510758998435055e-05, "loss": 0.68, "step": 134885 }, { "epoch": 1.49, "learning_rate": 2.5106667257212035e-05, "loss": 0.6209, "step": 134890 }, { "epoch": 1.49, "learning_rate": 2.5105744530073523e-05, "loss": 0.6369, "step": 134895 }, { "epoch": 1.49, "learning_rate": 2.5104821802935014e-05, "loss": 0.6387, "step": 134900 }, { "epoch": 1.49, "learning_rate": 2.51038990757965e-05, "loss": 0.6834, "step": 134905 }, { "epoch": 1.49, "learning_rate": 2.5102976348657986e-05, "loss": 0.6634, "step": 134910 }, { "epoch": 1.49, "learning_rate": 2.510205362151947e-05, "loss": 0.6627, "step": 134915 }, { "epoch": 1.49, "learning_rate": 2.5101130894380966e-05, "loss": 0.6152, "step": 134920 }, { "epoch": 1.49, "learning_rate": 2.510020816724245e-05, "loss": 0.6181, "step": 134925 }, { "epoch": 1.49, "learning_rate": 2.5099285440103938e-05, "loss": 0.6327, "step": 134930 }, { "epoch": 1.49, "learning_rate": 2.5098362712965422e-05, "loss": 0.597, "step": 134935 }, { "epoch": 1.49, "learning_rate": 2.5097439985826914e-05, "loss": 0.6553, "step": 134940 }, { "epoch": 1.49, "learning_rate": 2.50965172586884e-05, "loss": 0.6729, "step": 134945 }, { "epoch": 1.49, "learning_rate": 2.5095594531549886e-05, "loss": 0.6373, "step": 134950 }, { "epoch": 1.49, "learning_rate": 2.5094671804411374e-05, "loss": 0.6751, "step": 134955 }, { "epoch": 1.49, "learning_rate": 2.509374907727286e-05, "loss": 0.6719, "step": 134960 }, { "epoch": 1.49, "learning_rate": 2.5092826350134353e-05, "loss": 0.6566, "step": 134965 }, { "epoch": 1.49, "learning_rate": 2.5091903622995837e-05, "loss": 0.6509, "step": 134970 }, { "epoch": 1.49, "learning_rate": 2.5090980895857325e-05, "loss": 0.6761, "step": 134975 }, { "epoch": 1.49, "learning_rate": 2.509005816871881e-05, "loss": 0.6796, "step": 134980 }, { "epoch": 1.49, "learning_rate": 2.5089135441580304e-05, "loss": 0.6278, "step": 134985 }, { "epoch": 1.49, "learning_rate": 2.508821271444179e-05, "loss": 0.6291, "step": 134990 }, { "epoch": 1.49, "learning_rate": 2.5087289987303276e-05, "loss": 0.6559, "step": 134995 }, { "epoch": 1.49, "learning_rate": 2.508636726016476e-05, "loss": 0.6307, "step": 135000 }, { "epoch": 1.49, "eval_loss": 0.6012109518051147, "eval_runtime": 69.2728, "eval_samples_per_second": 28.871, "eval_steps_per_second": 14.436, "step": 135000 }, { "epoch": 1.49, "learning_rate": 2.5085444533026252e-05, "loss": 0.6156, "step": 135005 }, { "epoch": 1.49, "learning_rate": 2.508452180588774e-05, "loss": 0.6618, "step": 135010 }, { "epoch": 1.49, "learning_rate": 2.5083599078749224e-05, "loss": 0.6718, "step": 135015 }, { "epoch": 1.5, "learning_rate": 2.5082676351610712e-05, "loss": 0.6392, "step": 135020 }, { "epoch": 1.5, "learning_rate": 2.5081753624472204e-05, "loss": 0.6144, "step": 135025 }, { "epoch": 1.5, "learning_rate": 2.508083089733369e-05, "loss": 0.6203, "step": 135030 }, { "epoch": 1.5, "learning_rate": 2.5079908170195176e-05, "loss": 0.6392, "step": 135035 }, { "epoch": 1.5, "learning_rate": 2.5078985443056664e-05, "loss": 0.6428, "step": 135040 }, { "epoch": 1.5, "learning_rate": 2.5078062715918148e-05, "loss": 0.641, "step": 135045 }, { "epoch": 1.5, "learning_rate": 2.507713998877964e-05, "loss": 0.68, "step": 135050 }, { "epoch": 1.5, "learning_rate": 2.5076217261641127e-05, "loss": 0.5809, "step": 135055 }, { "epoch": 1.5, "learning_rate": 2.5075294534502615e-05, "loss": 0.6035, "step": 135060 }, { "epoch": 1.5, "learning_rate": 2.50743718073641e-05, "loss": 0.634, "step": 135065 }, { "epoch": 1.5, "learning_rate": 2.507344908022559e-05, "loss": 0.6274, "step": 135070 }, { "epoch": 1.5, "learning_rate": 2.507252635308708e-05, "loss": 0.6372, "step": 135075 }, { "epoch": 1.5, "learning_rate": 2.5071603625948563e-05, "loss": 0.5973, "step": 135080 }, { "epoch": 1.5, "learning_rate": 2.507068089881005e-05, "loss": 0.5986, "step": 135085 }, { "epoch": 1.5, "learning_rate": 2.5069758171671542e-05, "loss": 0.6285, "step": 135090 }, { "epoch": 1.5, "learning_rate": 2.506883544453303e-05, "loss": 0.625, "step": 135095 }, { "epoch": 1.5, "learning_rate": 2.5067912717394515e-05, "loss": 0.6234, "step": 135100 }, { "epoch": 1.5, "learning_rate": 2.5066989990256002e-05, "loss": 0.6225, "step": 135105 }, { "epoch": 1.5, "learning_rate": 2.5066067263117487e-05, "loss": 0.5959, "step": 135110 }, { "epoch": 1.5, "learning_rate": 2.5065144535978978e-05, "loss": 0.6498, "step": 135115 }, { "epoch": 1.5, "learning_rate": 2.5064221808840466e-05, "loss": 0.6981, "step": 135120 }, { "epoch": 1.5, "learning_rate": 2.506329908170195e-05, "loss": 0.6253, "step": 135125 }, { "epoch": 1.5, "learning_rate": 2.5062376354563438e-05, "loss": 0.684, "step": 135130 }, { "epoch": 1.5, "learning_rate": 2.506145362742493e-05, "loss": 0.6896, "step": 135135 }, { "epoch": 1.5, "learning_rate": 2.5060530900286417e-05, "loss": 0.6636, "step": 135140 }, { "epoch": 1.5, "learning_rate": 2.5059608173147902e-05, "loss": 0.6357, "step": 135145 }, { "epoch": 1.5, "learning_rate": 2.505868544600939e-05, "loss": 0.6041, "step": 135150 }, { "epoch": 1.5, "learning_rate": 2.505776271887088e-05, "loss": 0.6284, "step": 135155 }, { "epoch": 1.5, "learning_rate": 2.505683999173237e-05, "loss": 0.6255, "step": 135160 }, { "epoch": 1.5, "learning_rate": 2.5055917264593853e-05, "loss": 0.6694, "step": 135165 }, { "epoch": 1.5, "learning_rate": 2.505499453745534e-05, "loss": 0.63, "step": 135170 }, { "epoch": 1.5, "learning_rate": 2.5054071810316832e-05, "loss": 0.6226, "step": 135175 }, { "epoch": 1.5, "learning_rate": 2.5053149083178317e-05, "loss": 0.6554, "step": 135180 }, { "epoch": 1.5, "learning_rate": 2.5052226356039805e-05, "loss": 0.6433, "step": 135185 }, { "epoch": 1.5, "learning_rate": 2.505130362890129e-05, "loss": 0.6576, "step": 135190 }, { "epoch": 1.5, "learning_rate": 2.5050380901762777e-05, "loss": 0.6208, "step": 135195 }, { "epoch": 1.5, "learning_rate": 2.5049458174624268e-05, "loss": 0.6603, "step": 135200 }, { "epoch": 1.5, "learning_rate": 2.5048535447485756e-05, "loss": 0.695, "step": 135205 }, { "epoch": 1.5, "learning_rate": 2.504761272034724e-05, "loss": 0.69, "step": 135210 }, { "epoch": 1.5, "learning_rate": 2.5046689993208728e-05, "loss": 0.6098, "step": 135215 }, { "epoch": 1.5, "learning_rate": 2.504576726607022e-05, "loss": 0.6701, "step": 135220 }, { "epoch": 1.5, "learning_rate": 2.5044844538931704e-05, "loss": 0.6571, "step": 135225 }, { "epoch": 1.5, "learning_rate": 2.5043921811793192e-05, "loss": 0.5968, "step": 135230 }, { "epoch": 1.5, "learning_rate": 2.504299908465468e-05, "loss": 0.6284, "step": 135235 }, { "epoch": 1.5, "learning_rate": 2.504207635751617e-05, "loss": 0.6481, "step": 135240 }, { "epoch": 1.5, "learning_rate": 2.5041153630377655e-05, "loss": 0.7144, "step": 135245 }, { "epoch": 1.5, "learning_rate": 2.5040230903239143e-05, "loss": 0.63, "step": 135250 }, { "epoch": 1.5, "learning_rate": 2.5039308176100628e-05, "loss": 0.6216, "step": 135255 }, { "epoch": 1.5, "learning_rate": 2.5038385448962116e-05, "loss": 0.6272, "step": 135260 }, { "epoch": 1.5, "learning_rate": 2.5037462721823607e-05, "loss": 0.687, "step": 135265 }, { "epoch": 1.5, "learning_rate": 2.5036539994685095e-05, "loss": 0.6612, "step": 135270 }, { "epoch": 1.5, "learning_rate": 2.503561726754658e-05, "loss": 0.6436, "step": 135275 }, { "epoch": 1.5, "learning_rate": 2.5034694540408067e-05, "loss": 0.6449, "step": 135280 }, { "epoch": 1.5, "learning_rate": 2.5033771813269558e-05, "loss": 0.6388, "step": 135285 }, { "epoch": 1.5, "learning_rate": 2.5032849086131043e-05, "loss": 0.6423, "step": 135290 }, { "epoch": 1.5, "learning_rate": 2.503192635899253e-05, "loss": 0.5984, "step": 135295 }, { "epoch": 1.5, "learning_rate": 2.5031003631854015e-05, "loss": 0.6738, "step": 135300 }, { "epoch": 1.5, "learning_rate": 2.503008090471551e-05, "loss": 0.5928, "step": 135305 }, { "epoch": 1.5, "learning_rate": 2.5029158177576994e-05, "loss": 0.6681, "step": 135310 }, { "epoch": 1.5, "learning_rate": 2.5028235450438482e-05, "loss": 0.6117, "step": 135315 }, { "epoch": 1.5, "learning_rate": 2.5027312723299966e-05, "loss": 0.6873, "step": 135320 }, { "epoch": 1.5, "learning_rate": 2.5026389996161458e-05, "loss": 0.6005, "step": 135325 }, { "epoch": 1.5, "learning_rate": 2.5025467269022945e-05, "loss": 0.6788, "step": 135330 }, { "epoch": 1.5, "learning_rate": 2.502454454188443e-05, "loss": 0.6767, "step": 135335 }, { "epoch": 1.5, "learning_rate": 2.5023621814745918e-05, "loss": 0.6576, "step": 135340 }, { "epoch": 1.5, "learning_rate": 2.5022699087607406e-05, "loss": 0.649, "step": 135345 }, { "epoch": 1.5, "learning_rate": 2.5021776360468897e-05, "loss": 0.6289, "step": 135350 }, { "epoch": 1.5, "learning_rate": 2.502085363333038e-05, "loss": 0.6289, "step": 135355 }, { "epoch": 1.5, "learning_rate": 2.501993090619187e-05, "loss": 0.6433, "step": 135360 }, { "epoch": 1.5, "learning_rate": 2.5019008179053354e-05, "loss": 0.6808, "step": 135365 }, { "epoch": 1.5, "learning_rate": 2.5018085451914848e-05, "loss": 0.6206, "step": 135370 }, { "epoch": 1.5, "learning_rate": 2.5017162724776333e-05, "loss": 0.6532, "step": 135375 }, { "epoch": 1.5, "learning_rate": 2.501623999763782e-05, "loss": 0.6408, "step": 135380 }, { "epoch": 1.5, "learning_rate": 2.5015317270499305e-05, "loss": 0.6868, "step": 135385 }, { "epoch": 1.5, "learning_rate": 2.5014394543360796e-05, "loss": 0.6133, "step": 135390 }, { "epoch": 1.5, "learning_rate": 2.5013471816222284e-05, "loss": 0.6438, "step": 135395 }, { "epoch": 1.5, "learning_rate": 2.501254908908377e-05, "loss": 0.6343, "step": 135400 }, { "epoch": 1.5, "learning_rate": 2.5011626361945256e-05, "loss": 0.6645, "step": 135405 }, { "epoch": 1.5, "learning_rate": 2.5010703634806744e-05, "loss": 0.6687, "step": 135410 }, { "epoch": 1.5, "learning_rate": 2.5009780907668235e-05, "loss": 0.668, "step": 135415 }, { "epoch": 1.5, "learning_rate": 2.500885818052972e-05, "loss": 0.5927, "step": 135420 }, { "epoch": 1.5, "learning_rate": 2.5007935453391208e-05, "loss": 0.6763, "step": 135425 }, { "epoch": 1.5, "learning_rate": 2.5007012726252692e-05, "loss": 0.5944, "step": 135430 }, { "epoch": 1.5, "learning_rate": 2.5006089999114183e-05, "loss": 0.6592, "step": 135435 }, { "epoch": 1.5, "learning_rate": 2.500516727197567e-05, "loss": 0.6131, "step": 135440 }, { "epoch": 1.5, "learning_rate": 2.500424454483716e-05, "loss": 0.6447, "step": 135445 }, { "epoch": 1.5, "learning_rate": 2.5003321817698644e-05, "loss": 0.6547, "step": 135450 }, { "epoch": 1.5, "learning_rate": 2.5002399090560135e-05, "loss": 0.6388, "step": 135455 }, { "epoch": 1.5, "learning_rate": 2.5001476363421623e-05, "loss": 0.597, "step": 135460 }, { "epoch": 1.5, "learning_rate": 2.5000553636283107e-05, "loss": 0.7079, "step": 135465 }, { "epoch": 1.5, "learning_rate": 2.49996309091446e-05, "loss": 0.6899, "step": 135470 }, { "epoch": 1.5, "learning_rate": 2.4998708182006083e-05, "loss": 0.6242, "step": 135475 }, { "epoch": 1.5, "learning_rate": 2.4997785454867574e-05, "loss": 0.633, "step": 135480 }, { "epoch": 1.5, "learning_rate": 2.499686272772906e-05, "loss": 0.6017, "step": 135485 }, { "epoch": 1.5, "learning_rate": 2.4995940000590546e-05, "loss": 0.7093, "step": 135490 }, { "epoch": 1.5, "learning_rate": 2.4995017273452034e-05, "loss": 0.6946, "step": 135495 }, { "epoch": 1.5, "learning_rate": 2.4994094546313522e-05, "loss": 0.662, "step": 135500 }, { "epoch": 1.5, "learning_rate": 2.499317181917501e-05, "loss": 0.6205, "step": 135505 }, { "epoch": 1.5, "learning_rate": 2.4992249092036494e-05, "loss": 0.6111, "step": 135510 }, { "epoch": 1.5, "learning_rate": 2.4991326364897986e-05, "loss": 0.6673, "step": 135515 }, { "epoch": 1.5, "learning_rate": 2.499040363775947e-05, "loss": 0.6883, "step": 135520 }, { "epoch": 1.5, "learning_rate": 2.498948091062096e-05, "loss": 0.6561, "step": 135525 }, { "epoch": 1.5, "learning_rate": 2.4988558183482446e-05, "loss": 0.6691, "step": 135530 }, { "epoch": 1.5, "learning_rate": 2.4987635456343937e-05, "loss": 0.6458, "step": 135535 }, { "epoch": 1.5, "learning_rate": 2.498671272920542e-05, "loss": 0.6066, "step": 135540 }, { "epoch": 1.5, "learning_rate": 2.4985790002066913e-05, "loss": 0.6101, "step": 135545 }, { "epoch": 1.5, "learning_rate": 2.4984867274928397e-05, "loss": 0.6967, "step": 135550 }, { "epoch": 1.5, "learning_rate": 2.4983944547789885e-05, "loss": 0.6144, "step": 135555 }, { "epoch": 1.5, "learning_rate": 2.4983021820651373e-05, "loss": 0.6754, "step": 135560 }, { "epoch": 1.5, "learning_rate": 2.498209909351286e-05, "loss": 0.6905, "step": 135565 }, { "epoch": 1.5, "learning_rate": 2.498117636637435e-05, "loss": 0.6665, "step": 135570 }, { "epoch": 1.5, "learning_rate": 2.4980253639235833e-05, "loss": 0.6782, "step": 135575 }, { "epoch": 1.5, "learning_rate": 2.4979330912097324e-05, "loss": 0.6144, "step": 135580 }, { "epoch": 1.5, "learning_rate": 2.497840818495881e-05, "loss": 0.6593, "step": 135585 }, { "epoch": 1.5, "learning_rate": 2.49774854578203e-05, "loss": 0.6514, "step": 135590 }, { "epoch": 1.5, "learning_rate": 2.4976562730681784e-05, "loss": 0.6957, "step": 135595 }, { "epoch": 1.5, "learning_rate": 2.4975640003543276e-05, "loss": 0.6375, "step": 135600 }, { "epoch": 1.5, "learning_rate": 2.497471727640476e-05, "loss": 0.6016, "step": 135605 }, { "epoch": 1.5, "learning_rate": 2.4973794549266248e-05, "loss": 0.6445, "step": 135610 }, { "epoch": 1.5, "learning_rate": 2.4972871822127736e-05, "loss": 0.6979, "step": 135615 }, { "epoch": 1.5, "learning_rate": 2.4971949094989224e-05, "loss": 0.6547, "step": 135620 }, { "epoch": 1.5, "learning_rate": 2.497102636785071e-05, "loss": 0.6445, "step": 135625 }, { "epoch": 1.5, "learning_rate": 2.49701036407122e-05, "loss": 0.6364, "step": 135630 }, { "epoch": 1.5, "learning_rate": 2.4969180913573687e-05, "loss": 0.6711, "step": 135635 }, { "epoch": 1.5, "learning_rate": 2.496825818643517e-05, "loss": 0.633, "step": 135640 }, { "epoch": 1.5, "learning_rate": 2.4967335459296663e-05, "loss": 0.732, "step": 135645 }, { "epoch": 1.5, "learning_rate": 2.4966412732158147e-05, "loss": 0.6131, "step": 135650 }, { "epoch": 1.5, "learning_rate": 2.496549000501964e-05, "loss": 0.6206, "step": 135655 }, { "epoch": 1.5, "learning_rate": 2.4964567277881123e-05, "loss": 0.6704, "step": 135660 }, { "epoch": 1.5, "learning_rate": 2.496364455074261e-05, "loss": 0.6175, "step": 135665 }, { "epoch": 1.5, "learning_rate": 2.49627218236041e-05, "loss": 0.6336, "step": 135670 }, { "epoch": 1.5, "learning_rate": 2.4961799096465587e-05, "loss": 0.6693, "step": 135675 }, { "epoch": 1.5, "learning_rate": 2.4960876369327074e-05, "loss": 0.5893, "step": 135680 }, { "epoch": 1.5, "learning_rate": 2.4959953642188562e-05, "loss": 0.6512, "step": 135685 }, { "epoch": 1.5, "learning_rate": 2.495903091505005e-05, "loss": 0.6234, "step": 135690 }, { "epoch": 1.5, "learning_rate": 2.4958108187911538e-05, "loss": 0.5997, "step": 135695 }, { "epoch": 1.5, "learning_rate": 2.4957185460773026e-05, "loss": 0.671, "step": 135700 }, { "epoch": 1.5, "learning_rate": 2.4956262733634514e-05, "loss": 0.604, "step": 135705 }, { "epoch": 1.5, "learning_rate": 2.4955340006496e-05, "loss": 0.6337, "step": 135710 }, { "epoch": 1.5, "learning_rate": 2.4954417279357486e-05, "loss": 0.6175, "step": 135715 }, { "epoch": 1.5, "learning_rate": 2.4953494552218977e-05, "loss": 0.6197, "step": 135720 }, { "epoch": 1.5, "learning_rate": 2.4952571825080462e-05, "loss": 0.6258, "step": 135725 }, { "epoch": 1.5, "learning_rate": 2.495164909794195e-05, "loss": 0.646, "step": 135730 }, { "epoch": 1.5, "learning_rate": 2.4950726370803437e-05, "loss": 0.5997, "step": 135735 }, { "epoch": 1.5, "learning_rate": 2.4949803643664925e-05, "loss": 0.6919, "step": 135740 }, { "epoch": 1.5, "learning_rate": 2.4948880916526413e-05, "loss": 0.604, "step": 135745 }, { "epoch": 1.5, "learning_rate": 2.49479581893879e-05, "loss": 0.583, "step": 135750 }, { "epoch": 1.5, "learning_rate": 2.494703546224939e-05, "loss": 0.6722, "step": 135755 }, { "epoch": 1.5, "learning_rate": 2.4946112735110877e-05, "loss": 0.6503, "step": 135760 }, { "epoch": 1.5, "learning_rate": 2.4945190007972365e-05, "loss": 0.6206, "step": 135765 }, { "epoch": 1.5, "learning_rate": 2.4944267280833852e-05, "loss": 0.6348, "step": 135770 }, { "epoch": 1.5, "learning_rate": 2.494334455369534e-05, "loss": 0.6573, "step": 135775 }, { "epoch": 1.5, "learning_rate": 2.4942421826556828e-05, "loss": 0.5931, "step": 135780 }, { "epoch": 1.5, "learning_rate": 2.4941499099418313e-05, "loss": 0.6301, "step": 135785 }, { "epoch": 1.5, "learning_rate": 2.49405763722798e-05, "loss": 0.67, "step": 135790 }, { "epoch": 1.5, "learning_rate": 2.4939653645141288e-05, "loss": 0.6564, "step": 135795 }, { "epoch": 1.5, "learning_rate": 2.4938730918002776e-05, "loss": 0.6534, "step": 135800 }, { "epoch": 1.5, "learning_rate": 2.4937808190864264e-05, "loss": 0.6079, "step": 135805 }, { "epoch": 1.5, "learning_rate": 2.4936885463725752e-05, "loss": 0.6217, "step": 135810 }, { "epoch": 1.5, "learning_rate": 2.493596273658724e-05, "loss": 0.5855, "step": 135815 }, { "epoch": 1.5, "learning_rate": 2.4935040009448727e-05, "loss": 0.657, "step": 135820 }, { "epoch": 1.5, "learning_rate": 2.4934117282310215e-05, "loss": 0.6918, "step": 135825 }, { "epoch": 1.5, "learning_rate": 2.4933194555171703e-05, "loss": 0.6376, "step": 135830 }, { "epoch": 1.5, "learning_rate": 2.493227182803319e-05, "loss": 0.6844, "step": 135835 }, { "epoch": 1.5, "learning_rate": 2.4931349100894675e-05, "loss": 0.6188, "step": 135840 }, { "epoch": 1.5, "learning_rate": 2.4930426373756167e-05, "loss": 0.7202, "step": 135845 }, { "epoch": 1.5, "learning_rate": 2.492950364661765e-05, "loss": 0.6533, "step": 135850 }, { "epoch": 1.5, "learning_rate": 2.4928580919479142e-05, "loss": 0.6097, "step": 135855 }, { "epoch": 1.5, "learning_rate": 2.4927658192340627e-05, "loss": 0.6053, "step": 135860 }, { "epoch": 1.5, "learning_rate": 2.4926735465202115e-05, "loss": 0.639, "step": 135865 }, { "epoch": 1.5, "learning_rate": 2.4925812738063603e-05, "loss": 0.6124, "step": 135870 }, { "epoch": 1.5, "learning_rate": 2.492489001092509e-05, "loss": 0.6562, "step": 135875 }, { "epoch": 1.5, "learning_rate": 2.4923967283786578e-05, "loss": 0.7088, "step": 135880 }, { "epoch": 1.5, "learning_rate": 2.4923044556648066e-05, "loss": 0.628, "step": 135885 }, { "epoch": 1.5, "learning_rate": 2.4922121829509554e-05, "loss": 0.6814, "step": 135890 }, { "epoch": 1.5, "learning_rate": 2.492119910237104e-05, "loss": 0.6823, "step": 135895 }, { "epoch": 1.5, "learning_rate": 2.492027637523253e-05, "loss": 0.6387, "step": 135900 }, { "epoch": 1.5, "learning_rate": 2.4919353648094014e-05, "loss": 0.6314, "step": 135905 }, { "epoch": 1.5, "learning_rate": 2.4918430920955505e-05, "loss": 0.6484, "step": 135910 }, { "epoch": 1.5, "learning_rate": 2.491750819381699e-05, "loss": 0.6206, "step": 135915 }, { "epoch": 1.51, "learning_rate": 2.491658546667848e-05, "loss": 0.6805, "step": 135920 }, { "epoch": 1.51, "learning_rate": 2.4915662739539965e-05, "loss": 0.6356, "step": 135925 }, { "epoch": 1.51, "learning_rate": 2.4914740012401457e-05, "loss": 0.675, "step": 135930 }, { "epoch": 1.51, "learning_rate": 2.491381728526294e-05, "loss": 0.6468, "step": 135935 }, { "epoch": 1.51, "learning_rate": 2.491289455812443e-05, "loss": 0.6256, "step": 135940 }, { "epoch": 1.51, "learning_rate": 2.4911971830985917e-05, "loss": 0.6397, "step": 135945 }, { "epoch": 1.51, "learning_rate": 2.4911049103847405e-05, "loss": 0.664, "step": 135950 }, { "epoch": 1.51, "learning_rate": 2.4910126376708893e-05, "loss": 0.6273, "step": 135955 }, { "epoch": 1.51, "learning_rate": 2.4909203649570377e-05, "loss": 0.6218, "step": 135960 }, { "epoch": 1.51, "learning_rate": 2.4908280922431868e-05, "loss": 0.6725, "step": 135965 }, { "epoch": 1.51, "learning_rate": 2.4907358195293353e-05, "loss": 0.6455, "step": 135970 }, { "epoch": 1.51, "learning_rate": 2.4906435468154844e-05, "loss": 0.62, "step": 135975 }, { "epoch": 1.51, "learning_rate": 2.490551274101633e-05, "loss": 0.6292, "step": 135980 }, { "epoch": 1.51, "learning_rate": 2.490459001387782e-05, "loss": 0.6309, "step": 135985 }, { "epoch": 1.51, "learning_rate": 2.4903667286739304e-05, "loss": 0.6358, "step": 135990 }, { "epoch": 1.51, "learning_rate": 2.4902744559600792e-05, "loss": 0.6074, "step": 135995 }, { "epoch": 1.51, "learning_rate": 2.490182183246228e-05, "loss": 0.6018, "step": 136000 }, { "epoch": 1.51, "eval_loss": 0.5899821519851685, "eval_runtime": 69.2273, "eval_samples_per_second": 28.89, "eval_steps_per_second": 14.445, "step": 136000 }, { "epoch": 1.51, "learning_rate": 2.4900899105323768e-05, "loss": 0.6261, "step": 136005 }, { "epoch": 1.51, "learning_rate": 2.4899976378185256e-05, "loss": 0.6321, "step": 136010 }, { "epoch": 1.51, "learning_rate": 2.489905365104674e-05, "loss": 0.6294, "step": 136015 }, { "epoch": 1.51, "learning_rate": 2.489813092390823e-05, "loss": 0.6167, "step": 136020 }, { "epoch": 1.51, "learning_rate": 2.4897208196769716e-05, "loss": 0.6385, "step": 136025 }, { "epoch": 1.51, "learning_rate": 2.4896285469631207e-05, "loss": 0.6401, "step": 136030 }, { "epoch": 1.51, "learning_rate": 2.489536274249269e-05, "loss": 0.615, "step": 136035 }, { "epoch": 1.51, "learning_rate": 2.4894440015354183e-05, "loss": 0.6504, "step": 136040 }, { "epoch": 1.51, "learning_rate": 2.4893517288215667e-05, "loss": 0.626, "step": 136045 }, { "epoch": 1.51, "learning_rate": 2.4892594561077155e-05, "loss": 0.6329, "step": 136050 }, { "epoch": 1.51, "learning_rate": 2.4891671833938643e-05, "loss": 0.6491, "step": 136055 }, { "epoch": 1.51, "learning_rate": 2.489074910680013e-05, "loss": 0.6032, "step": 136060 }, { "epoch": 1.51, "learning_rate": 2.488982637966162e-05, "loss": 0.6237, "step": 136065 }, { "epoch": 1.51, "learning_rate": 2.4888903652523106e-05, "loss": 0.6671, "step": 136070 }, { "epoch": 1.51, "learning_rate": 2.4887980925384594e-05, "loss": 0.6228, "step": 136075 }, { "epoch": 1.51, "learning_rate": 2.4887058198246082e-05, "loss": 0.6366, "step": 136080 }, { "epoch": 1.51, "learning_rate": 2.488613547110757e-05, "loss": 0.6297, "step": 136085 }, { "epoch": 1.51, "learning_rate": 2.4885212743969058e-05, "loss": 0.6793, "step": 136090 }, { "epoch": 1.51, "learning_rate": 2.4884290016830546e-05, "loss": 0.623, "step": 136095 }, { "epoch": 1.51, "learning_rate": 2.488336728969203e-05, "loss": 0.6436, "step": 136100 }, { "epoch": 1.51, "learning_rate": 2.488244456255352e-05, "loss": 0.6451, "step": 136105 }, { "epoch": 1.51, "learning_rate": 2.4881521835415006e-05, "loss": 0.6092, "step": 136110 }, { "epoch": 1.51, "learning_rate": 2.4880599108276494e-05, "loss": 0.6896, "step": 136115 }, { "epoch": 1.51, "learning_rate": 2.487967638113798e-05, "loss": 0.6323, "step": 136120 }, { "epoch": 1.51, "learning_rate": 2.487875365399947e-05, "loss": 0.6683, "step": 136125 }, { "epoch": 1.51, "learning_rate": 2.4877830926860957e-05, "loss": 0.6235, "step": 136130 }, { "epoch": 1.51, "learning_rate": 2.4876908199722445e-05, "loss": 0.6244, "step": 136135 }, { "epoch": 1.51, "learning_rate": 2.4875985472583933e-05, "loss": 0.6472, "step": 136140 }, { "epoch": 1.51, "learning_rate": 2.487506274544542e-05, "loss": 0.6419, "step": 136145 }, { "epoch": 1.51, "learning_rate": 2.487414001830691e-05, "loss": 0.6129, "step": 136150 }, { "epoch": 1.51, "learning_rate": 2.4873217291168396e-05, "loss": 0.6994, "step": 136155 }, { "epoch": 1.51, "learning_rate": 2.4872294564029884e-05, "loss": 0.6143, "step": 136160 }, { "epoch": 1.51, "learning_rate": 2.4871371836891372e-05, "loss": 0.6673, "step": 136165 }, { "epoch": 1.51, "learning_rate": 2.4870449109752857e-05, "loss": 0.6772, "step": 136170 }, { "epoch": 1.51, "learning_rate": 2.4869526382614344e-05, "loss": 0.6357, "step": 136175 }, { "epoch": 1.51, "learning_rate": 2.4868603655475832e-05, "loss": 0.6428, "step": 136180 }, { "epoch": 1.51, "learning_rate": 2.486768092833732e-05, "loss": 0.6598, "step": 136185 }, { "epoch": 1.51, "learning_rate": 2.4866758201198808e-05, "loss": 0.6453, "step": 136190 }, { "epoch": 1.51, "learning_rate": 2.4865835474060296e-05, "loss": 0.6503, "step": 136195 }, { "epoch": 1.51, "learning_rate": 2.4864912746921784e-05, "loss": 0.6155, "step": 136200 }, { "epoch": 1.51, "learning_rate": 2.486399001978327e-05, "loss": 0.6486, "step": 136205 }, { "epoch": 1.51, "learning_rate": 2.486306729264476e-05, "loss": 0.6212, "step": 136210 }, { "epoch": 1.51, "learning_rate": 2.4862144565506247e-05, "loss": 0.6344, "step": 136215 }, { "epoch": 1.51, "learning_rate": 2.4861221838367735e-05, "loss": 0.7032, "step": 136220 }, { "epoch": 1.51, "learning_rate": 2.486029911122922e-05, "loss": 0.638, "step": 136225 }, { "epoch": 1.51, "learning_rate": 2.485937638409071e-05, "loss": 0.6533, "step": 136230 }, { "epoch": 1.51, "learning_rate": 2.4858453656952195e-05, "loss": 0.6457, "step": 136235 }, { "epoch": 1.51, "learning_rate": 2.4857530929813686e-05, "loss": 0.6514, "step": 136240 }, { "epoch": 1.51, "learning_rate": 2.485660820267517e-05, "loss": 0.6122, "step": 136245 }, { "epoch": 1.51, "learning_rate": 2.485568547553666e-05, "loss": 0.6575, "step": 136250 }, { "epoch": 1.51, "learning_rate": 2.4854762748398147e-05, "loss": 0.6221, "step": 136255 }, { "epoch": 1.51, "learning_rate": 2.4853840021259634e-05, "loss": 0.6504, "step": 136260 }, { "epoch": 1.51, "learning_rate": 2.4852917294121122e-05, "loss": 0.6669, "step": 136265 }, { "epoch": 1.51, "learning_rate": 2.485199456698261e-05, "loss": 0.6458, "step": 136270 }, { "epoch": 1.51, "learning_rate": 2.4851071839844098e-05, "loss": 0.6277, "step": 136275 }, { "epoch": 1.51, "learning_rate": 2.4850149112705582e-05, "loss": 0.7063, "step": 136280 }, { "epoch": 1.51, "learning_rate": 2.4849226385567074e-05, "loss": 0.6347, "step": 136285 }, { "epoch": 1.51, "learning_rate": 2.4848303658428558e-05, "loss": 0.5951, "step": 136290 }, { "epoch": 1.51, "learning_rate": 2.484738093129005e-05, "loss": 0.6636, "step": 136295 }, { "epoch": 1.51, "learning_rate": 2.4846458204151534e-05, "loss": 0.5848, "step": 136300 }, { "epoch": 1.51, "learning_rate": 2.4845535477013025e-05, "loss": 0.6187, "step": 136305 }, { "epoch": 1.51, "learning_rate": 2.484461274987451e-05, "loss": 0.6535, "step": 136310 }, { "epoch": 1.51, "learning_rate": 2.4843690022736e-05, "loss": 0.6349, "step": 136315 }, { "epoch": 1.51, "learning_rate": 2.4842767295597485e-05, "loss": 0.6236, "step": 136320 }, { "epoch": 1.51, "learning_rate": 2.4841844568458973e-05, "loss": 0.6195, "step": 136325 }, { "epoch": 1.51, "learning_rate": 2.484092184132046e-05, "loss": 0.6332, "step": 136330 }, { "epoch": 1.51, "learning_rate": 2.483999911418195e-05, "loss": 0.614, "step": 136335 }, { "epoch": 1.51, "learning_rate": 2.4839076387043437e-05, "loss": 0.6386, "step": 136340 }, { "epoch": 1.51, "learning_rate": 2.483815365990492e-05, "loss": 0.6506, "step": 136345 }, { "epoch": 1.51, "learning_rate": 2.4837230932766412e-05, "loss": 0.5631, "step": 136350 }, { "epoch": 1.51, "learning_rate": 2.4836308205627897e-05, "loss": 0.6371, "step": 136355 }, { "epoch": 1.51, "learning_rate": 2.4835385478489388e-05, "loss": 0.5943, "step": 136360 }, { "epoch": 1.51, "learning_rate": 2.4834462751350872e-05, "loss": 0.5875, "step": 136365 }, { "epoch": 1.51, "learning_rate": 2.4833540024212364e-05, "loss": 0.6657, "step": 136370 }, { "epoch": 1.51, "learning_rate": 2.4832617297073848e-05, "loss": 0.6246, "step": 136375 }, { "epoch": 1.51, "learning_rate": 2.4831694569935336e-05, "loss": 0.6916, "step": 136380 }, { "epoch": 1.51, "learning_rate": 2.4830771842796824e-05, "loss": 0.6538, "step": 136385 }, { "epoch": 1.51, "learning_rate": 2.4829849115658312e-05, "loss": 0.5846, "step": 136390 }, { "epoch": 1.51, "learning_rate": 2.48289263885198e-05, "loss": 0.6618, "step": 136395 }, { "epoch": 1.51, "learning_rate": 2.4828003661381284e-05, "loss": 0.6411, "step": 136400 }, { "epoch": 1.51, "learning_rate": 2.4827080934242775e-05, "loss": 0.6755, "step": 136405 }, { "epoch": 1.51, "learning_rate": 2.482615820710426e-05, "loss": 0.6593, "step": 136410 }, { "epoch": 1.51, "learning_rate": 2.482523547996575e-05, "loss": 0.597, "step": 136415 }, { "epoch": 1.51, "learning_rate": 2.4824312752827235e-05, "loss": 0.6578, "step": 136420 }, { "epoch": 1.51, "learning_rate": 2.4823390025688727e-05, "loss": 0.6863, "step": 136425 }, { "epoch": 1.51, "learning_rate": 2.482246729855021e-05, "loss": 0.6247, "step": 136430 }, { "epoch": 1.51, "learning_rate": 2.48215445714117e-05, "loss": 0.5616, "step": 136435 }, { "epoch": 1.51, "learning_rate": 2.4820621844273187e-05, "loss": 0.6763, "step": 136440 }, { "epoch": 1.51, "learning_rate": 2.4819699117134675e-05, "loss": 0.6852, "step": 136445 }, { "epoch": 1.51, "learning_rate": 2.4818776389996163e-05, "loss": 0.6726, "step": 136450 }, { "epoch": 1.51, "learning_rate": 2.481785366285765e-05, "loss": 0.6116, "step": 136455 }, { "epoch": 1.51, "learning_rate": 2.4816930935719138e-05, "loss": 0.6991, "step": 136460 }, { "epoch": 1.51, "learning_rate": 2.4816008208580626e-05, "loss": 0.6509, "step": 136465 }, { "epoch": 1.51, "learning_rate": 2.4815085481442114e-05, "loss": 0.5991, "step": 136470 }, { "epoch": 1.51, "learning_rate": 2.48141627543036e-05, "loss": 0.683, "step": 136475 }, { "epoch": 1.51, "learning_rate": 2.481324002716509e-05, "loss": 0.6452, "step": 136480 }, { "epoch": 1.51, "learning_rate": 2.4812317300026574e-05, "loss": 0.6926, "step": 136485 }, { "epoch": 1.51, "learning_rate": 2.4811394572888065e-05, "loss": 0.6224, "step": 136490 }, { "epoch": 1.51, "learning_rate": 2.481047184574955e-05, "loss": 0.5938, "step": 136495 }, { "epoch": 1.51, "learning_rate": 2.4809549118611038e-05, "loss": 0.623, "step": 136500 }, { "epoch": 1.51, "learning_rate": 2.4808626391472525e-05, "loss": 0.6498, "step": 136505 }, { "epoch": 1.51, "learning_rate": 2.4807703664334013e-05, "loss": 0.6355, "step": 136510 }, { "epoch": 1.51, "learning_rate": 2.48067809371955e-05, "loss": 0.6431, "step": 136515 }, { "epoch": 1.51, "learning_rate": 2.480585821005699e-05, "loss": 0.6568, "step": 136520 }, { "epoch": 1.51, "learning_rate": 2.4804935482918477e-05, "loss": 0.6641, "step": 136525 }, { "epoch": 1.51, "learning_rate": 2.4804012755779965e-05, "loss": 0.6799, "step": 136530 }, { "epoch": 1.51, "learning_rate": 2.4803090028641453e-05, "loss": 0.626, "step": 136535 }, { "epoch": 1.51, "learning_rate": 2.480216730150294e-05, "loss": 0.5901, "step": 136540 }, { "epoch": 1.51, "learning_rate": 2.4801244574364428e-05, "loss": 0.6238, "step": 136545 }, { "epoch": 1.51, "learning_rate": 2.4800321847225913e-05, "loss": 0.6542, "step": 136550 }, { "epoch": 1.51, "learning_rate": 2.47993991200874e-05, "loss": 0.6526, "step": 136555 }, { "epoch": 1.51, "learning_rate": 2.479847639294889e-05, "loss": 0.7119, "step": 136560 }, { "epoch": 1.51, "learning_rate": 2.4797553665810376e-05, "loss": 0.6325, "step": 136565 }, { "epoch": 1.51, "learning_rate": 2.4796630938671864e-05, "loss": 0.625, "step": 136570 }, { "epoch": 1.51, "learning_rate": 2.4795708211533352e-05, "loss": 0.6559, "step": 136575 }, { "epoch": 1.51, "learning_rate": 2.479478548439484e-05, "loss": 0.7054, "step": 136580 }, { "epoch": 1.51, "learning_rate": 2.4793862757256328e-05, "loss": 0.6425, "step": 136585 }, { "epoch": 1.51, "learning_rate": 2.4792940030117815e-05, "loss": 0.6985, "step": 136590 }, { "epoch": 1.51, "learning_rate": 2.4792017302979303e-05, "loss": 0.6816, "step": 136595 }, { "epoch": 1.51, "learning_rate": 2.479109457584079e-05, "loss": 0.5551, "step": 136600 }, { "epoch": 1.51, "learning_rate": 2.479017184870228e-05, "loss": 0.6625, "step": 136605 }, { "epoch": 1.51, "learning_rate": 2.4789249121563763e-05, "loss": 0.6748, "step": 136610 }, { "epoch": 1.51, "learning_rate": 2.4788326394425255e-05, "loss": 0.626, "step": 136615 }, { "epoch": 1.51, "learning_rate": 2.478740366728674e-05, "loss": 0.672, "step": 136620 }, { "epoch": 1.51, "learning_rate": 2.4786480940148227e-05, "loss": 0.6449, "step": 136625 }, { "epoch": 1.51, "learning_rate": 2.4785558213009715e-05, "loss": 0.6433, "step": 136630 }, { "epoch": 1.51, "learning_rate": 2.4784635485871203e-05, "loss": 0.6109, "step": 136635 }, { "epoch": 1.51, "learning_rate": 2.478371275873269e-05, "loss": 0.6411, "step": 136640 }, { "epoch": 1.51, "learning_rate": 2.478279003159418e-05, "loss": 0.6403, "step": 136645 }, { "epoch": 1.51, "learning_rate": 2.4781867304455666e-05, "loss": 0.6088, "step": 136650 }, { "epoch": 1.51, "learning_rate": 2.4780944577317154e-05, "loss": 0.6711, "step": 136655 }, { "epoch": 1.51, "learning_rate": 2.4780021850178642e-05, "loss": 0.6821, "step": 136660 }, { "epoch": 1.51, "learning_rate": 2.477909912304013e-05, "loss": 0.5756, "step": 136665 }, { "epoch": 1.51, "learning_rate": 2.4778176395901618e-05, "loss": 0.6204, "step": 136670 }, { "epoch": 1.51, "learning_rate": 2.4777253668763102e-05, "loss": 0.6472, "step": 136675 }, { "epoch": 1.51, "learning_rate": 2.4776330941624593e-05, "loss": 0.6018, "step": 136680 }, { "epoch": 1.51, "learning_rate": 2.4775408214486078e-05, "loss": 0.6015, "step": 136685 }, { "epoch": 1.51, "learning_rate": 2.477448548734757e-05, "loss": 0.6136, "step": 136690 }, { "epoch": 1.51, "learning_rate": 2.4773562760209054e-05, "loss": 0.6495, "step": 136695 }, { "epoch": 1.51, "learning_rate": 2.477264003307054e-05, "loss": 0.6697, "step": 136700 }, { "epoch": 1.51, "learning_rate": 2.477171730593203e-05, "loss": 0.6593, "step": 136705 }, { "epoch": 1.51, "learning_rate": 2.4770794578793517e-05, "loss": 0.651, "step": 136710 }, { "epoch": 1.51, "learning_rate": 2.4769871851655005e-05, "loss": 0.6868, "step": 136715 }, { "epoch": 1.51, "learning_rate": 2.4768949124516493e-05, "loss": 0.5854, "step": 136720 }, { "epoch": 1.51, "learning_rate": 2.476802639737798e-05, "loss": 0.6666, "step": 136725 }, { "epoch": 1.51, "learning_rate": 2.4767103670239465e-05, "loss": 0.6485, "step": 136730 }, { "epoch": 1.51, "learning_rate": 2.4766180943100956e-05, "loss": 0.6711, "step": 136735 }, { "epoch": 1.51, "learning_rate": 2.476525821596244e-05, "loss": 0.6527, "step": 136740 }, { "epoch": 1.51, "learning_rate": 2.4764335488823932e-05, "loss": 0.6605, "step": 136745 }, { "epoch": 1.51, "learning_rate": 2.4763412761685416e-05, "loss": 0.5916, "step": 136750 }, { "epoch": 1.51, "learning_rate": 2.4762490034546908e-05, "loss": 0.6446, "step": 136755 }, { "epoch": 1.51, "learning_rate": 2.4761567307408392e-05, "loss": 0.6301, "step": 136760 }, { "epoch": 1.51, "learning_rate": 2.476064458026988e-05, "loss": 0.6044, "step": 136765 }, { "epoch": 1.51, "learning_rate": 2.4759721853131368e-05, "loss": 0.5825, "step": 136770 }, { "epoch": 1.51, "learning_rate": 2.4758799125992856e-05, "loss": 0.6078, "step": 136775 }, { "epoch": 1.51, "learning_rate": 2.4757876398854344e-05, "loss": 0.6564, "step": 136780 }, { "epoch": 1.51, "learning_rate": 2.4756953671715828e-05, "loss": 0.7049, "step": 136785 }, { "epoch": 1.51, "learning_rate": 2.475603094457732e-05, "loss": 0.6509, "step": 136790 }, { "epoch": 1.51, "learning_rate": 2.4755108217438804e-05, "loss": 0.6694, "step": 136795 }, { "epoch": 1.51, "learning_rate": 2.4754185490300295e-05, "loss": 0.677, "step": 136800 }, { "epoch": 1.51, "learning_rate": 2.475326276316178e-05, "loss": 0.6403, "step": 136805 }, { "epoch": 1.51, "learning_rate": 2.475234003602327e-05, "loss": 0.6332, "step": 136810 }, { "epoch": 1.51, "learning_rate": 2.4751417308884755e-05, "loss": 0.6438, "step": 136815 }, { "epoch": 1.51, "learning_rate": 2.4750494581746246e-05, "loss": 0.6019, "step": 136820 }, { "epoch": 1.52, "learning_rate": 2.474957185460773e-05, "loss": 0.6722, "step": 136825 }, { "epoch": 1.52, "learning_rate": 2.474864912746922e-05, "loss": 0.6497, "step": 136830 }, { "epoch": 1.52, "learning_rate": 2.4747726400330707e-05, "loss": 0.6249, "step": 136835 }, { "epoch": 1.52, "learning_rate": 2.4746803673192194e-05, "loss": 0.6716, "step": 136840 }, { "epoch": 1.52, "learning_rate": 2.4745880946053682e-05, "loss": 0.6305, "step": 136845 }, { "epoch": 1.52, "learning_rate": 2.4744958218915167e-05, "loss": 0.5794, "step": 136850 }, { "epoch": 1.52, "learning_rate": 2.4744035491776658e-05, "loss": 0.6512, "step": 136855 }, { "epoch": 1.52, "learning_rate": 2.4743112764638142e-05, "loss": 0.6607, "step": 136860 }, { "epoch": 1.52, "learning_rate": 2.4742190037499634e-05, "loss": 0.6879, "step": 136865 }, { "epoch": 1.52, "learning_rate": 2.4741267310361118e-05, "loss": 0.6604, "step": 136870 }, { "epoch": 1.52, "learning_rate": 2.474034458322261e-05, "loss": 0.6841, "step": 136875 }, { "epoch": 1.52, "learning_rate": 2.4739421856084094e-05, "loss": 0.6624, "step": 136880 }, { "epoch": 1.52, "learning_rate": 2.473849912894558e-05, "loss": 0.6482, "step": 136885 }, { "epoch": 1.52, "learning_rate": 2.473757640180707e-05, "loss": 0.6111, "step": 136890 }, { "epoch": 1.52, "learning_rate": 2.4736653674668557e-05, "loss": 0.6971, "step": 136895 }, { "epoch": 1.52, "learning_rate": 2.4735730947530045e-05, "loss": 0.7135, "step": 136900 }, { "epoch": 1.52, "learning_rate": 2.4734808220391533e-05, "loss": 0.6472, "step": 136905 }, { "epoch": 1.52, "learning_rate": 2.473388549325302e-05, "loss": 0.6752, "step": 136910 }, { "epoch": 1.52, "learning_rate": 2.473296276611451e-05, "loss": 0.5998, "step": 136915 }, { "epoch": 1.52, "learning_rate": 2.4732040038975997e-05, "loss": 0.6322, "step": 136920 }, { "epoch": 1.52, "learning_rate": 2.4731117311837484e-05, "loss": 0.6674, "step": 136925 }, { "epoch": 1.52, "learning_rate": 2.4730194584698972e-05, "loss": 0.5811, "step": 136930 }, { "epoch": 1.52, "learning_rate": 2.4729271857560457e-05, "loss": 0.6272, "step": 136935 }, { "epoch": 1.52, "learning_rate": 2.4728349130421945e-05, "loss": 0.6671, "step": 136940 }, { "epoch": 1.52, "learning_rate": 2.4727426403283432e-05, "loss": 0.6287, "step": 136945 }, { "epoch": 1.52, "learning_rate": 2.472650367614492e-05, "loss": 0.6693, "step": 136950 }, { "epoch": 1.52, "learning_rate": 2.4725580949006408e-05, "loss": 0.699, "step": 136955 }, { "epoch": 1.52, "learning_rate": 2.4724658221867896e-05, "loss": 0.679, "step": 136960 }, { "epoch": 1.52, "learning_rate": 2.4723735494729384e-05, "loss": 0.618, "step": 136965 }, { "epoch": 1.52, "learning_rate": 2.472281276759087e-05, "loss": 0.6512, "step": 136970 }, { "epoch": 1.52, "learning_rate": 2.472189004045236e-05, "loss": 0.6933, "step": 136975 }, { "epoch": 1.52, "learning_rate": 2.4720967313313847e-05, "loss": 0.7073, "step": 136980 }, { "epoch": 1.52, "learning_rate": 2.4720044586175335e-05, "loss": 0.6254, "step": 136985 }, { "epoch": 1.52, "learning_rate": 2.4719121859036823e-05, "loss": 0.6275, "step": 136990 }, { "epoch": 1.52, "learning_rate": 2.4718199131898308e-05, "loss": 0.6646, "step": 136995 }, { "epoch": 1.52, "learning_rate": 2.47172764047598e-05, "loss": 0.6724, "step": 137000 }, { "epoch": 1.52, "eval_loss": 0.6085899472236633, "eval_runtime": 69.2552, "eval_samples_per_second": 28.879, "eval_steps_per_second": 14.439, "step": 137000 }, { "epoch": 1.52, "learning_rate": 2.4716353677621283e-05, "loss": 0.6086, "step": 137005 }, { "epoch": 1.52, "learning_rate": 2.471543095048277e-05, "loss": 0.6228, "step": 137010 }, { "epoch": 1.52, "learning_rate": 2.471450822334426e-05, "loss": 0.6192, "step": 137015 }, { "epoch": 1.52, "learning_rate": 2.4713585496205747e-05, "loss": 0.655, "step": 137020 }, { "epoch": 1.52, "learning_rate": 2.4712662769067235e-05, "loss": 0.6291, "step": 137025 }, { "epoch": 1.52, "learning_rate": 2.4711740041928722e-05, "loss": 0.7524, "step": 137030 }, { "epoch": 1.52, "learning_rate": 2.471081731479021e-05, "loss": 0.607, "step": 137035 }, { "epoch": 1.52, "learning_rate": 2.4709894587651698e-05, "loss": 0.6692, "step": 137040 }, { "epoch": 1.52, "learning_rate": 2.4708971860513186e-05, "loss": 0.694, "step": 137045 }, { "epoch": 1.52, "learning_rate": 2.4708049133374674e-05, "loss": 0.6109, "step": 137050 }, { "epoch": 1.52, "learning_rate": 2.4707126406236162e-05, "loss": 0.6475, "step": 137055 }, { "epoch": 1.52, "learning_rate": 2.4706203679097646e-05, "loss": 0.6242, "step": 137060 }, { "epoch": 1.52, "learning_rate": 2.4705280951959137e-05, "loss": 0.5901, "step": 137065 }, { "epoch": 1.52, "learning_rate": 2.4704358224820622e-05, "loss": 0.6566, "step": 137070 }, { "epoch": 1.52, "learning_rate": 2.4703435497682113e-05, "loss": 0.6345, "step": 137075 }, { "epoch": 1.52, "learning_rate": 2.4702512770543598e-05, "loss": 0.65, "step": 137080 }, { "epoch": 1.52, "learning_rate": 2.4701590043405085e-05, "loss": 0.6395, "step": 137085 }, { "epoch": 1.52, "learning_rate": 2.4700667316266573e-05, "loss": 0.6113, "step": 137090 }, { "epoch": 1.52, "learning_rate": 2.469974458912806e-05, "loss": 0.6184, "step": 137095 }, { "epoch": 1.52, "learning_rate": 2.469882186198955e-05, "loss": 0.763, "step": 137100 }, { "epoch": 1.52, "learning_rate": 2.4697899134851037e-05, "loss": 0.6391, "step": 137105 }, { "epoch": 1.52, "learning_rate": 2.4696976407712525e-05, "loss": 0.598, "step": 137110 }, { "epoch": 1.52, "learning_rate": 2.469605368057401e-05, "loss": 0.6461, "step": 137115 }, { "epoch": 1.52, "learning_rate": 2.46951309534355e-05, "loss": 0.6208, "step": 137120 }, { "epoch": 1.52, "learning_rate": 2.4694208226296985e-05, "loss": 0.7152, "step": 137125 }, { "epoch": 1.52, "learning_rate": 2.4693285499158476e-05, "loss": 0.6486, "step": 137130 }, { "epoch": 1.52, "learning_rate": 2.469236277201996e-05, "loss": 0.6681, "step": 137135 }, { "epoch": 1.52, "learning_rate": 2.4691440044881452e-05, "loss": 0.5373, "step": 137140 }, { "epoch": 1.52, "learning_rate": 2.4690517317742936e-05, "loss": 0.6651, "step": 137145 }, { "epoch": 1.52, "learning_rate": 2.4689594590604424e-05, "loss": 0.6897, "step": 137150 }, { "epoch": 1.52, "learning_rate": 2.4688671863465912e-05, "loss": 0.6359, "step": 137155 }, { "epoch": 1.52, "learning_rate": 2.46877491363274e-05, "loss": 0.5937, "step": 137160 }, { "epoch": 1.52, "learning_rate": 2.4686826409188888e-05, "loss": 0.6649, "step": 137165 }, { "epoch": 1.52, "learning_rate": 2.4685903682050372e-05, "loss": 0.6695, "step": 137170 }, { "epoch": 1.52, "learning_rate": 2.4684980954911863e-05, "loss": 0.6215, "step": 137175 }, { "epoch": 1.52, "learning_rate": 2.4684058227773348e-05, "loss": 0.6541, "step": 137180 }, { "epoch": 1.52, "learning_rate": 2.468313550063484e-05, "loss": 0.6478, "step": 137185 }, { "epoch": 1.52, "learning_rate": 2.4682212773496323e-05, "loss": 0.6283, "step": 137190 }, { "epoch": 1.52, "learning_rate": 2.4681290046357815e-05, "loss": 0.6154, "step": 137195 }, { "epoch": 1.52, "learning_rate": 2.46803673192193e-05, "loss": 0.6509, "step": 137200 }, { "epoch": 1.52, "learning_rate": 2.467944459208079e-05, "loss": 0.6496, "step": 137205 }, { "epoch": 1.52, "learning_rate": 2.4678521864942275e-05, "loss": 0.671, "step": 137210 }, { "epoch": 1.52, "learning_rate": 2.4677599137803763e-05, "loss": 0.6711, "step": 137215 }, { "epoch": 1.52, "learning_rate": 2.467667641066525e-05, "loss": 0.6284, "step": 137220 }, { "epoch": 1.52, "learning_rate": 2.467575368352674e-05, "loss": 0.6494, "step": 137225 }, { "epoch": 1.52, "learning_rate": 2.4674830956388226e-05, "loss": 0.619, "step": 137230 }, { "epoch": 1.52, "learning_rate": 2.467390822924971e-05, "loss": 0.6298, "step": 137235 }, { "epoch": 1.52, "learning_rate": 2.4672985502111202e-05, "loss": 0.7201, "step": 137240 }, { "epoch": 1.52, "learning_rate": 2.4672062774972686e-05, "loss": 0.6739, "step": 137245 }, { "epoch": 1.52, "learning_rate": 2.4671140047834178e-05, "loss": 0.6518, "step": 137250 }, { "epoch": 1.52, "learning_rate": 2.4670217320695662e-05, "loss": 0.6649, "step": 137255 }, { "epoch": 1.52, "learning_rate": 2.4669294593557153e-05, "loss": 0.5913, "step": 137260 }, { "epoch": 1.52, "learning_rate": 2.4668371866418638e-05, "loss": 0.6367, "step": 137265 }, { "epoch": 1.52, "learning_rate": 2.4667449139280126e-05, "loss": 0.6579, "step": 137270 }, { "epoch": 1.52, "learning_rate": 2.4666526412141613e-05, "loss": 0.5829, "step": 137275 }, { "epoch": 1.52, "learning_rate": 2.46656036850031e-05, "loss": 0.5744, "step": 137280 }, { "epoch": 1.52, "learning_rate": 2.466468095786459e-05, "loss": 0.6196, "step": 137285 }, { "epoch": 1.52, "learning_rate": 2.4663758230726077e-05, "loss": 0.6524, "step": 137290 }, { "epoch": 1.52, "learning_rate": 2.4662835503587565e-05, "loss": 0.6135, "step": 137295 }, { "epoch": 1.52, "learning_rate": 2.4661912776449053e-05, "loss": 0.6902, "step": 137300 }, { "epoch": 1.52, "learning_rate": 2.466099004931054e-05, "loss": 0.6467, "step": 137305 }, { "epoch": 1.52, "learning_rate": 2.4660067322172025e-05, "loss": 0.6491, "step": 137310 }, { "epoch": 1.52, "learning_rate": 2.4659144595033516e-05, "loss": 0.6149, "step": 137315 }, { "epoch": 1.52, "learning_rate": 2.4658221867895e-05, "loss": 0.6494, "step": 137320 }, { "epoch": 1.52, "learning_rate": 2.465729914075649e-05, "loss": 0.5662, "step": 137325 }, { "epoch": 1.52, "learning_rate": 2.4656376413617976e-05, "loss": 0.6882, "step": 137330 }, { "epoch": 1.52, "learning_rate": 2.4655453686479464e-05, "loss": 0.6752, "step": 137335 }, { "epoch": 1.52, "learning_rate": 2.4654530959340952e-05, "loss": 0.6578, "step": 137340 }, { "epoch": 1.52, "learning_rate": 2.465360823220244e-05, "loss": 0.6265, "step": 137345 }, { "epoch": 1.52, "learning_rate": 2.4652685505063928e-05, "loss": 0.6185, "step": 137350 }, { "epoch": 1.52, "learning_rate": 2.4651762777925416e-05, "loss": 0.6197, "step": 137355 }, { "epoch": 1.52, "learning_rate": 2.4650840050786904e-05, "loss": 0.6146, "step": 137360 }, { "epoch": 1.52, "learning_rate": 2.464991732364839e-05, "loss": 0.6196, "step": 137365 }, { "epoch": 1.52, "learning_rate": 2.464899459650988e-05, "loss": 0.6581, "step": 137370 }, { "epoch": 1.52, "learning_rate": 2.4648071869371367e-05, "loss": 0.7062, "step": 137375 }, { "epoch": 1.52, "learning_rate": 2.464714914223285e-05, "loss": 0.6175, "step": 137380 }, { "epoch": 1.52, "learning_rate": 2.464622641509434e-05, "loss": 0.5857, "step": 137385 }, { "epoch": 1.52, "learning_rate": 2.4645303687955827e-05, "loss": 0.5974, "step": 137390 }, { "epoch": 1.52, "learning_rate": 2.4644380960817315e-05, "loss": 0.6048, "step": 137395 }, { "epoch": 1.52, "learning_rate": 2.4643458233678803e-05, "loss": 0.6687, "step": 137400 }, { "epoch": 1.52, "learning_rate": 2.464253550654029e-05, "loss": 0.6945, "step": 137405 }, { "epoch": 1.52, "learning_rate": 2.464161277940178e-05, "loss": 0.6597, "step": 137410 }, { "epoch": 1.52, "learning_rate": 2.4640690052263266e-05, "loss": 0.6181, "step": 137415 }, { "epoch": 1.52, "learning_rate": 2.4639767325124754e-05, "loss": 0.6177, "step": 137420 }, { "epoch": 1.52, "learning_rate": 2.4638844597986242e-05, "loss": 0.6804, "step": 137425 }, { "epoch": 1.52, "learning_rate": 2.463792187084773e-05, "loss": 0.6397, "step": 137430 }, { "epoch": 1.52, "learning_rate": 2.4636999143709218e-05, "loss": 0.627, "step": 137435 }, { "epoch": 1.52, "learning_rate": 2.4636076416570706e-05, "loss": 0.6602, "step": 137440 }, { "epoch": 1.52, "learning_rate": 2.463515368943219e-05, "loss": 0.6123, "step": 137445 }, { "epoch": 1.52, "learning_rate": 2.463423096229368e-05, "loss": 0.6236, "step": 137450 }, { "epoch": 1.52, "learning_rate": 2.4633308235155166e-05, "loss": 0.6407, "step": 137455 }, { "epoch": 1.52, "learning_rate": 2.4632385508016654e-05, "loss": 0.6395, "step": 137460 }, { "epoch": 1.52, "learning_rate": 2.463146278087814e-05, "loss": 0.6178, "step": 137465 }, { "epoch": 1.52, "learning_rate": 2.463054005373963e-05, "loss": 0.6448, "step": 137470 }, { "epoch": 1.52, "learning_rate": 2.4629617326601117e-05, "loss": 0.643, "step": 137475 }, { "epoch": 1.52, "learning_rate": 2.4628694599462605e-05, "loss": 0.6605, "step": 137480 }, { "epoch": 1.52, "learning_rate": 2.4627771872324093e-05, "loss": 0.5991, "step": 137485 }, { "epoch": 1.52, "learning_rate": 2.462684914518558e-05, "loss": 0.6233, "step": 137490 }, { "epoch": 1.52, "learning_rate": 2.462592641804707e-05, "loss": 0.6877, "step": 137495 }, { "epoch": 1.52, "learning_rate": 2.4625003690908553e-05, "loss": 0.6772, "step": 137500 }, { "epoch": 1.52, "learning_rate": 2.4624080963770044e-05, "loss": 0.6481, "step": 137505 }, { "epoch": 1.52, "learning_rate": 2.462315823663153e-05, "loss": 0.5819, "step": 137510 }, { "epoch": 1.52, "learning_rate": 2.462223550949302e-05, "loss": 0.605, "step": 137515 }, { "epoch": 1.52, "learning_rate": 2.4621312782354505e-05, "loss": 0.6345, "step": 137520 }, { "epoch": 1.52, "learning_rate": 2.4620390055215996e-05, "loss": 0.6665, "step": 137525 }, { "epoch": 1.52, "learning_rate": 2.461946732807748e-05, "loss": 0.6314, "step": 137530 }, { "epoch": 1.52, "learning_rate": 2.4618544600938968e-05, "loss": 0.6033, "step": 137535 }, { "epoch": 1.52, "learning_rate": 2.4617621873800456e-05, "loss": 0.6404, "step": 137540 }, { "epoch": 1.52, "learning_rate": 2.4616699146661944e-05, "loss": 0.6291, "step": 137545 }, { "epoch": 1.52, "learning_rate": 2.461577641952343e-05, "loss": 0.6504, "step": 137550 }, { "epoch": 1.52, "learning_rate": 2.4614853692384916e-05, "loss": 0.6617, "step": 137555 }, { "epoch": 1.52, "learning_rate": 2.4613930965246407e-05, "loss": 0.6545, "step": 137560 }, { "epoch": 1.52, "learning_rate": 2.4613008238107892e-05, "loss": 0.6077, "step": 137565 }, { "epoch": 1.52, "learning_rate": 2.4612085510969383e-05, "loss": 0.6471, "step": 137570 }, { "epoch": 1.52, "learning_rate": 2.4611162783830867e-05, "loss": 0.6149, "step": 137575 }, { "epoch": 1.52, "learning_rate": 2.461024005669236e-05, "loss": 0.6504, "step": 137580 }, { "epoch": 1.52, "learning_rate": 2.4609317329553843e-05, "loss": 0.6529, "step": 137585 }, { "epoch": 1.52, "learning_rate": 2.4608394602415334e-05, "loss": 0.6595, "step": 137590 }, { "epoch": 1.52, "learning_rate": 2.460747187527682e-05, "loss": 0.6039, "step": 137595 }, { "epoch": 1.52, "learning_rate": 2.4606549148138307e-05, "loss": 0.6404, "step": 137600 }, { "epoch": 1.52, "learning_rate": 2.4605626420999795e-05, "loss": 0.6547, "step": 137605 }, { "epoch": 1.52, "learning_rate": 2.4604703693861282e-05, "loss": 0.6254, "step": 137610 }, { "epoch": 1.52, "learning_rate": 2.460378096672277e-05, "loss": 0.6388, "step": 137615 }, { "epoch": 1.52, "learning_rate": 2.4602858239584255e-05, "loss": 0.6477, "step": 137620 }, { "epoch": 1.52, "learning_rate": 2.4601935512445746e-05, "loss": 0.6886, "step": 137625 }, { "epoch": 1.52, "learning_rate": 2.460101278530723e-05, "loss": 0.6729, "step": 137630 }, { "epoch": 1.52, "learning_rate": 2.460009005816872e-05, "loss": 0.6365, "step": 137635 }, { "epoch": 1.52, "learning_rate": 2.4599167331030206e-05, "loss": 0.607, "step": 137640 }, { "epoch": 1.52, "learning_rate": 2.4598244603891697e-05, "loss": 0.6578, "step": 137645 }, { "epoch": 1.52, "learning_rate": 2.4597321876753182e-05, "loss": 0.6586, "step": 137650 }, { "epoch": 1.52, "learning_rate": 2.459639914961467e-05, "loss": 0.6602, "step": 137655 }, { "epoch": 1.52, "learning_rate": 2.4595476422476158e-05, "loss": 0.6157, "step": 137660 }, { "epoch": 1.52, "learning_rate": 2.4594553695337645e-05, "loss": 0.6611, "step": 137665 }, { "epoch": 1.52, "learning_rate": 2.4593630968199133e-05, "loss": 0.5817, "step": 137670 }, { "epoch": 1.52, "learning_rate": 2.459270824106062e-05, "loss": 0.6319, "step": 137675 }, { "epoch": 1.52, "learning_rate": 2.459178551392211e-05, "loss": 0.64, "step": 137680 }, { "epoch": 1.52, "learning_rate": 2.4590862786783593e-05, "loss": 0.6685, "step": 137685 }, { "epoch": 1.52, "learning_rate": 2.4589940059645085e-05, "loss": 0.6252, "step": 137690 }, { "epoch": 1.52, "learning_rate": 2.458901733250657e-05, "loss": 0.6722, "step": 137695 }, { "epoch": 1.52, "learning_rate": 2.458809460536806e-05, "loss": 0.6447, "step": 137700 }, { "epoch": 1.52, "learning_rate": 2.4587171878229545e-05, "loss": 0.6564, "step": 137705 }, { "epoch": 1.52, "learning_rate": 2.4586249151091033e-05, "loss": 0.6454, "step": 137710 }, { "epoch": 1.52, "learning_rate": 2.458532642395252e-05, "loss": 0.6715, "step": 137715 }, { "epoch": 1.52, "learning_rate": 2.4584403696814008e-05, "loss": 0.5898, "step": 137720 }, { "epoch": 1.52, "learning_rate": 2.4583480969675496e-05, "loss": 0.6286, "step": 137725 }, { "epoch": 1.53, "learning_rate": 2.4582558242536984e-05, "loss": 0.6639, "step": 137730 }, { "epoch": 1.53, "learning_rate": 2.4581635515398472e-05, "loss": 0.6303, "step": 137735 }, { "epoch": 1.53, "learning_rate": 2.458071278825996e-05, "loss": 0.6196, "step": 137740 }, { "epoch": 1.53, "learning_rate": 2.4579790061121448e-05, "loss": 0.6651, "step": 137745 }, { "epoch": 1.53, "learning_rate": 2.4578867333982935e-05, "loss": 0.6246, "step": 137750 }, { "epoch": 1.53, "learning_rate": 2.4577944606844423e-05, "loss": 0.5964, "step": 137755 }, { "epoch": 1.53, "learning_rate": 2.457702187970591e-05, "loss": 0.6622, "step": 137760 }, { "epoch": 1.53, "learning_rate": 2.45760991525674e-05, "loss": 0.6592, "step": 137765 }, { "epoch": 1.53, "learning_rate": 2.4575176425428883e-05, "loss": 0.6219, "step": 137770 }, { "epoch": 1.53, "learning_rate": 2.457425369829037e-05, "loss": 0.709, "step": 137775 }, { "epoch": 1.53, "learning_rate": 2.457333097115186e-05, "loss": 0.6128, "step": 137780 }, { "epoch": 1.53, "learning_rate": 2.4572408244013347e-05, "loss": 0.6731, "step": 137785 }, { "epoch": 1.53, "learning_rate": 2.4571485516874835e-05, "loss": 0.6613, "step": 137790 }, { "epoch": 1.53, "learning_rate": 2.4570562789736323e-05, "loss": 0.6485, "step": 137795 }, { "epoch": 1.53, "learning_rate": 2.456964006259781e-05, "loss": 0.6582, "step": 137800 }, { "epoch": 1.53, "learning_rate": 2.45687173354593e-05, "loss": 0.6399, "step": 137805 }, { "epoch": 1.53, "learning_rate": 2.4567794608320786e-05, "loss": 0.6124, "step": 137810 }, { "epoch": 1.53, "learning_rate": 2.4566871881182274e-05, "loss": 0.643, "step": 137815 }, { "epoch": 1.53, "learning_rate": 2.4565949154043762e-05, "loss": 0.6379, "step": 137820 }, { "epoch": 1.53, "learning_rate": 2.456502642690525e-05, "loss": 0.6384, "step": 137825 }, { "epoch": 1.53, "learning_rate": 2.4564103699766734e-05, "loss": 0.6577, "step": 137830 }, { "epoch": 1.53, "learning_rate": 2.4563180972628225e-05, "loss": 0.6285, "step": 137835 }, { "epoch": 1.53, "learning_rate": 2.456225824548971e-05, "loss": 0.6527, "step": 137840 }, { "epoch": 1.53, "learning_rate": 2.4561335518351198e-05, "loss": 0.6834, "step": 137845 }, { "epoch": 1.53, "learning_rate": 2.4560412791212686e-05, "loss": 0.5928, "step": 137850 }, { "epoch": 1.53, "learning_rate": 2.4559490064074173e-05, "loss": 0.6214, "step": 137855 }, { "epoch": 1.53, "learning_rate": 2.455856733693566e-05, "loss": 0.6858, "step": 137860 }, { "epoch": 1.53, "learning_rate": 2.455764460979715e-05, "loss": 0.6331, "step": 137865 }, { "epoch": 1.53, "learning_rate": 2.4556721882658637e-05, "loss": 0.6675, "step": 137870 }, { "epoch": 1.53, "learning_rate": 2.4555799155520125e-05, "loss": 0.6703, "step": 137875 }, { "epoch": 1.53, "learning_rate": 2.4554876428381613e-05, "loss": 0.5963, "step": 137880 }, { "epoch": 1.53, "learning_rate": 2.4553953701243097e-05, "loss": 0.6168, "step": 137885 }, { "epoch": 1.53, "learning_rate": 2.455303097410459e-05, "loss": 0.6183, "step": 137890 }, { "epoch": 1.53, "learning_rate": 2.4552108246966073e-05, "loss": 0.6435, "step": 137895 }, { "epoch": 1.53, "learning_rate": 2.4551185519827564e-05, "loss": 0.6242, "step": 137900 }, { "epoch": 1.53, "learning_rate": 2.455026279268905e-05, "loss": 0.6164, "step": 137905 }, { "epoch": 1.53, "learning_rate": 2.454934006555054e-05, "loss": 0.6212, "step": 137910 }, { "epoch": 1.53, "learning_rate": 2.4548417338412024e-05, "loss": 0.6659, "step": 137915 }, { "epoch": 1.53, "learning_rate": 2.4547494611273512e-05, "loss": 0.6748, "step": 137920 }, { "epoch": 1.53, "learning_rate": 2.4546571884135e-05, "loss": 0.6109, "step": 137925 }, { "epoch": 1.53, "learning_rate": 2.4545649156996488e-05, "loss": 0.6323, "step": 137930 }, { "epoch": 1.53, "learning_rate": 2.4544726429857976e-05, "loss": 0.6224, "step": 137935 }, { "epoch": 1.53, "learning_rate": 2.454380370271946e-05, "loss": 0.6501, "step": 137940 }, { "epoch": 1.53, "learning_rate": 2.454288097558095e-05, "loss": 0.6694, "step": 137945 }, { "epoch": 1.53, "learning_rate": 2.4541958248442436e-05, "loss": 0.6171, "step": 137950 }, { "epoch": 1.53, "learning_rate": 2.4541035521303927e-05, "loss": 0.6776, "step": 137955 }, { "epoch": 1.53, "learning_rate": 2.454011279416541e-05, "loss": 0.5867, "step": 137960 }, { "epoch": 1.53, "learning_rate": 2.4539190067026903e-05, "loss": 0.6243, "step": 137965 }, { "epoch": 1.53, "learning_rate": 2.4538267339888387e-05, "loss": 0.5987, "step": 137970 }, { "epoch": 1.53, "learning_rate": 2.453734461274988e-05, "loss": 0.662, "step": 137975 }, { "epoch": 1.53, "learning_rate": 2.4536421885611363e-05, "loss": 0.604, "step": 137980 }, { "epoch": 1.53, "learning_rate": 2.453549915847285e-05, "loss": 0.6855, "step": 137985 }, { "epoch": 1.53, "learning_rate": 2.453457643133434e-05, "loss": 0.6638, "step": 137990 }, { "epoch": 1.53, "learning_rate": 2.4533653704195826e-05, "loss": 0.6124, "step": 137995 }, { "epoch": 1.53, "learning_rate": 2.4532730977057314e-05, "loss": 0.6367, "step": 138000 }, { "epoch": 1.53, "eval_loss": 0.641386091709137, "eval_runtime": 69.2657, "eval_samples_per_second": 28.874, "eval_steps_per_second": 14.437, "step": 138000 }, { "epoch": 1.53, "learning_rate": 2.45318082499188e-05, "loss": 0.6473, "step": 138005 }, { "epoch": 1.53, "learning_rate": 2.453088552278029e-05, "loss": 0.6442, "step": 138010 }, { "epoch": 1.53, "learning_rate": 2.4529962795641774e-05, "loss": 0.6192, "step": 138015 }, { "epoch": 1.53, "learning_rate": 2.4529040068503266e-05, "loss": 0.6522, "step": 138020 }, { "epoch": 1.53, "learning_rate": 2.452811734136475e-05, "loss": 0.5963, "step": 138025 }, { "epoch": 1.53, "learning_rate": 2.452719461422624e-05, "loss": 0.6367, "step": 138030 }, { "epoch": 1.53, "learning_rate": 2.4526271887087726e-05, "loss": 0.5709, "step": 138035 }, { "epoch": 1.53, "learning_rate": 2.4525349159949214e-05, "loss": 0.6429, "step": 138040 }, { "epoch": 1.53, "learning_rate": 2.45244264328107e-05, "loss": 0.6491, "step": 138045 }, { "epoch": 1.53, "learning_rate": 2.452350370567219e-05, "loss": 0.5932, "step": 138050 }, { "epoch": 1.53, "learning_rate": 2.4522580978533677e-05, "loss": 0.6137, "step": 138055 }, { "epoch": 1.53, "learning_rate": 2.4521658251395165e-05, "loss": 0.6218, "step": 138060 }, { "epoch": 1.53, "learning_rate": 2.4520735524256653e-05, "loss": 0.6557, "step": 138065 }, { "epoch": 1.53, "learning_rate": 2.4519812797118137e-05, "loss": 0.6208, "step": 138070 }, { "epoch": 1.53, "learning_rate": 2.451889006997963e-05, "loss": 0.6725, "step": 138075 }, { "epoch": 1.53, "learning_rate": 2.4517967342841113e-05, "loss": 0.603, "step": 138080 }, { "epoch": 1.53, "learning_rate": 2.4517044615702604e-05, "loss": 0.6248, "step": 138085 }, { "epoch": 1.53, "learning_rate": 2.451612188856409e-05, "loss": 0.6291, "step": 138090 }, { "epoch": 1.53, "learning_rate": 2.4515199161425577e-05, "loss": 0.5838, "step": 138095 }, { "epoch": 1.53, "learning_rate": 2.4514276434287064e-05, "loss": 0.6057, "step": 138100 }, { "epoch": 1.53, "learning_rate": 2.4513353707148552e-05, "loss": 0.6534, "step": 138105 }, { "epoch": 1.53, "learning_rate": 2.451243098001004e-05, "loss": 0.6275, "step": 138110 }, { "epoch": 1.53, "learning_rate": 2.4511508252871528e-05, "loss": 0.657, "step": 138115 }, { "epoch": 1.53, "learning_rate": 2.4510585525733016e-05, "loss": 0.6712, "step": 138120 }, { "epoch": 1.53, "learning_rate": 2.4509662798594504e-05, "loss": 0.6266, "step": 138125 }, { "epoch": 1.53, "learning_rate": 2.450874007145599e-05, "loss": 0.616, "step": 138130 }, { "epoch": 1.53, "learning_rate": 2.450781734431748e-05, "loss": 0.585, "step": 138135 }, { "epoch": 1.53, "learning_rate": 2.4506894617178967e-05, "loss": 0.6778, "step": 138140 }, { "epoch": 1.53, "learning_rate": 2.4505971890040452e-05, "loss": 0.5914, "step": 138145 }, { "epoch": 1.53, "learning_rate": 2.4505049162901943e-05, "loss": 0.609, "step": 138150 }, { "epoch": 1.53, "learning_rate": 2.4504126435763427e-05, "loss": 0.6613, "step": 138155 }, { "epoch": 1.53, "learning_rate": 2.4503203708624915e-05, "loss": 0.6244, "step": 138160 }, { "epoch": 1.53, "learning_rate": 2.4502280981486403e-05, "loss": 0.66, "step": 138165 }, { "epoch": 1.53, "learning_rate": 2.450135825434789e-05, "loss": 0.6708, "step": 138170 }, { "epoch": 1.53, "learning_rate": 2.450043552720938e-05, "loss": 0.6337, "step": 138175 }, { "epoch": 1.53, "learning_rate": 2.4499512800070867e-05, "loss": 0.6618, "step": 138180 }, { "epoch": 1.53, "learning_rate": 2.4498590072932355e-05, "loss": 0.6843, "step": 138185 }, { "epoch": 1.53, "learning_rate": 2.4497667345793842e-05, "loss": 0.6444, "step": 138190 }, { "epoch": 1.53, "learning_rate": 2.449674461865533e-05, "loss": 0.6423, "step": 138195 }, { "epoch": 1.53, "learning_rate": 2.4495821891516818e-05, "loss": 0.6569, "step": 138200 }, { "epoch": 1.53, "learning_rate": 2.4494899164378306e-05, "loss": 0.6535, "step": 138205 }, { "epoch": 1.53, "learning_rate": 2.4493976437239794e-05, "loss": 0.7063, "step": 138210 }, { "epoch": 1.53, "learning_rate": 2.4493053710101278e-05, "loss": 0.6453, "step": 138215 }, { "epoch": 1.53, "learning_rate": 2.4492130982962766e-05, "loss": 0.6675, "step": 138220 }, { "epoch": 1.53, "learning_rate": 2.4491208255824254e-05, "loss": 0.6339, "step": 138225 }, { "epoch": 1.53, "learning_rate": 2.4490285528685742e-05, "loss": 0.6092, "step": 138230 }, { "epoch": 1.53, "learning_rate": 2.448936280154723e-05, "loss": 0.6803, "step": 138235 }, { "epoch": 1.53, "learning_rate": 2.4488440074408717e-05, "loss": 0.6705, "step": 138240 }, { "epoch": 1.53, "learning_rate": 2.4487517347270205e-05, "loss": 0.6055, "step": 138245 }, { "epoch": 1.53, "learning_rate": 2.4486594620131693e-05, "loss": 0.653, "step": 138250 }, { "epoch": 1.53, "learning_rate": 2.448567189299318e-05, "loss": 0.671, "step": 138255 }, { "epoch": 1.53, "learning_rate": 2.448474916585467e-05, "loss": 0.7002, "step": 138260 }, { "epoch": 1.53, "learning_rate": 2.4483826438716157e-05, "loss": 0.6371, "step": 138265 }, { "epoch": 1.53, "learning_rate": 2.448290371157764e-05, "loss": 0.6748, "step": 138270 }, { "epoch": 1.53, "learning_rate": 2.4481980984439132e-05, "loss": 0.6862, "step": 138275 }, { "epoch": 1.53, "learning_rate": 2.4481058257300617e-05, "loss": 0.6657, "step": 138280 }, { "epoch": 1.53, "learning_rate": 2.4480135530162108e-05, "loss": 0.6627, "step": 138285 }, { "epoch": 1.53, "learning_rate": 2.4479212803023593e-05, "loss": 0.658, "step": 138290 }, { "epoch": 1.53, "learning_rate": 2.447829007588508e-05, "loss": 0.6445, "step": 138295 }, { "epoch": 1.53, "learning_rate": 2.4477367348746568e-05, "loss": 0.6442, "step": 138300 }, { "epoch": 1.53, "learning_rate": 2.4476444621608056e-05, "loss": 0.608, "step": 138305 }, { "epoch": 1.53, "learning_rate": 2.4475521894469544e-05, "loss": 0.6698, "step": 138310 }, { "epoch": 1.53, "learning_rate": 2.4474599167331032e-05, "loss": 0.5648, "step": 138315 }, { "epoch": 1.53, "learning_rate": 2.447367644019252e-05, "loss": 0.6159, "step": 138320 }, { "epoch": 1.53, "learning_rate": 2.4472753713054004e-05, "loss": 0.6141, "step": 138325 }, { "epoch": 1.53, "learning_rate": 2.4471830985915495e-05, "loss": 0.6204, "step": 138330 }, { "epoch": 1.53, "learning_rate": 2.447090825877698e-05, "loss": 0.6207, "step": 138335 }, { "epoch": 1.53, "learning_rate": 2.446998553163847e-05, "loss": 0.5875, "step": 138340 }, { "epoch": 1.53, "learning_rate": 2.4469062804499956e-05, "loss": 0.6027, "step": 138345 }, { "epoch": 1.53, "learning_rate": 2.4468140077361447e-05, "loss": 0.6567, "step": 138350 }, { "epoch": 1.53, "learning_rate": 2.446721735022293e-05, "loss": 0.6472, "step": 138355 }, { "epoch": 1.53, "learning_rate": 2.4466294623084422e-05, "loss": 0.6595, "step": 138360 }, { "epoch": 1.53, "learning_rate": 2.4465371895945907e-05, "loss": 0.6308, "step": 138365 }, { "epoch": 1.53, "learning_rate": 2.4464449168807395e-05, "loss": 0.6412, "step": 138370 }, { "epoch": 1.53, "learning_rate": 2.4463526441668883e-05, "loss": 0.62, "step": 138375 }, { "epoch": 1.53, "learning_rate": 2.446260371453037e-05, "loss": 0.6846, "step": 138380 }, { "epoch": 1.53, "learning_rate": 2.4461680987391858e-05, "loss": 0.6158, "step": 138385 }, { "epoch": 1.53, "learning_rate": 2.4460758260253343e-05, "loss": 0.6308, "step": 138390 }, { "epoch": 1.53, "learning_rate": 2.4459835533114834e-05, "loss": 0.6415, "step": 138395 }, { "epoch": 1.53, "learning_rate": 2.445891280597632e-05, "loss": 0.6867, "step": 138400 }, { "epoch": 1.53, "learning_rate": 2.445799007883781e-05, "loss": 0.5812, "step": 138405 }, { "epoch": 1.53, "learning_rate": 2.4457067351699294e-05, "loss": 0.6089, "step": 138410 }, { "epoch": 1.53, "learning_rate": 2.4456144624560785e-05, "loss": 0.6985, "step": 138415 }, { "epoch": 1.53, "learning_rate": 2.445522189742227e-05, "loss": 0.5905, "step": 138420 }, { "epoch": 1.53, "learning_rate": 2.4454299170283758e-05, "loss": 0.6379, "step": 138425 }, { "epoch": 1.53, "learning_rate": 2.4453376443145246e-05, "loss": 0.635, "step": 138430 }, { "epoch": 1.53, "learning_rate": 2.4452453716006733e-05, "loss": 0.6264, "step": 138435 }, { "epoch": 1.53, "learning_rate": 2.445153098886822e-05, "loss": 0.5992, "step": 138440 }, { "epoch": 1.53, "learning_rate": 2.4450608261729706e-05, "loss": 0.6581, "step": 138445 }, { "epoch": 1.53, "learning_rate": 2.4449685534591197e-05, "loss": 0.6175, "step": 138450 }, { "epoch": 1.53, "learning_rate": 2.444876280745268e-05, "loss": 0.6521, "step": 138455 }, { "epoch": 1.53, "learning_rate": 2.4447840080314173e-05, "loss": 0.6527, "step": 138460 }, { "epoch": 1.53, "learning_rate": 2.4446917353175657e-05, "loss": 0.6548, "step": 138465 }, { "epoch": 1.53, "learning_rate": 2.444599462603715e-05, "loss": 0.6312, "step": 138470 }, { "epoch": 1.53, "learning_rate": 2.4445071898898633e-05, "loss": 0.616, "step": 138475 }, { "epoch": 1.53, "learning_rate": 2.444414917176012e-05, "loss": 0.6259, "step": 138480 }, { "epoch": 1.53, "learning_rate": 2.444322644462161e-05, "loss": 0.5994, "step": 138485 }, { "epoch": 1.53, "learning_rate": 2.4442303717483096e-05, "loss": 0.6555, "step": 138490 }, { "epoch": 1.53, "learning_rate": 2.4441380990344584e-05, "loss": 0.6407, "step": 138495 }, { "epoch": 1.53, "learning_rate": 2.4440458263206072e-05, "loss": 0.6372, "step": 138500 }, { "epoch": 1.53, "learning_rate": 2.443953553606756e-05, "loss": 0.6609, "step": 138505 }, { "epoch": 1.53, "learning_rate": 2.4438612808929048e-05, "loss": 0.6114, "step": 138510 }, { "epoch": 1.53, "learning_rate": 2.4437690081790536e-05, "loss": 0.642, "step": 138515 }, { "epoch": 1.53, "learning_rate": 2.443676735465202e-05, "loss": 0.6047, "step": 138520 }, { "epoch": 1.53, "learning_rate": 2.443584462751351e-05, "loss": 0.6202, "step": 138525 }, { "epoch": 1.53, "learning_rate": 2.4434921900374996e-05, "loss": 0.6853, "step": 138530 }, { "epoch": 1.53, "learning_rate": 2.4433999173236487e-05, "loss": 0.6566, "step": 138535 }, { "epoch": 1.53, "learning_rate": 2.443307644609797e-05, "loss": 0.6654, "step": 138540 }, { "epoch": 1.53, "learning_rate": 2.443215371895946e-05, "loss": 0.6596, "step": 138545 }, { "epoch": 1.53, "learning_rate": 2.4431230991820947e-05, "loss": 0.6069, "step": 138550 }, { "epoch": 1.53, "learning_rate": 2.4430308264682435e-05, "loss": 0.6382, "step": 138555 }, { "epoch": 1.53, "learning_rate": 2.4429385537543923e-05, "loss": 0.6312, "step": 138560 }, { "epoch": 1.53, "learning_rate": 2.442846281040541e-05, "loss": 0.5756, "step": 138565 }, { "epoch": 1.53, "learning_rate": 2.44275400832669e-05, "loss": 0.6422, "step": 138570 }, { "epoch": 1.53, "learning_rate": 2.4426617356128386e-05, "loss": 0.6493, "step": 138575 }, { "epoch": 1.53, "learning_rate": 2.4425694628989874e-05, "loss": 0.6511, "step": 138580 }, { "epoch": 1.53, "learning_rate": 2.4424771901851362e-05, "loss": 0.6032, "step": 138585 }, { "epoch": 1.53, "learning_rate": 2.442384917471285e-05, "loss": 0.596, "step": 138590 }, { "epoch": 1.53, "learning_rate": 2.4422926447574338e-05, "loss": 0.6112, "step": 138595 }, { "epoch": 1.53, "learning_rate": 2.4422003720435822e-05, "loss": 0.6457, "step": 138600 }, { "epoch": 1.53, "learning_rate": 2.442108099329731e-05, "loss": 0.6149, "step": 138605 }, { "epoch": 1.53, "learning_rate": 2.4420158266158798e-05, "loss": 0.6555, "step": 138610 }, { "epoch": 1.53, "learning_rate": 2.4419235539020286e-05, "loss": 0.6298, "step": 138615 }, { "epoch": 1.53, "learning_rate": 2.4418312811881774e-05, "loss": 0.6564, "step": 138620 }, { "epoch": 1.53, "learning_rate": 2.441739008474326e-05, "loss": 0.689, "step": 138625 }, { "epoch": 1.54, "learning_rate": 2.441646735760475e-05, "loss": 0.6351, "step": 138630 }, { "epoch": 1.54, "learning_rate": 2.4415544630466237e-05, "loss": 0.6468, "step": 138635 }, { "epoch": 1.54, "learning_rate": 2.4414621903327725e-05, "loss": 0.6844, "step": 138640 }, { "epoch": 1.54, "learning_rate": 2.4413699176189213e-05, "loss": 0.6275, "step": 138645 }, { "epoch": 1.54, "learning_rate": 2.44127764490507e-05, "loss": 0.7093, "step": 138650 }, { "epoch": 1.54, "learning_rate": 2.4411853721912185e-05, "loss": 0.6491, "step": 138655 }, { "epoch": 1.54, "learning_rate": 2.4410930994773676e-05, "loss": 0.5638, "step": 138660 }, { "epoch": 1.54, "learning_rate": 2.441000826763516e-05, "loss": 0.7075, "step": 138665 }, { "epoch": 1.54, "learning_rate": 2.4409085540496652e-05, "loss": 0.6467, "step": 138670 }, { "epoch": 1.54, "learning_rate": 2.4408162813358137e-05, "loss": 0.6335, "step": 138675 }, { "epoch": 1.54, "learning_rate": 2.4407240086219624e-05, "loss": 0.6173, "step": 138680 }, { "epoch": 1.54, "learning_rate": 2.4406317359081112e-05, "loss": 0.6232, "step": 138685 }, { "epoch": 1.54, "learning_rate": 2.44053946319426e-05, "loss": 0.5985, "step": 138690 }, { "epoch": 1.54, "learning_rate": 2.4404471904804088e-05, "loss": 0.6384, "step": 138695 }, { "epoch": 1.54, "learning_rate": 2.4403549177665576e-05, "loss": 0.613, "step": 138700 }, { "epoch": 1.54, "learning_rate": 2.4402626450527064e-05, "loss": 0.6401, "step": 138705 }, { "epoch": 1.54, "learning_rate": 2.4401703723388548e-05, "loss": 0.6696, "step": 138710 }, { "epoch": 1.54, "learning_rate": 2.440078099625004e-05, "loss": 0.6333, "step": 138715 }, { "epoch": 1.54, "learning_rate": 2.4399858269111524e-05, "loss": 0.6919, "step": 138720 }, { "epoch": 1.54, "learning_rate": 2.4398935541973015e-05, "loss": 0.6379, "step": 138725 }, { "epoch": 1.54, "learning_rate": 2.43980128148345e-05, "loss": 0.5634, "step": 138730 }, { "epoch": 1.54, "learning_rate": 2.439709008769599e-05, "loss": 0.6662, "step": 138735 }, { "epoch": 1.54, "learning_rate": 2.4396167360557475e-05, "loss": 0.6143, "step": 138740 }, { "epoch": 1.54, "learning_rate": 2.4395244633418966e-05, "loss": 0.5996, "step": 138745 }, { "epoch": 1.54, "learning_rate": 2.439432190628045e-05, "loss": 0.5857, "step": 138750 }, { "epoch": 1.54, "learning_rate": 2.439339917914194e-05, "loss": 0.6415, "step": 138755 }, { "epoch": 1.54, "learning_rate": 2.4392476452003427e-05, "loss": 0.6062, "step": 138760 }, { "epoch": 1.54, "learning_rate": 2.4391553724864914e-05, "loss": 0.5998, "step": 138765 }, { "epoch": 1.54, "learning_rate": 2.4390630997726402e-05, "loss": 0.6557, "step": 138770 }, { "epoch": 1.54, "learning_rate": 2.4389708270587887e-05, "loss": 0.6213, "step": 138775 }, { "epoch": 1.54, "learning_rate": 2.4388785543449378e-05, "loss": 0.5906, "step": 138780 }, { "epoch": 1.54, "learning_rate": 2.4387862816310862e-05, "loss": 0.6754, "step": 138785 }, { "epoch": 1.54, "learning_rate": 2.4386940089172354e-05, "loss": 0.6749, "step": 138790 }, { "epoch": 1.54, "learning_rate": 2.4386017362033838e-05, "loss": 0.6272, "step": 138795 }, { "epoch": 1.54, "learning_rate": 2.438509463489533e-05, "loss": 0.6236, "step": 138800 }, { "epoch": 1.54, "learning_rate": 2.4384171907756814e-05, "loss": 0.5963, "step": 138805 }, { "epoch": 1.54, "learning_rate": 2.4383249180618302e-05, "loss": 0.6628, "step": 138810 }, { "epoch": 1.54, "learning_rate": 2.438232645347979e-05, "loss": 0.6226, "step": 138815 }, { "epoch": 1.54, "learning_rate": 2.4381403726341277e-05, "loss": 0.6328, "step": 138820 }, { "epoch": 1.54, "learning_rate": 2.4380480999202765e-05, "loss": 0.6167, "step": 138825 }, { "epoch": 1.54, "learning_rate": 2.437955827206425e-05, "loss": 0.7052, "step": 138830 }, { "epoch": 1.54, "learning_rate": 2.437863554492574e-05, "loss": 0.6122, "step": 138835 }, { "epoch": 1.54, "learning_rate": 2.4377712817787225e-05, "loss": 0.6666, "step": 138840 }, { "epoch": 1.54, "learning_rate": 2.4376790090648717e-05, "loss": 0.6738, "step": 138845 }, { "epoch": 1.54, "learning_rate": 2.43758673635102e-05, "loss": 0.6271, "step": 138850 }, { "epoch": 1.54, "learning_rate": 2.4374944636371692e-05, "loss": 0.6109, "step": 138855 }, { "epoch": 1.54, "learning_rate": 2.4374021909233177e-05, "loss": 0.7187, "step": 138860 }, { "epoch": 1.54, "learning_rate": 2.4373099182094668e-05, "loss": 0.6676, "step": 138865 }, { "epoch": 1.54, "learning_rate": 2.4372176454956153e-05, "loss": 0.6506, "step": 138870 }, { "epoch": 1.54, "learning_rate": 2.437125372781764e-05, "loss": 0.629, "step": 138875 }, { "epoch": 1.54, "learning_rate": 2.4370331000679128e-05, "loss": 0.6195, "step": 138880 }, { "epoch": 1.54, "learning_rate": 2.4369408273540616e-05, "loss": 0.6565, "step": 138885 }, { "epoch": 1.54, "learning_rate": 2.4368485546402104e-05, "loss": 0.6275, "step": 138890 }, { "epoch": 1.54, "learning_rate": 2.4367562819263592e-05, "loss": 0.6626, "step": 138895 }, { "epoch": 1.54, "learning_rate": 2.436664009212508e-05, "loss": 0.612, "step": 138900 }, { "epoch": 1.54, "learning_rate": 2.4365717364986564e-05, "loss": 0.6501, "step": 138905 }, { "epoch": 1.54, "learning_rate": 2.4364794637848055e-05, "loss": 0.6297, "step": 138910 }, { "epoch": 1.54, "learning_rate": 2.436387191070954e-05, "loss": 0.6306, "step": 138915 }, { "epoch": 1.54, "learning_rate": 2.436294918357103e-05, "loss": 0.6586, "step": 138920 }, { "epoch": 1.54, "learning_rate": 2.4362026456432515e-05, "loss": 0.6307, "step": 138925 }, { "epoch": 1.54, "learning_rate": 2.4361103729294003e-05, "loss": 0.6871, "step": 138930 }, { "epoch": 1.54, "learning_rate": 2.436018100215549e-05, "loss": 0.6821, "step": 138935 }, { "epoch": 1.54, "learning_rate": 2.435925827501698e-05, "loss": 0.6436, "step": 138940 }, { "epoch": 1.54, "learning_rate": 2.4358335547878467e-05, "loss": 0.6224, "step": 138945 }, { "epoch": 1.54, "learning_rate": 2.4357412820739955e-05, "loss": 0.6071, "step": 138950 }, { "epoch": 1.54, "learning_rate": 2.4356490093601443e-05, "loss": 0.7013, "step": 138955 }, { "epoch": 1.54, "learning_rate": 2.435556736646293e-05, "loss": 0.6963, "step": 138960 }, { "epoch": 1.54, "learning_rate": 2.4354644639324418e-05, "loss": 0.6453, "step": 138965 }, { "epoch": 1.54, "learning_rate": 2.4353721912185906e-05, "loss": 0.6918, "step": 138970 }, { "epoch": 1.54, "learning_rate": 2.4352799185047394e-05, "loss": 0.5924, "step": 138975 }, { "epoch": 1.54, "learning_rate": 2.435187645790888e-05, "loss": 0.6202, "step": 138980 }, { "epoch": 1.54, "learning_rate": 2.4350953730770366e-05, "loss": 0.6247, "step": 138985 }, { "epoch": 1.54, "learning_rate": 2.4350031003631854e-05, "loss": 0.6721, "step": 138990 }, { "epoch": 1.54, "learning_rate": 2.4349108276493342e-05, "loss": 0.5976, "step": 138995 }, { "epoch": 1.54, "learning_rate": 2.434818554935483e-05, "loss": 0.6515, "step": 139000 }, { "epoch": 1.54, "eval_loss": 0.626746654510498, "eval_runtime": 69.2834, "eval_samples_per_second": 28.867, "eval_steps_per_second": 14.433, "step": 139000 }, { "epoch": 1.54, "learning_rate": 2.4347262822216318e-05, "loss": 0.6497, "step": 139005 }, { "epoch": 1.54, "learning_rate": 2.4346340095077805e-05, "loss": 0.6741, "step": 139010 }, { "epoch": 1.54, "learning_rate": 2.4345417367939293e-05, "loss": 0.643, "step": 139015 }, { "epoch": 1.54, "learning_rate": 2.434449464080078e-05, "loss": 0.6525, "step": 139020 }, { "epoch": 1.54, "learning_rate": 2.434357191366227e-05, "loss": 0.7074, "step": 139025 }, { "epoch": 1.54, "learning_rate": 2.4342649186523757e-05, "loss": 0.6791, "step": 139030 }, { "epoch": 1.54, "learning_rate": 2.4341726459385245e-05, "loss": 0.6732, "step": 139035 }, { "epoch": 1.54, "learning_rate": 2.434080373224673e-05, "loss": 0.6233, "step": 139040 }, { "epoch": 1.54, "learning_rate": 2.433988100510822e-05, "loss": 0.647, "step": 139045 }, { "epoch": 1.54, "learning_rate": 2.4338958277969705e-05, "loss": 0.6817, "step": 139050 }, { "epoch": 1.54, "learning_rate": 2.4338035550831193e-05, "loss": 0.6517, "step": 139055 }, { "epoch": 1.54, "learning_rate": 2.433711282369268e-05, "loss": 0.629, "step": 139060 }, { "epoch": 1.54, "learning_rate": 2.433619009655417e-05, "loss": 0.7177, "step": 139065 }, { "epoch": 1.54, "learning_rate": 2.4335267369415656e-05, "loss": 0.684, "step": 139070 }, { "epoch": 1.54, "learning_rate": 2.4334344642277144e-05, "loss": 0.6562, "step": 139075 }, { "epoch": 1.54, "learning_rate": 2.4333421915138632e-05, "loss": 0.6542, "step": 139080 }, { "epoch": 1.54, "learning_rate": 2.433249918800012e-05, "loss": 0.6594, "step": 139085 }, { "epoch": 1.54, "learning_rate": 2.4331576460861608e-05, "loss": 0.601, "step": 139090 }, { "epoch": 1.54, "learning_rate": 2.4330653733723096e-05, "loss": 0.65, "step": 139095 }, { "epoch": 1.54, "learning_rate": 2.4329731006584583e-05, "loss": 0.5958, "step": 139100 }, { "epoch": 1.54, "learning_rate": 2.4328808279446068e-05, "loss": 0.6664, "step": 139105 }, { "epoch": 1.54, "learning_rate": 2.432788555230756e-05, "loss": 0.6396, "step": 139110 }, { "epoch": 1.54, "learning_rate": 2.4326962825169044e-05, "loss": 0.6001, "step": 139115 }, { "epoch": 1.54, "learning_rate": 2.4326040098030535e-05, "loss": 0.6779, "step": 139120 }, { "epoch": 1.54, "learning_rate": 2.432511737089202e-05, "loss": 0.6034, "step": 139125 }, { "epoch": 1.54, "learning_rate": 2.4324194643753507e-05, "loss": 0.7321, "step": 139130 }, { "epoch": 1.54, "learning_rate": 2.4323271916614995e-05, "loss": 0.6586, "step": 139135 }, { "epoch": 1.54, "learning_rate": 2.4322349189476483e-05, "loss": 0.6675, "step": 139140 }, { "epoch": 1.54, "learning_rate": 2.432142646233797e-05, "loss": 0.6239, "step": 139145 }, { "epoch": 1.54, "learning_rate": 2.432050373519946e-05, "loss": 0.6138, "step": 139150 }, { "epoch": 1.54, "learning_rate": 2.4319581008060946e-05, "loss": 0.6529, "step": 139155 }, { "epoch": 1.54, "learning_rate": 2.431865828092243e-05, "loss": 0.679, "step": 139160 }, { "epoch": 1.54, "learning_rate": 2.4317735553783922e-05, "loss": 0.6414, "step": 139165 }, { "epoch": 1.54, "learning_rate": 2.4316812826645406e-05, "loss": 0.6704, "step": 139170 }, { "epoch": 1.54, "learning_rate": 2.4315890099506898e-05, "loss": 0.5635, "step": 139175 }, { "epoch": 1.54, "learning_rate": 2.4314967372368382e-05, "loss": 0.6218, "step": 139180 }, { "epoch": 1.54, "learning_rate": 2.4314044645229873e-05, "loss": 0.6327, "step": 139185 }, { "epoch": 1.54, "learning_rate": 2.4313121918091358e-05, "loss": 0.7242, "step": 139190 }, { "epoch": 1.54, "learning_rate": 2.4312199190952846e-05, "loss": 0.6251, "step": 139195 }, { "epoch": 1.54, "learning_rate": 2.4311276463814334e-05, "loss": 0.642, "step": 139200 }, { "epoch": 1.54, "learning_rate": 2.431035373667582e-05, "loss": 0.6589, "step": 139205 }, { "epoch": 1.54, "learning_rate": 2.430943100953731e-05, "loss": 0.6188, "step": 139210 }, { "epoch": 1.54, "learning_rate": 2.4308508282398794e-05, "loss": 0.6474, "step": 139215 }, { "epoch": 1.54, "learning_rate": 2.4307585555260285e-05, "loss": 0.5436, "step": 139220 }, { "epoch": 1.54, "learning_rate": 2.430666282812177e-05, "loss": 0.63, "step": 139225 }, { "epoch": 1.54, "learning_rate": 2.430574010098326e-05, "loss": 0.6764, "step": 139230 }, { "epoch": 1.54, "learning_rate": 2.4304817373844745e-05, "loss": 0.6187, "step": 139235 }, { "epoch": 1.54, "learning_rate": 2.4303894646706236e-05, "loss": 0.639, "step": 139240 }, { "epoch": 1.54, "learning_rate": 2.430297191956772e-05, "loss": 0.7207, "step": 139245 }, { "epoch": 1.54, "learning_rate": 2.4302049192429212e-05, "loss": 0.6312, "step": 139250 }, { "epoch": 1.54, "learning_rate": 2.4301126465290697e-05, "loss": 0.6823, "step": 139255 }, { "epoch": 1.54, "learning_rate": 2.4300203738152184e-05, "loss": 0.6434, "step": 139260 }, { "epoch": 1.54, "learning_rate": 2.4299281011013672e-05, "loss": 0.7185, "step": 139265 }, { "epoch": 1.54, "learning_rate": 2.429835828387516e-05, "loss": 0.6309, "step": 139270 }, { "epoch": 1.54, "learning_rate": 2.4297435556736648e-05, "loss": 0.7008, "step": 139275 }, { "epoch": 1.54, "learning_rate": 2.4296512829598132e-05, "loss": 0.6256, "step": 139280 }, { "epoch": 1.54, "learning_rate": 2.4295590102459624e-05, "loss": 0.6445, "step": 139285 }, { "epoch": 1.54, "learning_rate": 2.4294667375321108e-05, "loss": 0.6394, "step": 139290 }, { "epoch": 1.54, "learning_rate": 2.42937446481826e-05, "loss": 0.6945, "step": 139295 }, { "epoch": 1.54, "learning_rate": 2.4292821921044084e-05, "loss": 0.6999, "step": 139300 }, { "epoch": 1.54, "learning_rate": 2.4291899193905575e-05, "loss": 0.6614, "step": 139305 }, { "epoch": 1.54, "learning_rate": 2.429097646676706e-05, "loss": 0.6228, "step": 139310 }, { "epoch": 1.54, "learning_rate": 2.4290053739628547e-05, "loss": 0.6567, "step": 139315 }, { "epoch": 1.54, "learning_rate": 2.4289131012490035e-05, "loss": 0.6805, "step": 139320 }, { "epoch": 1.54, "learning_rate": 2.4288208285351523e-05, "loss": 0.6182, "step": 139325 }, { "epoch": 1.54, "learning_rate": 2.428728555821301e-05, "loss": 0.5921, "step": 139330 }, { "epoch": 1.54, "learning_rate": 2.42863628310745e-05, "loss": 0.6023, "step": 139335 }, { "epoch": 1.54, "learning_rate": 2.4285440103935987e-05, "loss": 0.6655, "step": 139340 }, { "epoch": 1.54, "learning_rate": 2.4284517376797474e-05, "loss": 0.6733, "step": 139345 }, { "epoch": 1.54, "learning_rate": 2.4283594649658962e-05, "loss": 0.6738, "step": 139350 }, { "epoch": 1.54, "learning_rate": 2.4282671922520447e-05, "loss": 0.6901, "step": 139355 }, { "epoch": 1.54, "learning_rate": 2.4281749195381938e-05, "loss": 0.6325, "step": 139360 }, { "epoch": 1.54, "learning_rate": 2.4280826468243422e-05, "loss": 0.656, "step": 139365 }, { "epoch": 1.54, "learning_rate": 2.427990374110491e-05, "loss": 0.6289, "step": 139370 }, { "epoch": 1.54, "learning_rate": 2.4278981013966398e-05, "loss": 0.6507, "step": 139375 }, { "epoch": 1.54, "learning_rate": 2.4278058286827886e-05, "loss": 0.6683, "step": 139380 }, { "epoch": 1.54, "learning_rate": 2.4277135559689374e-05, "loss": 0.6539, "step": 139385 }, { "epoch": 1.54, "learning_rate": 2.427621283255086e-05, "loss": 0.6021, "step": 139390 }, { "epoch": 1.54, "learning_rate": 2.427529010541235e-05, "loss": 0.6656, "step": 139395 }, { "epoch": 1.54, "learning_rate": 2.4274367378273837e-05, "loss": 0.5966, "step": 139400 }, { "epoch": 1.54, "learning_rate": 2.4273444651135325e-05, "loss": 0.6674, "step": 139405 }, { "epoch": 1.54, "learning_rate": 2.4272521923996813e-05, "loss": 0.6514, "step": 139410 }, { "epoch": 1.54, "learning_rate": 2.42715991968583e-05, "loss": 0.6312, "step": 139415 }, { "epoch": 1.54, "learning_rate": 2.427067646971979e-05, "loss": 0.6461, "step": 139420 }, { "epoch": 1.54, "learning_rate": 2.4269753742581273e-05, "loss": 0.6589, "step": 139425 }, { "epoch": 1.54, "learning_rate": 2.4268831015442764e-05, "loss": 0.6528, "step": 139430 }, { "epoch": 1.54, "learning_rate": 2.426790828830425e-05, "loss": 0.7115, "step": 139435 }, { "epoch": 1.54, "learning_rate": 2.4266985561165737e-05, "loss": 0.6252, "step": 139440 }, { "epoch": 1.54, "learning_rate": 2.4266062834027225e-05, "loss": 0.6283, "step": 139445 }, { "epoch": 1.54, "learning_rate": 2.4265140106888712e-05, "loss": 0.6562, "step": 139450 }, { "epoch": 1.54, "learning_rate": 2.42642173797502e-05, "loss": 0.5975, "step": 139455 }, { "epoch": 1.54, "learning_rate": 2.4263294652611688e-05, "loss": 0.6411, "step": 139460 }, { "epoch": 1.54, "learning_rate": 2.4262371925473176e-05, "loss": 0.663, "step": 139465 }, { "epoch": 1.54, "learning_rate": 2.4261449198334664e-05, "loss": 0.6161, "step": 139470 }, { "epoch": 1.54, "learning_rate": 2.4260526471196152e-05, "loss": 0.6913, "step": 139475 }, { "epoch": 1.54, "learning_rate": 2.425960374405764e-05, "loss": 0.629, "step": 139480 }, { "epoch": 1.54, "learning_rate": 2.4258681016919127e-05, "loss": 0.6655, "step": 139485 }, { "epoch": 1.54, "learning_rate": 2.4257758289780612e-05, "loss": 0.6644, "step": 139490 }, { "epoch": 1.54, "learning_rate": 2.4256835562642103e-05, "loss": 0.6293, "step": 139495 }, { "epoch": 1.54, "learning_rate": 2.4255912835503588e-05, "loss": 0.6548, "step": 139500 }, { "epoch": 1.54, "learning_rate": 2.425499010836508e-05, "loss": 0.6792, "step": 139505 }, { "epoch": 1.54, "learning_rate": 2.4254067381226563e-05, "loss": 0.6203, "step": 139510 }, { "epoch": 1.54, "learning_rate": 2.425314465408805e-05, "loss": 0.6947, "step": 139515 }, { "epoch": 1.54, "learning_rate": 2.425222192694954e-05, "loss": 0.7043, "step": 139520 }, { "epoch": 1.54, "learning_rate": 2.4251299199811027e-05, "loss": 0.642, "step": 139525 }, { "epoch": 1.54, "learning_rate": 2.4250376472672515e-05, "loss": 0.6591, "step": 139530 }, { "epoch": 1.55, "learning_rate": 2.4249453745534003e-05, "loss": 0.6478, "step": 139535 }, { "epoch": 1.55, "learning_rate": 2.424853101839549e-05, "loss": 0.6031, "step": 139540 }, { "epoch": 1.55, "learning_rate": 2.4247608291256975e-05, "loss": 0.6019, "step": 139545 }, { "epoch": 1.55, "learning_rate": 2.4246685564118466e-05, "loss": 0.6125, "step": 139550 }, { "epoch": 1.55, "learning_rate": 2.424576283697995e-05, "loss": 0.6541, "step": 139555 }, { "epoch": 1.55, "learning_rate": 2.4244840109841442e-05, "loss": 0.6091, "step": 139560 }, { "epoch": 1.55, "learning_rate": 2.4243917382702926e-05, "loss": 0.631, "step": 139565 }, { "epoch": 1.55, "learning_rate": 2.4242994655564417e-05, "loss": 0.6354, "step": 139570 }, { "epoch": 1.55, "learning_rate": 2.4242071928425902e-05, "loss": 0.6593, "step": 139575 }, { "epoch": 1.55, "learning_rate": 2.424114920128739e-05, "loss": 0.6681, "step": 139580 }, { "epoch": 1.55, "learning_rate": 2.4240226474148878e-05, "loss": 0.6421, "step": 139585 }, { "epoch": 1.55, "learning_rate": 2.4239303747010365e-05, "loss": 0.6718, "step": 139590 }, { "epoch": 1.55, "learning_rate": 2.4238381019871853e-05, "loss": 0.6522, "step": 139595 }, { "epoch": 1.55, "learning_rate": 2.4237458292733338e-05, "loss": 0.6562, "step": 139600 }, { "epoch": 1.55, "learning_rate": 2.423653556559483e-05, "loss": 0.6268, "step": 139605 }, { "epoch": 1.55, "learning_rate": 2.4235612838456313e-05, "loss": 0.6933, "step": 139610 }, { "epoch": 1.55, "learning_rate": 2.4234690111317805e-05, "loss": 0.6393, "step": 139615 }, { "epoch": 1.55, "learning_rate": 2.423376738417929e-05, "loss": 0.6434, "step": 139620 }, { "epoch": 1.55, "learning_rate": 2.423284465704078e-05, "loss": 0.6419, "step": 139625 }, { "epoch": 1.55, "learning_rate": 2.4231921929902265e-05, "loss": 0.6014, "step": 139630 }, { "epoch": 1.55, "learning_rate": 2.4230999202763756e-05, "loss": 0.6449, "step": 139635 }, { "epoch": 1.55, "learning_rate": 2.423007647562524e-05, "loss": 0.6827, "step": 139640 }, { "epoch": 1.55, "learning_rate": 2.422915374848673e-05, "loss": 0.6087, "step": 139645 }, { "epoch": 1.55, "learning_rate": 2.4228231021348216e-05, "loss": 0.6493, "step": 139650 }, { "epoch": 1.55, "learning_rate": 2.4227308294209704e-05, "loss": 0.6215, "step": 139655 }, { "epoch": 1.55, "learning_rate": 2.4226385567071192e-05, "loss": 0.6361, "step": 139660 }, { "epoch": 1.55, "learning_rate": 2.4225462839932676e-05, "loss": 0.63, "step": 139665 }, { "epoch": 1.55, "learning_rate": 2.4224540112794168e-05, "loss": 0.5889, "step": 139670 }, { "epoch": 1.55, "learning_rate": 2.4223617385655652e-05, "loss": 0.6347, "step": 139675 }, { "epoch": 1.55, "learning_rate": 2.4222694658517143e-05, "loss": 0.6724, "step": 139680 }, { "epoch": 1.55, "learning_rate": 2.4221771931378628e-05, "loss": 0.6013, "step": 139685 }, { "epoch": 1.55, "learning_rate": 2.422084920424012e-05, "loss": 0.671, "step": 139690 }, { "epoch": 1.55, "learning_rate": 2.4219926477101603e-05, "loss": 0.6507, "step": 139695 }, { "epoch": 1.55, "learning_rate": 2.421900374996309e-05, "loss": 0.608, "step": 139700 }, { "epoch": 1.55, "learning_rate": 2.421808102282458e-05, "loss": 0.6371, "step": 139705 }, { "epoch": 1.55, "learning_rate": 2.4217158295686067e-05, "loss": 0.6272, "step": 139710 }, { "epoch": 1.55, "learning_rate": 2.4216235568547555e-05, "loss": 0.6501, "step": 139715 }, { "epoch": 1.55, "learning_rate": 2.4215312841409043e-05, "loss": 0.6544, "step": 139720 }, { "epoch": 1.55, "learning_rate": 2.421439011427053e-05, "loss": 0.7023, "step": 139725 }, { "epoch": 1.55, "learning_rate": 2.421346738713202e-05, "loss": 0.6257, "step": 139730 }, { "epoch": 1.55, "learning_rate": 2.4212544659993506e-05, "loss": 0.6198, "step": 139735 }, { "epoch": 1.55, "learning_rate": 2.421162193285499e-05, "loss": 0.6181, "step": 139740 }, { "epoch": 1.55, "learning_rate": 2.4210699205716482e-05, "loss": 0.662, "step": 139745 }, { "epoch": 1.55, "learning_rate": 2.4209776478577966e-05, "loss": 0.6062, "step": 139750 }, { "epoch": 1.55, "learning_rate": 2.4208853751439454e-05, "loss": 0.6476, "step": 139755 }, { "epoch": 1.55, "learning_rate": 2.4207931024300942e-05, "loss": 0.5734, "step": 139760 }, { "epoch": 1.55, "learning_rate": 2.420700829716243e-05, "loss": 0.609, "step": 139765 }, { "epoch": 1.55, "learning_rate": 2.4206085570023918e-05, "loss": 0.6335, "step": 139770 }, { "epoch": 1.55, "learning_rate": 2.4205162842885406e-05, "loss": 0.6487, "step": 139775 }, { "epoch": 1.55, "learning_rate": 2.4204240115746894e-05, "loss": 0.6454, "step": 139780 }, { "epoch": 1.55, "learning_rate": 2.420331738860838e-05, "loss": 0.7002, "step": 139785 }, { "epoch": 1.55, "learning_rate": 2.420239466146987e-05, "loss": 0.6859, "step": 139790 }, { "epoch": 1.55, "learning_rate": 2.4201471934331357e-05, "loss": 0.6684, "step": 139795 }, { "epoch": 1.55, "learning_rate": 2.4200549207192845e-05, "loss": 0.6668, "step": 139800 }, { "epoch": 1.55, "learning_rate": 2.4199626480054333e-05, "loss": 0.5533, "step": 139805 }, { "epoch": 1.55, "learning_rate": 2.4198703752915817e-05, "loss": 0.6256, "step": 139810 }, { "epoch": 1.55, "learning_rate": 2.4197781025777305e-05, "loss": 0.692, "step": 139815 }, { "epoch": 1.55, "learning_rate": 2.4196858298638793e-05, "loss": 0.6925, "step": 139820 }, { "epoch": 1.55, "learning_rate": 2.419593557150028e-05, "loss": 0.6435, "step": 139825 }, { "epoch": 1.55, "learning_rate": 2.419501284436177e-05, "loss": 0.644, "step": 139830 }, { "epoch": 1.55, "learning_rate": 2.4194090117223256e-05, "loss": 0.64, "step": 139835 }, { "epoch": 1.55, "learning_rate": 2.4193167390084744e-05, "loss": 0.6282, "step": 139840 }, { "epoch": 1.55, "learning_rate": 2.4192244662946232e-05, "loss": 0.5981, "step": 139845 }, { "epoch": 1.55, "learning_rate": 2.419132193580772e-05, "loss": 0.6026, "step": 139850 }, { "epoch": 1.55, "learning_rate": 2.4190399208669208e-05, "loss": 0.6596, "step": 139855 }, { "epoch": 1.55, "learning_rate": 2.4189476481530696e-05, "loss": 0.6382, "step": 139860 }, { "epoch": 1.55, "learning_rate": 2.4188553754392184e-05, "loss": 0.5728, "step": 139865 }, { "epoch": 1.55, "learning_rate": 2.418763102725367e-05, "loss": 0.6541, "step": 139870 }, { "epoch": 1.55, "learning_rate": 2.4186708300115156e-05, "loss": 0.6477, "step": 139875 }, { "epoch": 1.55, "learning_rate": 2.4185785572976647e-05, "loss": 0.6458, "step": 139880 }, { "epoch": 1.55, "learning_rate": 2.418486284583813e-05, "loss": 0.6506, "step": 139885 }, { "epoch": 1.55, "learning_rate": 2.418394011869962e-05, "loss": 0.6706, "step": 139890 }, { "epoch": 1.55, "learning_rate": 2.4183017391561107e-05, "loss": 0.6148, "step": 139895 }, { "epoch": 1.55, "learning_rate": 2.4182094664422595e-05, "loss": 0.7002, "step": 139900 }, { "epoch": 1.55, "learning_rate": 2.4181171937284083e-05, "loss": 0.6602, "step": 139905 }, { "epoch": 1.55, "learning_rate": 2.418024921014557e-05, "loss": 0.6328, "step": 139910 }, { "epoch": 1.55, "learning_rate": 2.417932648300706e-05, "loss": 0.5924, "step": 139915 }, { "epoch": 1.55, "learning_rate": 2.4178403755868547e-05, "loss": 0.6942, "step": 139920 }, { "epoch": 1.55, "learning_rate": 2.4177481028730034e-05, "loss": 0.5866, "step": 139925 }, { "epoch": 1.55, "learning_rate": 2.417655830159152e-05, "loss": 0.6297, "step": 139930 }, { "epoch": 1.55, "learning_rate": 2.417563557445301e-05, "loss": 0.6389, "step": 139935 }, { "epoch": 1.55, "learning_rate": 2.4174712847314495e-05, "loss": 0.6328, "step": 139940 }, { "epoch": 1.55, "learning_rate": 2.4173790120175986e-05, "loss": 0.6227, "step": 139945 }, { "epoch": 1.55, "learning_rate": 2.417286739303747e-05, "loss": 0.6545, "step": 139950 }, { "epoch": 1.55, "learning_rate": 2.417194466589896e-05, "loss": 0.6432, "step": 139955 }, { "epoch": 1.55, "learning_rate": 2.4171021938760446e-05, "loss": 0.6033, "step": 139960 }, { "epoch": 1.55, "learning_rate": 2.4170099211621934e-05, "loss": 0.621, "step": 139965 }, { "epoch": 1.55, "learning_rate": 2.416917648448342e-05, "loss": 0.6823, "step": 139970 }, { "epoch": 1.55, "learning_rate": 2.416825375734491e-05, "loss": 0.702, "step": 139975 }, { "epoch": 1.55, "learning_rate": 2.4167331030206397e-05, "loss": 0.6313, "step": 139980 }, { "epoch": 1.55, "learning_rate": 2.4166408303067882e-05, "loss": 0.6178, "step": 139985 }, { "epoch": 1.55, "learning_rate": 2.4165485575929373e-05, "loss": 0.6199, "step": 139990 }, { "epoch": 1.55, "learning_rate": 2.4164562848790857e-05, "loss": 0.5846, "step": 139995 }, { "epoch": 1.55, "learning_rate": 2.416364012165235e-05, "loss": 0.5902, "step": 140000 }, { "epoch": 1.55, "eval_loss": 0.5913480520248413, "eval_runtime": 69.5399, "eval_samples_per_second": 28.76, "eval_steps_per_second": 14.38, "step": 140000 }, { "epoch": 1.55, "learning_rate": 2.4162717394513833e-05, "loss": 0.6902, "step": 140005 }, { "epoch": 1.55, "learning_rate": 2.4161794667375324e-05, "loss": 0.6476, "step": 140010 }, { "epoch": 1.55, "learning_rate": 2.416087194023681e-05, "loss": 0.6379, "step": 140015 }, { "epoch": 1.55, "learning_rate": 2.41599492130983e-05, "loss": 0.6277, "step": 140020 }, { "epoch": 1.55, "learning_rate": 2.4159026485959785e-05, "loss": 0.6489, "step": 140025 }, { "epoch": 1.55, "learning_rate": 2.4158103758821272e-05, "loss": 0.6208, "step": 140030 }, { "epoch": 1.55, "learning_rate": 2.415718103168276e-05, "loss": 0.6363, "step": 140035 }, { "epoch": 1.55, "learning_rate": 2.4156258304544248e-05, "loss": 0.6567, "step": 140040 }, { "epoch": 1.55, "learning_rate": 2.4155335577405736e-05, "loss": 0.6475, "step": 140045 }, { "epoch": 1.55, "learning_rate": 2.415441285026722e-05, "loss": 0.6521, "step": 140050 }, { "epoch": 1.55, "learning_rate": 2.415349012312871e-05, "loss": 0.5675, "step": 140055 }, { "epoch": 1.55, "learning_rate": 2.4152567395990196e-05, "loss": 0.6775, "step": 140060 }, { "epoch": 1.55, "learning_rate": 2.4151644668851687e-05, "loss": 0.6171, "step": 140065 }, { "epoch": 1.55, "learning_rate": 2.4150721941713172e-05, "loss": 0.6233, "step": 140070 }, { "epoch": 1.55, "learning_rate": 2.4149799214574663e-05, "loss": 0.6126, "step": 140075 }, { "epoch": 1.55, "learning_rate": 2.4148876487436148e-05, "loss": 0.6166, "step": 140080 }, { "epoch": 1.55, "learning_rate": 2.4147953760297635e-05, "loss": 0.6447, "step": 140085 }, { "epoch": 1.55, "learning_rate": 2.4147031033159123e-05, "loss": 0.6166, "step": 140090 }, { "epoch": 1.55, "learning_rate": 2.414610830602061e-05, "loss": 0.6647, "step": 140095 }, { "epoch": 1.55, "learning_rate": 2.41451855788821e-05, "loss": 0.6519, "step": 140100 }, { "epoch": 1.55, "learning_rate": 2.4144262851743587e-05, "loss": 0.6368, "step": 140105 }, { "epoch": 1.55, "learning_rate": 2.4143340124605075e-05, "loss": 0.5724, "step": 140110 }, { "epoch": 1.55, "learning_rate": 2.414241739746656e-05, "loss": 0.585, "step": 140115 }, { "epoch": 1.55, "learning_rate": 2.414149467032805e-05, "loss": 0.6454, "step": 140120 }, { "epoch": 1.55, "learning_rate": 2.4140571943189535e-05, "loss": 0.6528, "step": 140125 }, { "epoch": 1.55, "learning_rate": 2.4139649216051026e-05, "loss": 0.6197, "step": 140130 }, { "epoch": 1.55, "learning_rate": 2.413872648891251e-05, "loss": 0.6412, "step": 140135 }, { "epoch": 1.55, "learning_rate": 2.4137803761774e-05, "loss": 0.5931, "step": 140140 }, { "epoch": 1.55, "learning_rate": 2.4136881034635486e-05, "loss": 0.6466, "step": 140145 }, { "epoch": 1.55, "learning_rate": 2.4135958307496974e-05, "loss": 0.6648, "step": 140150 }, { "epoch": 1.55, "learning_rate": 2.4135035580358462e-05, "loss": 0.6121, "step": 140155 }, { "epoch": 1.55, "learning_rate": 2.413411285321995e-05, "loss": 0.6303, "step": 140160 }, { "epoch": 1.55, "learning_rate": 2.4133190126081438e-05, "loss": 0.6564, "step": 140165 }, { "epoch": 1.55, "learning_rate": 2.4132267398942925e-05, "loss": 0.63, "step": 140170 }, { "epoch": 1.55, "learning_rate": 2.4131344671804413e-05, "loss": 0.6447, "step": 140175 }, { "epoch": 1.55, "learning_rate": 2.41304219446659e-05, "loss": 0.6299, "step": 140180 }, { "epoch": 1.55, "learning_rate": 2.412949921752739e-05, "loss": 0.6761, "step": 140185 }, { "epoch": 1.55, "learning_rate": 2.4128576490388873e-05, "loss": 0.676, "step": 140190 }, { "epoch": 1.55, "learning_rate": 2.4127653763250365e-05, "loss": 0.6093, "step": 140195 }, { "epoch": 1.55, "learning_rate": 2.412673103611185e-05, "loss": 0.6483, "step": 140200 }, { "epoch": 1.55, "learning_rate": 2.4125808308973337e-05, "loss": 0.6466, "step": 140205 }, { "epoch": 1.55, "learning_rate": 2.4124885581834825e-05, "loss": 0.608, "step": 140210 }, { "epoch": 1.55, "learning_rate": 2.4123962854696313e-05, "loss": 0.6338, "step": 140215 }, { "epoch": 1.55, "learning_rate": 2.41230401275578e-05, "loss": 0.6103, "step": 140220 }, { "epoch": 1.55, "learning_rate": 2.412211740041929e-05, "loss": 0.6695, "step": 140225 }, { "epoch": 1.55, "learning_rate": 2.4121194673280776e-05, "loss": 0.6482, "step": 140230 }, { "epoch": 1.55, "learning_rate": 2.4120271946142264e-05, "loss": 0.6206, "step": 140235 }, { "epoch": 1.55, "learning_rate": 2.4119349219003752e-05, "loss": 0.5759, "step": 140240 }, { "epoch": 1.55, "learning_rate": 2.411842649186524e-05, "loss": 0.6703, "step": 140245 }, { "epoch": 1.55, "learning_rate": 2.4117503764726728e-05, "loss": 0.6838, "step": 140250 }, { "epoch": 1.55, "learning_rate": 2.4116581037588215e-05, "loss": 0.6309, "step": 140255 }, { "epoch": 1.55, "learning_rate": 2.41156583104497e-05, "loss": 0.6135, "step": 140260 }, { "epoch": 1.55, "learning_rate": 2.4114735583311188e-05, "loss": 0.6735, "step": 140265 }, { "epoch": 1.55, "learning_rate": 2.4113812856172676e-05, "loss": 0.682, "step": 140270 }, { "epoch": 1.55, "learning_rate": 2.4112890129034163e-05, "loss": 0.61, "step": 140275 }, { "epoch": 1.55, "learning_rate": 2.411196740189565e-05, "loss": 0.6503, "step": 140280 }, { "epoch": 1.55, "learning_rate": 2.411104467475714e-05, "loss": 0.6003, "step": 140285 }, { "epoch": 1.55, "learning_rate": 2.4110121947618627e-05, "loss": 0.5877, "step": 140290 }, { "epoch": 1.55, "learning_rate": 2.4109199220480115e-05, "loss": 0.6631, "step": 140295 }, { "epoch": 1.55, "learning_rate": 2.4108276493341603e-05, "loss": 0.6455, "step": 140300 }, { "epoch": 1.55, "learning_rate": 2.410735376620309e-05, "loss": 0.6609, "step": 140305 }, { "epoch": 1.55, "learning_rate": 2.410643103906458e-05, "loss": 0.5903, "step": 140310 }, { "epoch": 1.55, "learning_rate": 2.4105508311926063e-05, "loss": 0.6312, "step": 140315 }, { "epoch": 1.55, "learning_rate": 2.4104585584787554e-05, "loss": 0.6383, "step": 140320 }, { "epoch": 1.55, "learning_rate": 2.410366285764904e-05, "loss": 0.6468, "step": 140325 }, { "epoch": 1.55, "learning_rate": 2.410274013051053e-05, "loss": 0.6482, "step": 140330 }, { "epoch": 1.55, "learning_rate": 2.4101817403372014e-05, "loss": 0.6209, "step": 140335 }, { "epoch": 1.55, "learning_rate": 2.4100894676233505e-05, "loss": 0.6839, "step": 140340 }, { "epoch": 1.55, "learning_rate": 2.409997194909499e-05, "loss": 0.6128, "step": 140345 }, { "epoch": 1.55, "learning_rate": 2.4099049221956478e-05, "loss": 0.6091, "step": 140350 }, { "epoch": 1.55, "learning_rate": 2.4098126494817966e-05, "loss": 0.6258, "step": 140355 }, { "epoch": 1.55, "learning_rate": 2.4097203767679453e-05, "loss": 0.6405, "step": 140360 }, { "epoch": 1.55, "learning_rate": 2.409628104054094e-05, "loss": 0.6313, "step": 140365 }, { "epoch": 1.55, "learning_rate": 2.4095358313402426e-05, "loss": 0.6517, "step": 140370 }, { "epoch": 1.55, "learning_rate": 2.4094435586263917e-05, "loss": 0.6569, "step": 140375 }, { "epoch": 1.55, "learning_rate": 2.40935128591254e-05, "loss": 0.6213, "step": 140380 }, { "epoch": 1.55, "learning_rate": 2.4092590131986893e-05, "loss": 0.6484, "step": 140385 }, { "epoch": 1.55, "learning_rate": 2.4091667404848377e-05, "loss": 0.6908, "step": 140390 }, { "epoch": 1.55, "learning_rate": 2.409074467770987e-05, "loss": 0.7084, "step": 140395 }, { "epoch": 1.55, "learning_rate": 2.4089821950571353e-05, "loss": 0.6718, "step": 140400 }, { "epoch": 1.55, "learning_rate": 2.4088899223432844e-05, "loss": 0.6774, "step": 140405 }, { "epoch": 1.55, "learning_rate": 2.408797649629433e-05, "loss": 0.5805, "step": 140410 }, { "epoch": 1.55, "learning_rate": 2.4087053769155816e-05, "loss": 0.6747, "step": 140415 }, { "epoch": 1.55, "learning_rate": 2.4086131042017304e-05, "loss": 0.6448, "step": 140420 }, { "epoch": 1.55, "learning_rate": 2.4085208314878792e-05, "loss": 0.6011, "step": 140425 }, { "epoch": 1.55, "learning_rate": 2.408428558774028e-05, "loss": 0.6726, "step": 140430 }, { "epoch": 1.55, "learning_rate": 2.4083362860601764e-05, "loss": 0.657, "step": 140435 }, { "epoch": 1.56, "learning_rate": 2.4082440133463256e-05, "loss": 0.6834, "step": 140440 }, { "epoch": 1.56, "learning_rate": 2.408151740632474e-05, "loss": 0.6444, "step": 140445 }, { "epoch": 1.56, "learning_rate": 2.408059467918623e-05, "loss": 0.6346, "step": 140450 }, { "epoch": 1.56, "learning_rate": 2.4079671952047716e-05, "loss": 0.6141, "step": 140455 }, { "epoch": 1.56, "learning_rate": 2.4078749224909207e-05, "loss": 0.6045, "step": 140460 }, { "epoch": 1.56, "learning_rate": 2.407782649777069e-05, "loss": 0.5985, "step": 140465 }, { "epoch": 1.56, "learning_rate": 2.407690377063218e-05, "loss": 0.639, "step": 140470 }, { "epoch": 1.56, "learning_rate": 2.4075981043493667e-05, "loss": 0.6497, "step": 140475 }, { "epoch": 1.56, "learning_rate": 2.4075058316355155e-05, "loss": 0.6434, "step": 140480 }, { "epoch": 1.56, "learning_rate": 2.4074135589216643e-05, "loss": 0.6777, "step": 140485 }, { "epoch": 1.56, "learning_rate": 2.407321286207813e-05, "loss": 0.5882, "step": 140490 }, { "epoch": 1.56, "learning_rate": 2.407229013493962e-05, "loss": 0.6057, "step": 140495 }, { "epoch": 1.56, "learning_rate": 2.4071367407801103e-05, "loss": 0.6405, "step": 140500 }, { "epoch": 1.56, "learning_rate": 2.4070444680662594e-05, "loss": 0.6462, "step": 140505 }, { "epoch": 1.56, "learning_rate": 2.406952195352408e-05, "loss": 0.595, "step": 140510 }, { "epoch": 1.56, "learning_rate": 2.406859922638557e-05, "loss": 0.6327, "step": 140515 }, { "epoch": 1.56, "learning_rate": 2.4067676499247054e-05, "loss": 0.6087, "step": 140520 }, { "epoch": 1.56, "learning_rate": 2.4066753772108542e-05, "loss": 0.6177, "step": 140525 }, { "epoch": 1.56, "learning_rate": 2.406583104497003e-05, "loss": 0.6637, "step": 140530 }, { "epoch": 1.56, "learning_rate": 2.4064908317831518e-05, "loss": 0.58, "step": 140535 }, { "epoch": 1.56, "learning_rate": 2.4063985590693006e-05, "loss": 0.6435, "step": 140540 }, { "epoch": 1.56, "learning_rate": 2.4063062863554494e-05, "loss": 0.6685, "step": 140545 }, { "epoch": 1.56, "learning_rate": 2.406214013641598e-05, "loss": 0.6458, "step": 140550 }, { "epoch": 1.56, "learning_rate": 2.406121740927747e-05, "loss": 0.6222, "step": 140555 }, { "epoch": 1.56, "learning_rate": 2.4060294682138957e-05, "loss": 0.624, "step": 140560 }, { "epoch": 1.56, "learning_rate": 2.4059371955000445e-05, "loss": 0.6551, "step": 140565 }, { "epoch": 1.56, "learning_rate": 2.4058449227861933e-05, "loss": 0.6476, "step": 140570 }, { "epoch": 1.56, "learning_rate": 2.4057526500723417e-05, "loss": 0.6407, "step": 140575 }, { "epoch": 1.56, "learning_rate": 2.405660377358491e-05, "loss": 0.6468, "step": 140580 }, { "epoch": 1.56, "learning_rate": 2.4055681046446393e-05, "loss": 0.6445, "step": 140585 }, { "epoch": 1.56, "learning_rate": 2.405475831930788e-05, "loss": 0.697, "step": 140590 }, { "epoch": 1.56, "learning_rate": 2.405383559216937e-05, "loss": 0.6346, "step": 140595 }, { "epoch": 1.56, "learning_rate": 2.4052912865030857e-05, "loss": 0.6372, "step": 140600 }, { "epoch": 1.56, "learning_rate": 2.4051990137892345e-05, "loss": 0.6188, "step": 140605 }, { "epoch": 1.56, "learning_rate": 2.4051067410753832e-05, "loss": 0.6566, "step": 140610 }, { "epoch": 1.56, "learning_rate": 2.405014468361532e-05, "loss": 0.6922, "step": 140615 }, { "epoch": 1.56, "learning_rate": 2.4049221956476808e-05, "loss": 0.6459, "step": 140620 }, { "epoch": 1.56, "learning_rate": 2.4048299229338296e-05, "loss": 0.6614, "step": 140625 }, { "epoch": 1.56, "learning_rate": 2.4047376502199784e-05, "loss": 0.6587, "step": 140630 }, { "epoch": 1.56, "learning_rate": 2.404645377506127e-05, "loss": 0.6596, "step": 140635 }, { "epoch": 1.56, "learning_rate": 2.404553104792276e-05, "loss": 0.6097, "step": 140640 }, { "epoch": 1.56, "learning_rate": 2.4044608320784244e-05, "loss": 0.594, "step": 140645 }, { "epoch": 1.56, "learning_rate": 2.4043685593645732e-05, "loss": 0.6031, "step": 140650 }, { "epoch": 1.56, "learning_rate": 2.404276286650722e-05, "loss": 0.5925, "step": 140655 }, { "epoch": 1.56, "learning_rate": 2.4041840139368707e-05, "loss": 0.6894, "step": 140660 }, { "epoch": 1.56, "learning_rate": 2.4040917412230195e-05, "loss": 0.6078, "step": 140665 }, { "epoch": 1.56, "learning_rate": 2.4039994685091683e-05, "loss": 0.6646, "step": 140670 }, { "epoch": 1.56, "learning_rate": 2.403907195795317e-05, "loss": 0.6565, "step": 140675 }, { "epoch": 1.56, "learning_rate": 2.403814923081466e-05, "loss": 0.641, "step": 140680 }, { "epoch": 1.56, "learning_rate": 2.4037226503676147e-05, "loss": 0.6526, "step": 140685 }, { "epoch": 1.56, "learning_rate": 2.4036303776537635e-05, "loss": 0.6085, "step": 140690 }, { "epoch": 1.56, "learning_rate": 2.4035381049399122e-05, "loss": 0.6308, "step": 140695 }, { "epoch": 1.56, "learning_rate": 2.4034458322260607e-05, "loss": 0.6338, "step": 140700 }, { "epoch": 1.56, "learning_rate": 2.4033535595122098e-05, "loss": 0.6613, "step": 140705 }, { "epoch": 1.56, "learning_rate": 2.4032612867983583e-05, "loss": 0.6332, "step": 140710 }, { "epoch": 1.56, "learning_rate": 2.4031690140845074e-05, "loss": 0.6817, "step": 140715 }, { "epoch": 1.56, "learning_rate": 2.4030767413706558e-05, "loss": 0.6397, "step": 140720 }, { "epoch": 1.56, "learning_rate": 2.4029844686568046e-05, "loss": 0.6598, "step": 140725 }, { "epoch": 1.56, "learning_rate": 2.4028921959429534e-05, "loss": 0.6545, "step": 140730 }, { "epoch": 1.56, "learning_rate": 2.4027999232291022e-05, "loss": 0.6183, "step": 140735 }, { "epoch": 1.56, "learning_rate": 2.402707650515251e-05, "loss": 0.596, "step": 140740 }, { "epoch": 1.56, "learning_rate": 2.4026153778013998e-05, "loss": 0.6781, "step": 140745 }, { "epoch": 1.56, "learning_rate": 2.4025231050875485e-05, "loss": 0.6304, "step": 140750 }, { "epoch": 1.56, "learning_rate": 2.402430832373697e-05, "loss": 0.6449, "step": 140755 }, { "epoch": 1.56, "learning_rate": 2.402338559659846e-05, "loss": 0.5919, "step": 140760 }, { "epoch": 1.56, "learning_rate": 2.4022462869459946e-05, "loss": 0.6867, "step": 140765 }, { "epoch": 1.56, "learning_rate": 2.4021540142321437e-05, "loss": 0.6445, "step": 140770 }, { "epoch": 1.56, "learning_rate": 2.402061741518292e-05, "loss": 0.6274, "step": 140775 }, { "epoch": 1.56, "learning_rate": 2.4019694688044412e-05, "loss": 0.687, "step": 140780 }, { "epoch": 1.56, "learning_rate": 2.4018771960905897e-05, "loss": 0.6756, "step": 140785 }, { "epoch": 1.56, "learning_rate": 2.4017849233767388e-05, "loss": 0.6663, "step": 140790 }, { "epoch": 1.56, "learning_rate": 2.4016926506628873e-05, "loss": 0.6468, "step": 140795 }, { "epoch": 1.56, "learning_rate": 2.401600377949036e-05, "loss": 0.5887, "step": 140800 }, { "epoch": 1.56, "learning_rate": 2.4015081052351848e-05, "loss": 0.6721, "step": 140805 }, { "epoch": 1.56, "learning_rate": 2.4014158325213336e-05, "loss": 0.6332, "step": 140810 }, { "epoch": 1.56, "learning_rate": 2.4013235598074824e-05, "loss": 0.6625, "step": 140815 }, { "epoch": 1.56, "learning_rate": 2.401231287093631e-05, "loss": 0.6947, "step": 140820 }, { "epoch": 1.56, "learning_rate": 2.40113901437978e-05, "loss": 0.6213, "step": 140825 }, { "epoch": 1.56, "learning_rate": 2.4010467416659284e-05, "loss": 0.5706, "step": 140830 }, { "epoch": 1.56, "learning_rate": 2.4009544689520775e-05, "loss": 0.6382, "step": 140835 }, { "epoch": 1.56, "learning_rate": 2.400862196238226e-05, "loss": 0.643, "step": 140840 }, { "epoch": 1.56, "learning_rate": 2.400769923524375e-05, "loss": 0.6815, "step": 140845 }, { "epoch": 1.56, "learning_rate": 2.4006776508105236e-05, "loss": 0.5964, "step": 140850 }, { "epoch": 1.56, "learning_rate": 2.4005853780966723e-05, "loss": 0.612, "step": 140855 }, { "epoch": 1.56, "learning_rate": 2.400493105382821e-05, "loss": 0.6767, "step": 140860 }, { "epoch": 1.56, "learning_rate": 2.40040083266897e-05, "loss": 0.6324, "step": 140865 }, { "epoch": 1.56, "learning_rate": 2.4003085599551187e-05, "loss": 0.7144, "step": 140870 }, { "epoch": 1.56, "learning_rate": 2.400216287241267e-05, "loss": 0.6248, "step": 140875 }, { "epoch": 1.56, "learning_rate": 2.4001240145274163e-05, "loss": 0.6284, "step": 140880 }, { "epoch": 1.56, "learning_rate": 2.4000317418135647e-05, "loss": 0.6432, "step": 140885 }, { "epoch": 1.56, "learning_rate": 2.399939469099714e-05, "loss": 0.6342, "step": 140890 }, { "epoch": 1.56, "learning_rate": 2.3998471963858623e-05, "loss": 0.5974, "step": 140895 }, { "epoch": 1.56, "learning_rate": 2.3997549236720114e-05, "loss": 0.6086, "step": 140900 }, { "epoch": 1.56, "learning_rate": 2.39966265095816e-05, "loss": 0.6173, "step": 140905 }, { "epoch": 1.56, "learning_rate": 2.3995703782443086e-05, "loss": 0.6307, "step": 140910 }, { "epoch": 1.56, "learning_rate": 2.3994781055304574e-05, "loss": 0.6412, "step": 140915 }, { "epoch": 1.56, "learning_rate": 2.3993858328166062e-05, "loss": 0.603, "step": 140920 }, { "epoch": 1.56, "learning_rate": 2.399293560102755e-05, "loss": 0.6736, "step": 140925 }, { "epoch": 1.56, "learning_rate": 2.3992012873889038e-05, "loss": 0.627, "step": 140930 }, { "epoch": 1.56, "learning_rate": 2.3991090146750526e-05, "loss": 0.716, "step": 140935 }, { "epoch": 1.56, "learning_rate": 2.3990167419612013e-05, "loss": 0.6362, "step": 140940 }, { "epoch": 1.56, "learning_rate": 2.39892446924735e-05, "loss": 0.6708, "step": 140945 }, { "epoch": 1.56, "learning_rate": 2.3988321965334986e-05, "loss": 0.666, "step": 140950 }, { "epoch": 1.56, "learning_rate": 2.3987399238196477e-05, "loss": 0.6702, "step": 140955 }, { "epoch": 1.56, "learning_rate": 2.398647651105796e-05, "loss": 0.6851, "step": 140960 }, { "epoch": 1.56, "learning_rate": 2.3985553783919453e-05, "loss": 0.6511, "step": 140965 }, { "epoch": 1.56, "learning_rate": 2.3984631056780937e-05, "loss": 0.6201, "step": 140970 }, { "epoch": 1.56, "learning_rate": 2.3983708329642425e-05, "loss": 0.6221, "step": 140975 }, { "epoch": 1.56, "learning_rate": 2.3982785602503913e-05, "loss": 0.6314, "step": 140980 }, { "epoch": 1.56, "learning_rate": 2.39818628753654e-05, "loss": 0.6564, "step": 140985 }, { "epoch": 1.56, "learning_rate": 2.398094014822689e-05, "loss": 0.64, "step": 140990 }, { "epoch": 1.56, "learning_rate": 2.3980017421088376e-05, "loss": 0.5753, "step": 140995 }, { "epoch": 1.56, "learning_rate": 2.3979094693949864e-05, "loss": 0.6523, "step": 141000 }, { "epoch": 1.56, "eval_loss": 0.599164605140686, "eval_runtime": 69.54, "eval_samples_per_second": 28.76, "eval_steps_per_second": 14.38, "step": 141000 }, { "epoch": 1.56, "learning_rate": 2.3978171966811352e-05, "loss": 0.6715, "step": 141005 }, { "epoch": 1.56, "learning_rate": 2.397724923967284e-05, "loss": 0.5854, "step": 141010 }, { "epoch": 1.56, "learning_rate": 2.3976326512534328e-05, "loss": 0.6319, "step": 141015 }, { "epoch": 1.56, "learning_rate": 2.3975403785395816e-05, "loss": 0.5675, "step": 141020 }, { "epoch": 1.56, "learning_rate": 2.39744810582573e-05, "loss": 0.6532, "step": 141025 }, { "epoch": 1.56, "learning_rate": 2.3973558331118788e-05, "loss": 0.6193, "step": 141030 }, { "epoch": 1.56, "learning_rate": 2.3972635603980276e-05, "loss": 0.6296, "step": 141035 }, { "epoch": 1.56, "learning_rate": 2.3971712876841764e-05, "loss": 0.6405, "step": 141040 }, { "epoch": 1.56, "learning_rate": 2.397079014970325e-05, "loss": 0.6268, "step": 141045 }, { "epoch": 1.56, "learning_rate": 2.396986742256474e-05, "loss": 0.5991, "step": 141050 }, { "epoch": 1.56, "learning_rate": 2.3968944695426227e-05, "loss": 0.6477, "step": 141055 }, { "epoch": 1.56, "learning_rate": 2.3968021968287715e-05, "loss": 0.6287, "step": 141060 }, { "epoch": 1.56, "learning_rate": 2.3967099241149203e-05, "loss": 0.6297, "step": 141065 }, { "epoch": 1.56, "learning_rate": 2.396617651401069e-05, "loss": 0.5661, "step": 141070 }, { "epoch": 1.56, "learning_rate": 2.396525378687218e-05, "loss": 0.641, "step": 141075 }, { "epoch": 1.56, "learning_rate": 2.3964331059733666e-05, "loss": 0.6488, "step": 141080 }, { "epoch": 1.56, "learning_rate": 2.396340833259515e-05, "loss": 0.613, "step": 141085 }, { "epoch": 1.56, "learning_rate": 2.3962485605456642e-05, "loss": 0.6837, "step": 141090 }, { "epoch": 1.56, "learning_rate": 2.3961562878318127e-05, "loss": 0.6704, "step": 141095 }, { "epoch": 1.56, "learning_rate": 2.3960640151179614e-05, "loss": 0.7043, "step": 141100 }, { "epoch": 1.56, "learning_rate": 2.3959717424041102e-05, "loss": 0.6487, "step": 141105 }, { "epoch": 1.56, "learning_rate": 2.395879469690259e-05, "loss": 0.6565, "step": 141110 }, { "epoch": 1.56, "learning_rate": 2.3957871969764078e-05, "loss": 0.6563, "step": 141115 }, { "epoch": 1.56, "learning_rate": 2.3956949242625566e-05, "loss": 0.6689, "step": 141120 }, { "epoch": 1.56, "learning_rate": 2.3956026515487054e-05, "loss": 0.6332, "step": 141125 }, { "epoch": 1.56, "learning_rate": 2.395510378834854e-05, "loss": 0.6348, "step": 141130 }, { "epoch": 1.56, "learning_rate": 2.395418106121003e-05, "loss": 0.6181, "step": 141135 }, { "epoch": 1.56, "learning_rate": 2.3953258334071517e-05, "loss": 0.6404, "step": 141140 }, { "epoch": 1.56, "learning_rate": 2.3952335606933005e-05, "loss": 0.6285, "step": 141145 }, { "epoch": 1.56, "learning_rate": 2.395141287979449e-05, "loss": 0.5974, "step": 141150 }, { "epoch": 1.56, "learning_rate": 2.395049015265598e-05, "loss": 0.5948, "step": 141155 }, { "epoch": 1.56, "learning_rate": 2.3949567425517465e-05, "loss": 0.6482, "step": 141160 }, { "epoch": 1.56, "learning_rate": 2.3948644698378956e-05, "loss": 0.614, "step": 141165 }, { "epoch": 1.56, "learning_rate": 2.394772197124044e-05, "loss": 0.6405, "step": 141170 }, { "epoch": 1.56, "learning_rate": 2.3946799244101932e-05, "loss": 0.6411, "step": 141175 }, { "epoch": 1.56, "learning_rate": 2.3945876516963417e-05, "loss": 0.6566, "step": 141180 }, { "epoch": 1.56, "learning_rate": 2.3944953789824904e-05, "loss": 0.6208, "step": 141185 }, { "epoch": 1.56, "learning_rate": 2.3944031062686392e-05, "loss": 0.6462, "step": 141190 }, { "epoch": 1.56, "learning_rate": 2.394310833554788e-05, "loss": 0.6159, "step": 141195 }, { "epoch": 1.56, "learning_rate": 2.3942185608409368e-05, "loss": 0.6376, "step": 141200 }, { "epoch": 1.56, "learning_rate": 2.3941262881270852e-05, "loss": 0.6199, "step": 141205 }, { "epoch": 1.56, "learning_rate": 2.3940340154132344e-05, "loss": 0.5753, "step": 141210 }, { "epoch": 1.56, "learning_rate": 2.3939417426993828e-05, "loss": 0.641, "step": 141215 }, { "epoch": 1.56, "learning_rate": 2.393849469985532e-05, "loss": 0.6176, "step": 141220 }, { "epoch": 1.56, "learning_rate": 2.3937571972716804e-05, "loss": 0.635, "step": 141225 }, { "epoch": 1.56, "learning_rate": 2.3936649245578295e-05, "loss": 0.6706, "step": 141230 }, { "epoch": 1.56, "learning_rate": 2.393572651843978e-05, "loss": 0.6557, "step": 141235 }, { "epoch": 1.56, "learning_rate": 2.3934803791301267e-05, "loss": 0.631, "step": 141240 }, { "epoch": 1.56, "learning_rate": 2.3933881064162755e-05, "loss": 0.6664, "step": 141245 }, { "epoch": 1.56, "learning_rate": 2.3932958337024243e-05, "loss": 0.6817, "step": 141250 }, { "epoch": 1.56, "learning_rate": 2.393203560988573e-05, "loss": 0.6468, "step": 141255 }, { "epoch": 1.56, "learning_rate": 2.3931112882747215e-05, "loss": 0.6084, "step": 141260 }, { "epoch": 1.56, "learning_rate": 2.3930190155608707e-05, "loss": 0.6495, "step": 141265 }, { "epoch": 1.56, "learning_rate": 2.392926742847019e-05, "loss": 0.552, "step": 141270 }, { "epoch": 1.56, "learning_rate": 2.3928344701331682e-05, "loss": 0.6575, "step": 141275 }, { "epoch": 1.56, "learning_rate": 2.3927421974193167e-05, "loss": 0.6974, "step": 141280 }, { "epoch": 1.56, "learning_rate": 2.3926499247054658e-05, "loss": 0.6844, "step": 141285 }, { "epoch": 1.56, "learning_rate": 2.3925576519916143e-05, "loss": 0.6019, "step": 141290 }, { "epoch": 1.56, "learning_rate": 2.3924653792777634e-05, "loss": 0.6558, "step": 141295 }, { "epoch": 1.56, "learning_rate": 2.3923731065639118e-05, "loss": 0.6811, "step": 141300 }, { "epoch": 1.56, "learning_rate": 2.3922808338500606e-05, "loss": 0.6295, "step": 141305 }, { "epoch": 1.56, "learning_rate": 2.3921885611362094e-05, "loss": 0.6885, "step": 141310 }, { "epoch": 1.56, "learning_rate": 2.3920962884223582e-05, "loss": 0.6306, "step": 141315 }, { "epoch": 1.56, "learning_rate": 2.392004015708507e-05, "loss": 0.6618, "step": 141320 }, { "epoch": 1.56, "learning_rate": 2.3919117429946557e-05, "loss": 0.6671, "step": 141325 }, { "epoch": 1.56, "learning_rate": 2.3918194702808045e-05, "loss": 0.6714, "step": 141330 }, { "epoch": 1.56, "learning_rate": 2.391727197566953e-05, "loss": 0.6457, "step": 141335 }, { "epoch": 1.57, "learning_rate": 2.391634924853102e-05, "loss": 0.6674, "step": 141340 }, { "epoch": 1.57, "learning_rate": 2.3915426521392505e-05, "loss": 0.6619, "step": 141345 }, { "epoch": 1.57, "learning_rate": 2.3914503794253997e-05, "loss": 0.6744, "step": 141350 }, { "epoch": 1.57, "learning_rate": 2.391358106711548e-05, "loss": 0.6777, "step": 141355 }, { "epoch": 1.57, "learning_rate": 2.391265833997697e-05, "loss": 0.6535, "step": 141360 }, { "epoch": 1.57, "learning_rate": 2.3911735612838457e-05, "loss": 0.5976, "step": 141365 }, { "epoch": 1.57, "learning_rate": 2.3910812885699945e-05, "loss": 0.5879, "step": 141370 }, { "epoch": 1.57, "learning_rate": 2.3909890158561433e-05, "loss": 0.677, "step": 141375 }, { "epoch": 1.57, "learning_rate": 2.390896743142292e-05, "loss": 0.6666, "step": 141380 }, { "epoch": 1.57, "learning_rate": 2.3908044704284408e-05, "loss": 0.6253, "step": 141385 }, { "epoch": 1.57, "learning_rate": 2.3907121977145896e-05, "loss": 0.6957, "step": 141390 }, { "epoch": 1.57, "learning_rate": 2.3906199250007384e-05, "loss": 0.6079, "step": 141395 }, { "epoch": 1.57, "learning_rate": 2.3905276522868872e-05, "loss": 0.666, "step": 141400 }, { "epoch": 1.57, "learning_rate": 2.390435379573036e-05, "loss": 0.5954, "step": 141405 }, { "epoch": 1.57, "learning_rate": 2.3903431068591844e-05, "loss": 0.6796, "step": 141410 }, { "epoch": 1.57, "learning_rate": 2.3902508341453332e-05, "loss": 0.6664, "step": 141415 }, { "epoch": 1.57, "learning_rate": 2.390158561431482e-05, "loss": 0.5921, "step": 141420 }, { "epoch": 1.57, "learning_rate": 2.3900662887176308e-05, "loss": 0.5917, "step": 141425 }, { "epoch": 1.57, "learning_rate": 2.3899740160037796e-05, "loss": 0.6274, "step": 141430 }, { "epoch": 1.57, "learning_rate": 2.3898817432899283e-05, "loss": 0.5553, "step": 141435 }, { "epoch": 1.57, "learning_rate": 2.389789470576077e-05, "loss": 0.7233, "step": 141440 }, { "epoch": 1.57, "learning_rate": 2.389697197862226e-05, "loss": 0.6351, "step": 141445 }, { "epoch": 1.57, "learning_rate": 2.3896049251483747e-05, "loss": 0.6141, "step": 141450 }, { "epoch": 1.57, "learning_rate": 2.3895126524345235e-05, "loss": 0.642, "step": 141455 }, { "epoch": 1.57, "learning_rate": 2.3894203797206723e-05, "loss": 0.5871, "step": 141460 }, { "epoch": 1.57, "learning_rate": 2.389328107006821e-05, "loss": 0.6351, "step": 141465 }, { "epoch": 1.57, "learning_rate": 2.3892358342929695e-05, "loss": 0.6521, "step": 141470 }, { "epoch": 1.57, "learning_rate": 2.3891435615791186e-05, "loss": 0.5885, "step": 141475 }, { "epoch": 1.57, "learning_rate": 2.389051288865267e-05, "loss": 0.6411, "step": 141480 }, { "epoch": 1.57, "learning_rate": 2.388959016151416e-05, "loss": 0.7044, "step": 141485 }, { "epoch": 1.57, "learning_rate": 2.3888667434375646e-05, "loss": 0.6478, "step": 141490 }, { "epoch": 1.57, "learning_rate": 2.3887744707237134e-05, "loss": 0.645, "step": 141495 }, { "epoch": 1.57, "learning_rate": 2.3886821980098622e-05, "loss": 0.622, "step": 141500 }, { "epoch": 1.57, "learning_rate": 2.388589925296011e-05, "loss": 0.6213, "step": 141505 }, { "epoch": 1.57, "learning_rate": 2.3884976525821598e-05, "loss": 0.6535, "step": 141510 }, { "epoch": 1.57, "learning_rate": 2.3884053798683086e-05, "loss": 0.5998, "step": 141515 }, { "epoch": 1.57, "learning_rate": 2.3883131071544573e-05, "loss": 0.64, "step": 141520 }, { "epoch": 1.57, "learning_rate": 2.388220834440606e-05, "loss": 0.6194, "step": 141525 }, { "epoch": 1.57, "learning_rate": 2.388128561726755e-05, "loss": 0.5904, "step": 141530 }, { "epoch": 1.57, "learning_rate": 2.3880362890129034e-05, "loss": 0.6764, "step": 141535 }, { "epoch": 1.57, "learning_rate": 2.3879440162990525e-05, "loss": 0.6304, "step": 141540 }, { "epoch": 1.57, "learning_rate": 2.387851743585201e-05, "loss": 0.6447, "step": 141545 }, { "epoch": 1.57, "learning_rate": 2.38775947087135e-05, "loss": 0.6357, "step": 141550 }, { "epoch": 1.57, "learning_rate": 2.3876671981574985e-05, "loss": 0.5936, "step": 141555 }, { "epoch": 1.57, "learning_rate": 2.3875749254436473e-05, "loss": 0.6604, "step": 141560 }, { "epoch": 1.57, "learning_rate": 2.387482652729796e-05, "loss": 0.6176, "step": 141565 }, { "epoch": 1.57, "learning_rate": 2.387390380015945e-05, "loss": 0.6252, "step": 141570 }, { "epoch": 1.57, "learning_rate": 2.3872981073020936e-05, "loss": 0.67, "step": 141575 }, { "epoch": 1.57, "learning_rate": 2.3872058345882424e-05, "loss": 0.616, "step": 141580 }, { "epoch": 1.57, "learning_rate": 2.3871135618743912e-05, "loss": 0.5617, "step": 141585 }, { "epoch": 1.57, "learning_rate": 2.3870212891605397e-05, "loss": 0.6391, "step": 141590 }, { "epoch": 1.57, "learning_rate": 2.3869290164466888e-05, "loss": 0.6341, "step": 141595 }, { "epoch": 1.57, "learning_rate": 2.3868367437328372e-05, "loss": 0.6663, "step": 141600 }, { "epoch": 1.57, "learning_rate": 2.3867444710189863e-05, "loss": 0.5694, "step": 141605 }, { "epoch": 1.57, "learning_rate": 2.3866521983051348e-05, "loss": 0.5404, "step": 141610 }, { "epoch": 1.57, "learning_rate": 2.386559925591284e-05, "loss": 0.6662, "step": 141615 }, { "epoch": 1.57, "learning_rate": 2.3864676528774324e-05, "loss": 0.6039, "step": 141620 }, { "epoch": 1.57, "learning_rate": 2.386375380163581e-05, "loss": 0.5272, "step": 141625 }, { "epoch": 1.57, "learning_rate": 2.38628310744973e-05, "loss": 0.6379, "step": 141630 }, { "epoch": 1.57, "learning_rate": 2.3861908347358787e-05, "loss": 0.6685, "step": 141635 }, { "epoch": 1.57, "learning_rate": 2.3860985620220275e-05, "loss": 0.6301, "step": 141640 }, { "epoch": 1.57, "learning_rate": 2.386006289308176e-05, "loss": 0.6611, "step": 141645 }, { "epoch": 1.57, "learning_rate": 2.385914016594325e-05, "loss": 0.6218, "step": 141650 }, { "epoch": 1.57, "learning_rate": 2.3858217438804735e-05, "loss": 0.6539, "step": 141655 }, { "epoch": 1.57, "learning_rate": 2.3857294711666226e-05, "loss": 0.6203, "step": 141660 }, { "epoch": 1.57, "learning_rate": 2.385637198452771e-05, "loss": 0.6216, "step": 141665 }, { "epoch": 1.57, "learning_rate": 2.3855449257389202e-05, "loss": 0.6546, "step": 141670 }, { "epoch": 1.57, "learning_rate": 2.3854526530250687e-05, "loss": 0.6402, "step": 141675 }, { "epoch": 1.57, "learning_rate": 2.3853603803112178e-05, "loss": 0.6618, "step": 141680 }, { "epoch": 1.57, "learning_rate": 2.3852681075973662e-05, "loss": 0.6517, "step": 141685 }, { "epoch": 1.57, "learning_rate": 2.385175834883515e-05, "loss": 0.6071, "step": 141690 }, { "epoch": 1.57, "learning_rate": 2.3850835621696638e-05, "loss": 0.6609, "step": 141695 }, { "epoch": 1.57, "learning_rate": 2.3849912894558126e-05, "loss": 0.7, "step": 141700 }, { "epoch": 1.57, "learning_rate": 2.3848990167419614e-05, "loss": 0.6645, "step": 141705 }, { "epoch": 1.57, "learning_rate": 2.3848067440281098e-05, "loss": 0.6936, "step": 141710 }, { "epoch": 1.57, "learning_rate": 2.384714471314259e-05, "loss": 0.6261, "step": 141715 }, { "epoch": 1.57, "learning_rate": 2.3846221986004074e-05, "loss": 0.6392, "step": 141720 }, { "epoch": 1.57, "learning_rate": 2.3845299258865565e-05, "loss": 0.6342, "step": 141725 }, { "epoch": 1.57, "learning_rate": 2.384437653172705e-05, "loss": 0.6245, "step": 141730 }, { "epoch": 1.57, "learning_rate": 2.384345380458854e-05, "loss": 0.6306, "step": 141735 }, { "epoch": 1.57, "learning_rate": 2.3842531077450025e-05, "loss": 0.664, "step": 141740 }, { "epoch": 1.57, "learning_rate": 2.3841608350311513e-05, "loss": 0.6291, "step": 141745 }, { "epoch": 1.57, "learning_rate": 2.3840685623173e-05, "loss": 0.6195, "step": 141750 }, { "epoch": 1.57, "learning_rate": 2.383976289603449e-05, "loss": 0.6471, "step": 141755 }, { "epoch": 1.57, "learning_rate": 2.3838840168895977e-05, "loss": 0.6162, "step": 141760 }, { "epoch": 1.57, "learning_rate": 2.3837917441757464e-05, "loss": 0.5936, "step": 141765 }, { "epoch": 1.57, "learning_rate": 2.3836994714618952e-05, "loss": 0.7069, "step": 141770 }, { "epoch": 1.57, "learning_rate": 2.383607198748044e-05, "loss": 0.6378, "step": 141775 }, { "epoch": 1.57, "learning_rate": 2.3835149260341928e-05, "loss": 0.6803, "step": 141780 }, { "epoch": 1.57, "learning_rate": 2.3834226533203412e-05, "loss": 0.6244, "step": 141785 }, { "epoch": 1.57, "learning_rate": 2.3833303806064904e-05, "loss": 0.6979, "step": 141790 }, { "epoch": 1.57, "learning_rate": 2.3832381078926388e-05, "loss": 0.6636, "step": 141795 }, { "epoch": 1.57, "learning_rate": 2.3831458351787876e-05, "loss": 0.6866, "step": 141800 }, { "epoch": 1.57, "learning_rate": 2.3830535624649364e-05, "loss": 0.656, "step": 141805 }, { "epoch": 1.57, "learning_rate": 2.382961289751085e-05, "loss": 0.5832, "step": 141810 }, { "epoch": 1.57, "learning_rate": 2.382869017037234e-05, "loss": 0.6668, "step": 141815 }, { "epoch": 1.57, "learning_rate": 2.3827767443233827e-05, "loss": 0.6359, "step": 141820 }, { "epoch": 1.57, "learning_rate": 2.3826844716095315e-05, "loss": 0.6311, "step": 141825 }, { "epoch": 1.57, "learning_rate": 2.3825921988956803e-05, "loss": 0.6385, "step": 141830 }, { "epoch": 1.57, "learning_rate": 2.382499926181829e-05, "loss": 0.6003, "step": 141835 }, { "epoch": 1.57, "learning_rate": 2.382407653467978e-05, "loss": 0.6038, "step": 141840 }, { "epoch": 1.57, "learning_rate": 2.3823153807541267e-05, "loss": 0.6199, "step": 141845 }, { "epoch": 1.57, "learning_rate": 2.3822231080402754e-05, "loss": 0.6696, "step": 141850 }, { "epoch": 1.57, "learning_rate": 2.382130835326424e-05, "loss": 0.6026, "step": 141855 }, { "epoch": 1.57, "learning_rate": 2.3820385626125727e-05, "loss": 0.6363, "step": 141860 }, { "epoch": 1.57, "learning_rate": 2.3819462898987215e-05, "loss": 0.6484, "step": 141865 }, { "epoch": 1.57, "learning_rate": 2.3818540171848702e-05, "loss": 0.5966, "step": 141870 }, { "epoch": 1.57, "learning_rate": 2.381761744471019e-05, "loss": 0.6338, "step": 141875 }, { "epoch": 1.57, "learning_rate": 2.3816694717571678e-05, "loss": 0.656, "step": 141880 }, { "epoch": 1.57, "learning_rate": 2.3815771990433166e-05, "loss": 0.5956, "step": 141885 }, { "epoch": 1.57, "learning_rate": 2.3814849263294654e-05, "loss": 0.6784, "step": 141890 }, { "epoch": 1.57, "learning_rate": 2.3813926536156142e-05, "loss": 0.6566, "step": 141895 }, { "epoch": 1.57, "learning_rate": 2.381300380901763e-05, "loss": 0.6619, "step": 141900 }, { "epoch": 1.57, "learning_rate": 2.3812081081879117e-05, "loss": 0.6111, "step": 141905 }, { "epoch": 1.57, "learning_rate": 2.3811158354740605e-05, "loss": 0.6072, "step": 141910 }, { "epoch": 1.57, "learning_rate": 2.3810235627602093e-05, "loss": 0.6426, "step": 141915 }, { "epoch": 1.57, "learning_rate": 2.3809312900463578e-05, "loss": 0.604, "step": 141920 }, { "epoch": 1.57, "learning_rate": 2.380839017332507e-05, "loss": 0.6117, "step": 141925 }, { "epoch": 1.57, "learning_rate": 2.3807467446186553e-05, "loss": 0.6156, "step": 141930 }, { "epoch": 1.57, "learning_rate": 2.380654471904804e-05, "loss": 0.6793, "step": 141935 }, { "epoch": 1.57, "learning_rate": 2.380562199190953e-05, "loss": 0.6657, "step": 141940 }, { "epoch": 1.57, "learning_rate": 2.3804699264771017e-05, "loss": 0.6427, "step": 141945 }, { "epoch": 1.57, "learning_rate": 2.3803776537632505e-05, "loss": 0.6754, "step": 141950 }, { "epoch": 1.57, "learning_rate": 2.3802853810493993e-05, "loss": 0.634, "step": 141955 }, { "epoch": 1.57, "learning_rate": 2.380193108335548e-05, "loss": 0.6063, "step": 141960 }, { "epoch": 1.57, "learning_rate": 2.3801008356216968e-05, "loss": 0.6351, "step": 141965 }, { "epoch": 1.57, "learning_rate": 2.3800085629078456e-05, "loss": 0.6626, "step": 141970 }, { "epoch": 1.57, "learning_rate": 2.379916290193994e-05, "loss": 0.6499, "step": 141975 }, { "epoch": 1.57, "learning_rate": 2.3798240174801432e-05, "loss": 0.6392, "step": 141980 }, { "epoch": 1.57, "learning_rate": 2.3797317447662916e-05, "loss": 0.6341, "step": 141985 }, { "epoch": 1.57, "learning_rate": 2.3796394720524407e-05, "loss": 0.632, "step": 141990 }, { "epoch": 1.57, "learning_rate": 2.3795471993385892e-05, "loss": 0.6565, "step": 141995 }, { "epoch": 1.57, "learning_rate": 2.3794549266247383e-05, "loss": 0.6005, "step": 142000 }, { "epoch": 1.57, "eval_loss": 0.6127725839614868, "eval_runtime": 69.0933, "eval_samples_per_second": 28.946, "eval_steps_per_second": 14.473, "step": 142000 }, { "epoch": 1.57, "learning_rate": 2.3793626539108868e-05, "loss": 0.5897, "step": 142005 }, { "epoch": 1.57, "learning_rate": 2.3792703811970355e-05, "loss": 0.5942, "step": 142010 }, { "epoch": 1.57, "learning_rate": 2.3791781084831843e-05, "loss": 0.6982, "step": 142015 }, { "epoch": 1.57, "learning_rate": 2.379085835769333e-05, "loss": 0.6247, "step": 142020 }, { "epoch": 1.57, "learning_rate": 2.378993563055482e-05, "loss": 0.6341, "step": 142025 }, { "epoch": 1.57, "learning_rate": 2.3789012903416303e-05, "loss": 0.6784, "step": 142030 }, { "epoch": 1.57, "learning_rate": 2.3788090176277795e-05, "loss": 0.6137, "step": 142035 }, { "epoch": 1.57, "learning_rate": 2.378716744913928e-05, "loss": 0.6344, "step": 142040 }, { "epoch": 1.57, "learning_rate": 2.378624472200077e-05, "loss": 0.6663, "step": 142045 }, { "epoch": 1.57, "learning_rate": 2.3785321994862255e-05, "loss": 0.6655, "step": 142050 }, { "epoch": 1.57, "learning_rate": 2.3784399267723746e-05, "loss": 0.6245, "step": 142055 }, { "epoch": 1.57, "learning_rate": 2.378347654058523e-05, "loss": 0.6679, "step": 142060 }, { "epoch": 1.57, "learning_rate": 2.3782553813446722e-05, "loss": 0.6182, "step": 142065 }, { "epoch": 1.57, "learning_rate": 2.3781631086308206e-05, "loss": 0.6003, "step": 142070 }, { "epoch": 1.57, "learning_rate": 2.3780708359169694e-05, "loss": 0.7351, "step": 142075 }, { "epoch": 1.57, "learning_rate": 2.3779785632031182e-05, "loss": 0.6007, "step": 142080 }, { "epoch": 1.57, "learning_rate": 2.377886290489267e-05, "loss": 0.699, "step": 142085 }, { "epoch": 1.57, "learning_rate": 2.3777940177754158e-05, "loss": 0.6301, "step": 142090 }, { "epoch": 1.57, "learning_rate": 2.3777017450615642e-05, "loss": 0.6211, "step": 142095 }, { "epoch": 1.57, "learning_rate": 2.3776094723477133e-05, "loss": 0.6184, "step": 142100 }, { "epoch": 1.57, "learning_rate": 2.3775171996338618e-05, "loss": 0.6649, "step": 142105 }, { "epoch": 1.57, "learning_rate": 2.377424926920011e-05, "loss": 0.6253, "step": 142110 }, { "epoch": 1.57, "learning_rate": 2.3773326542061594e-05, "loss": 0.6383, "step": 142115 }, { "epoch": 1.57, "learning_rate": 2.3772403814923085e-05, "loss": 0.6044, "step": 142120 }, { "epoch": 1.57, "learning_rate": 2.377148108778457e-05, "loss": 0.625, "step": 142125 }, { "epoch": 1.57, "learning_rate": 2.3770558360646057e-05, "loss": 0.6384, "step": 142130 }, { "epoch": 1.57, "learning_rate": 2.3769635633507545e-05, "loss": 0.6788, "step": 142135 }, { "epoch": 1.57, "learning_rate": 2.3768712906369033e-05, "loss": 0.6984, "step": 142140 }, { "epoch": 1.57, "learning_rate": 2.376779017923052e-05, "loss": 0.6894, "step": 142145 }, { "epoch": 1.57, "learning_rate": 2.376686745209201e-05, "loss": 0.623, "step": 142150 }, { "epoch": 1.57, "learning_rate": 2.3765944724953496e-05, "loss": 0.5825, "step": 142155 }, { "epoch": 1.57, "learning_rate": 2.3765021997814984e-05, "loss": 0.584, "step": 142160 }, { "epoch": 1.57, "learning_rate": 2.3764099270676472e-05, "loss": 0.6136, "step": 142165 }, { "epoch": 1.57, "learning_rate": 2.3763176543537956e-05, "loss": 0.6317, "step": 142170 }, { "epoch": 1.57, "learning_rate": 2.3762253816399448e-05, "loss": 0.6581, "step": 142175 }, { "epoch": 1.57, "learning_rate": 2.3761331089260932e-05, "loss": 0.6241, "step": 142180 }, { "epoch": 1.57, "learning_rate": 2.376040836212242e-05, "loss": 0.5948, "step": 142185 }, { "epoch": 1.57, "learning_rate": 2.3759485634983908e-05, "loss": 0.6296, "step": 142190 }, { "epoch": 1.57, "learning_rate": 2.3758562907845396e-05, "loss": 0.6777, "step": 142195 }, { "epoch": 1.57, "learning_rate": 2.3757640180706884e-05, "loss": 0.6976, "step": 142200 }, { "epoch": 1.57, "learning_rate": 2.375671745356837e-05, "loss": 0.6148, "step": 142205 }, { "epoch": 1.57, "learning_rate": 2.375579472642986e-05, "loss": 0.6718, "step": 142210 }, { "epoch": 1.57, "learning_rate": 2.3754871999291347e-05, "loss": 0.6333, "step": 142215 }, { "epoch": 1.57, "learning_rate": 2.3753949272152835e-05, "loss": 0.6149, "step": 142220 }, { "epoch": 1.57, "learning_rate": 2.3753026545014323e-05, "loss": 0.6319, "step": 142225 }, { "epoch": 1.57, "learning_rate": 2.375210381787581e-05, "loss": 0.6483, "step": 142230 }, { "epoch": 1.57, "learning_rate": 2.37511810907373e-05, "loss": 0.6255, "step": 142235 }, { "epoch": 1.57, "learning_rate": 2.3750258363598786e-05, "loss": 0.622, "step": 142240 }, { "epoch": 1.58, "learning_rate": 2.374933563646027e-05, "loss": 0.6634, "step": 142245 }, { "epoch": 1.58, "learning_rate": 2.374841290932176e-05, "loss": 0.6491, "step": 142250 }, { "epoch": 1.58, "learning_rate": 2.3747490182183246e-05, "loss": 0.594, "step": 142255 }, { "epoch": 1.58, "learning_rate": 2.3746567455044734e-05, "loss": 0.6443, "step": 142260 }, { "epoch": 1.58, "learning_rate": 2.3745644727906222e-05, "loss": 0.6332, "step": 142265 }, { "epoch": 1.58, "learning_rate": 2.374472200076771e-05, "loss": 0.6362, "step": 142270 }, { "epoch": 1.58, "learning_rate": 2.3743799273629198e-05, "loss": 0.6437, "step": 142275 }, { "epoch": 1.58, "learning_rate": 2.3742876546490686e-05, "loss": 0.6728, "step": 142280 }, { "epoch": 1.58, "learning_rate": 2.3741953819352174e-05, "loss": 0.633, "step": 142285 }, { "epoch": 1.58, "learning_rate": 2.374103109221366e-05, "loss": 0.5952, "step": 142290 }, { "epoch": 1.58, "learning_rate": 2.374010836507515e-05, "loss": 0.6433, "step": 142295 }, { "epoch": 1.58, "learning_rate": 2.3739185637936637e-05, "loss": 0.654, "step": 142300 }, { "epoch": 1.58, "learning_rate": 2.373826291079812e-05, "loss": 0.6409, "step": 142305 }, { "epoch": 1.58, "learning_rate": 2.3737340183659613e-05, "loss": 0.615, "step": 142310 }, { "epoch": 1.58, "learning_rate": 2.3736417456521097e-05, "loss": 0.5977, "step": 142315 }, { "epoch": 1.58, "learning_rate": 2.3735494729382585e-05, "loss": 0.6671, "step": 142320 }, { "epoch": 1.58, "learning_rate": 2.3734572002244073e-05, "loss": 0.6366, "step": 142325 }, { "epoch": 1.58, "learning_rate": 2.373364927510556e-05, "loss": 0.6431, "step": 142330 }, { "epoch": 1.58, "learning_rate": 2.373272654796705e-05, "loss": 0.5987, "step": 142335 }, { "epoch": 1.58, "learning_rate": 2.3731803820828537e-05, "loss": 0.7112, "step": 142340 }, { "epoch": 1.58, "learning_rate": 2.3730881093690024e-05, "loss": 0.6471, "step": 142345 }, { "epoch": 1.58, "learning_rate": 2.3729958366551512e-05, "loss": 0.6451, "step": 142350 }, { "epoch": 1.58, "learning_rate": 2.3729035639413e-05, "loss": 0.6151, "step": 142355 }, { "epoch": 1.58, "learning_rate": 2.3728112912274485e-05, "loss": 0.638, "step": 142360 }, { "epoch": 1.58, "learning_rate": 2.3727190185135976e-05, "loss": 0.6159, "step": 142365 }, { "epoch": 1.58, "learning_rate": 2.372626745799746e-05, "loss": 0.6211, "step": 142370 }, { "epoch": 1.58, "learning_rate": 2.372534473085895e-05, "loss": 0.6092, "step": 142375 }, { "epoch": 1.58, "learning_rate": 2.3724422003720436e-05, "loss": 0.5704, "step": 142380 }, { "epoch": 1.58, "learning_rate": 2.3723499276581927e-05, "loss": 0.6017, "step": 142385 }, { "epoch": 1.58, "learning_rate": 2.372257654944341e-05, "loss": 0.605, "step": 142390 }, { "epoch": 1.58, "learning_rate": 2.37216538223049e-05, "loss": 0.6291, "step": 142395 }, { "epoch": 1.58, "learning_rate": 2.3720731095166387e-05, "loss": 0.5735, "step": 142400 }, { "epoch": 1.58, "learning_rate": 2.3719808368027875e-05, "loss": 0.6001, "step": 142405 }, { "epoch": 1.58, "learning_rate": 2.3718885640889363e-05, "loss": 0.6132, "step": 142410 }, { "epoch": 1.58, "learning_rate": 2.3717962913750847e-05, "loss": 0.6271, "step": 142415 }, { "epoch": 1.58, "learning_rate": 2.371704018661234e-05, "loss": 0.5996, "step": 142420 }, { "epoch": 1.58, "learning_rate": 2.3716117459473823e-05, "loss": 0.7101, "step": 142425 }, { "epoch": 1.58, "learning_rate": 2.3715194732335314e-05, "loss": 0.6608, "step": 142430 }, { "epoch": 1.58, "learning_rate": 2.37142720051968e-05, "loss": 0.6076, "step": 142435 }, { "epoch": 1.58, "learning_rate": 2.371334927805829e-05, "loss": 0.5885, "step": 142440 }, { "epoch": 1.58, "learning_rate": 2.3712426550919775e-05, "loss": 0.6571, "step": 142445 }, { "epoch": 1.58, "learning_rate": 2.3711503823781266e-05, "loss": 0.6567, "step": 142450 }, { "epoch": 1.58, "learning_rate": 2.371058109664275e-05, "loss": 0.5844, "step": 142455 }, { "epoch": 1.58, "learning_rate": 2.3709658369504238e-05, "loss": 0.6413, "step": 142460 }, { "epoch": 1.58, "learning_rate": 2.3708735642365726e-05, "loss": 0.62, "step": 142465 }, { "epoch": 1.58, "learning_rate": 2.3707812915227214e-05, "loss": 0.6346, "step": 142470 }, { "epoch": 1.58, "learning_rate": 2.37068901880887e-05, "loss": 0.6243, "step": 142475 }, { "epoch": 1.58, "learning_rate": 2.3705967460950186e-05, "loss": 0.591, "step": 142480 }, { "epoch": 1.58, "learning_rate": 2.3705044733811677e-05, "loss": 0.6285, "step": 142485 }, { "epoch": 1.58, "learning_rate": 2.3704122006673162e-05, "loss": 0.6291, "step": 142490 }, { "epoch": 1.58, "learning_rate": 2.3703199279534653e-05, "loss": 0.6092, "step": 142495 }, { "epoch": 1.58, "learning_rate": 2.3702276552396138e-05, "loss": 0.6386, "step": 142500 }, { "epoch": 1.58, "learning_rate": 2.370135382525763e-05, "loss": 0.6198, "step": 142505 }, { "epoch": 1.58, "learning_rate": 2.3700431098119113e-05, "loss": 0.615, "step": 142510 }, { "epoch": 1.58, "learning_rate": 2.36995083709806e-05, "loss": 0.6762, "step": 142515 }, { "epoch": 1.58, "learning_rate": 2.369858564384209e-05, "loss": 0.5986, "step": 142520 }, { "epoch": 1.58, "learning_rate": 2.3697662916703577e-05, "loss": 0.6435, "step": 142525 }, { "epoch": 1.58, "learning_rate": 2.3696740189565065e-05, "loss": 0.667, "step": 142530 }, { "epoch": 1.58, "learning_rate": 2.3695817462426552e-05, "loss": 0.6214, "step": 142535 }, { "epoch": 1.58, "learning_rate": 2.369489473528804e-05, "loss": 0.5936, "step": 142540 }, { "epoch": 1.58, "learning_rate": 2.3693972008149525e-05, "loss": 0.5759, "step": 142545 }, { "epoch": 1.58, "learning_rate": 2.3693049281011016e-05, "loss": 0.5866, "step": 142550 }, { "epoch": 1.58, "learning_rate": 2.36921265538725e-05, "loss": 0.6388, "step": 142555 }, { "epoch": 1.58, "learning_rate": 2.3691203826733992e-05, "loss": 0.6466, "step": 142560 }, { "epoch": 1.58, "learning_rate": 2.3690281099595476e-05, "loss": 0.6229, "step": 142565 }, { "epoch": 1.58, "learning_rate": 2.3689358372456964e-05, "loss": 0.6151, "step": 142570 }, { "epoch": 1.58, "learning_rate": 2.3688435645318452e-05, "loss": 0.664, "step": 142575 }, { "epoch": 1.58, "learning_rate": 2.368751291817994e-05, "loss": 0.5871, "step": 142580 }, { "epoch": 1.58, "learning_rate": 2.3686590191041428e-05, "loss": 0.6533, "step": 142585 }, { "epoch": 1.58, "learning_rate": 2.3685667463902915e-05, "loss": 0.6149, "step": 142590 }, { "epoch": 1.58, "learning_rate": 2.3684744736764403e-05, "loss": 0.6091, "step": 142595 }, { "epoch": 1.58, "learning_rate": 2.368382200962589e-05, "loss": 0.5903, "step": 142600 }, { "epoch": 1.58, "learning_rate": 2.368289928248738e-05, "loss": 0.643, "step": 142605 }, { "epoch": 1.58, "learning_rate": 2.3681976555348867e-05, "loss": 0.6644, "step": 142610 }, { "epoch": 1.58, "learning_rate": 2.3681053828210355e-05, "loss": 0.5583, "step": 142615 }, { "epoch": 1.58, "learning_rate": 2.368013110107184e-05, "loss": 0.6451, "step": 142620 }, { "epoch": 1.58, "learning_rate": 2.367920837393333e-05, "loss": 0.6438, "step": 142625 }, { "epoch": 1.58, "learning_rate": 2.3678285646794815e-05, "loss": 0.63, "step": 142630 }, { "epoch": 1.58, "learning_rate": 2.3677362919656303e-05, "loss": 0.6466, "step": 142635 }, { "epoch": 1.58, "learning_rate": 2.367644019251779e-05, "loss": 0.6277, "step": 142640 }, { "epoch": 1.58, "learning_rate": 2.367551746537928e-05, "loss": 0.6413, "step": 142645 }, { "epoch": 1.58, "learning_rate": 2.3674594738240766e-05, "loss": 0.6687, "step": 142650 }, { "epoch": 1.58, "learning_rate": 2.3673672011102254e-05, "loss": 0.6375, "step": 142655 }, { "epoch": 1.58, "learning_rate": 2.3672749283963742e-05, "loss": 0.6391, "step": 142660 }, { "epoch": 1.58, "learning_rate": 2.367182655682523e-05, "loss": 0.628, "step": 142665 }, { "epoch": 1.58, "learning_rate": 2.3670903829686718e-05, "loss": 0.6534, "step": 142670 }, { "epoch": 1.58, "learning_rate": 2.3669981102548205e-05, "loss": 0.6084, "step": 142675 }, { "epoch": 1.58, "learning_rate": 2.3669058375409693e-05, "loss": 0.6658, "step": 142680 }, { "epoch": 1.58, "learning_rate": 2.366813564827118e-05, "loss": 0.6698, "step": 142685 }, { "epoch": 1.58, "learning_rate": 2.3667212921132666e-05, "loss": 0.6125, "step": 142690 }, { "epoch": 1.58, "learning_rate": 2.3666290193994153e-05, "loss": 0.6519, "step": 142695 }, { "epoch": 1.58, "learning_rate": 2.366536746685564e-05, "loss": 0.6443, "step": 142700 }, { "epoch": 1.58, "learning_rate": 2.366444473971713e-05, "loss": 0.6332, "step": 142705 }, { "epoch": 1.58, "learning_rate": 2.3663522012578617e-05, "loss": 0.6413, "step": 142710 }, { "epoch": 1.58, "learning_rate": 2.3662599285440105e-05, "loss": 0.7057, "step": 142715 }, { "epoch": 1.58, "learning_rate": 2.3661676558301593e-05, "loss": 0.6318, "step": 142720 }, { "epoch": 1.58, "learning_rate": 2.366075383116308e-05, "loss": 0.726, "step": 142725 }, { "epoch": 1.58, "learning_rate": 2.365983110402457e-05, "loss": 0.6053, "step": 142730 }, { "epoch": 1.58, "learning_rate": 2.3658908376886056e-05, "loss": 0.6641, "step": 142735 }, { "epoch": 1.58, "learning_rate": 2.3657985649747544e-05, "loss": 0.6629, "step": 142740 }, { "epoch": 1.58, "learning_rate": 2.365706292260903e-05, "loss": 0.6571, "step": 142745 }, { "epoch": 1.58, "learning_rate": 2.365614019547052e-05, "loss": 0.6352, "step": 142750 }, { "epoch": 1.58, "learning_rate": 2.3655217468332004e-05, "loss": 0.6048, "step": 142755 }, { "epoch": 1.58, "learning_rate": 2.3654294741193495e-05, "loss": 0.6206, "step": 142760 }, { "epoch": 1.58, "learning_rate": 2.365337201405498e-05, "loss": 0.6364, "step": 142765 }, { "epoch": 1.58, "learning_rate": 2.3652449286916468e-05, "loss": 0.6749, "step": 142770 }, { "epoch": 1.58, "learning_rate": 2.3651526559777956e-05, "loss": 0.6546, "step": 142775 }, { "epoch": 1.58, "learning_rate": 2.3650603832639444e-05, "loss": 0.6856, "step": 142780 }, { "epoch": 1.58, "learning_rate": 2.364968110550093e-05, "loss": 0.6132, "step": 142785 }, { "epoch": 1.58, "learning_rate": 2.364875837836242e-05, "loss": 0.6296, "step": 142790 }, { "epoch": 1.58, "learning_rate": 2.3647835651223907e-05, "loss": 0.6371, "step": 142795 }, { "epoch": 1.58, "learning_rate": 2.364691292408539e-05, "loss": 0.6545, "step": 142800 }, { "epoch": 1.58, "learning_rate": 2.3645990196946883e-05, "loss": 0.6356, "step": 142805 }, { "epoch": 1.58, "learning_rate": 2.3645067469808367e-05, "loss": 0.6707, "step": 142810 }, { "epoch": 1.58, "learning_rate": 2.364414474266986e-05, "loss": 0.6623, "step": 142815 }, { "epoch": 1.58, "learning_rate": 2.3643222015531343e-05, "loss": 0.6666, "step": 142820 }, { "epoch": 1.58, "learning_rate": 2.3642299288392834e-05, "loss": 0.619, "step": 142825 }, { "epoch": 1.58, "learning_rate": 2.364137656125432e-05, "loss": 0.6078, "step": 142830 }, { "epoch": 1.58, "learning_rate": 2.364045383411581e-05, "loss": 0.6655, "step": 142835 }, { "epoch": 1.58, "learning_rate": 2.3639531106977294e-05, "loss": 0.5972, "step": 142840 }, { "epoch": 1.58, "learning_rate": 2.3638608379838782e-05, "loss": 0.6295, "step": 142845 }, { "epoch": 1.58, "learning_rate": 2.363768565270027e-05, "loss": 0.5846, "step": 142850 }, { "epoch": 1.58, "learning_rate": 2.3636762925561758e-05, "loss": 0.7418, "step": 142855 }, { "epoch": 1.58, "learning_rate": 2.3635840198423246e-05, "loss": 0.6234, "step": 142860 }, { "epoch": 1.58, "learning_rate": 2.363491747128473e-05, "loss": 0.6681, "step": 142865 }, { "epoch": 1.58, "learning_rate": 2.363399474414622e-05, "loss": 0.613, "step": 142870 }, { "epoch": 1.58, "learning_rate": 2.3633072017007706e-05, "loss": 0.6196, "step": 142875 }, { "epoch": 1.58, "learning_rate": 2.3632149289869197e-05, "loss": 0.6081, "step": 142880 }, { "epoch": 1.58, "learning_rate": 2.363122656273068e-05, "loss": 0.6781, "step": 142885 }, { "epoch": 1.58, "learning_rate": 2.3630303835592173e-05, "loss": 0.6972, "step": 142890 }, { "epoch": 1.58, "learning_rate": 2.3629381108453657e-05, "loss": 0.575, "step": 142895 }, { "epoch": 1.58, "learning_rate": 2.3628458381315145e-05, "loss": 0.6447, "step": 142900 }, { "epoch": 1.58, "learning_rate": 2.3627535654176633e-05, "loss": 0.7216, "step": 142905 }, { "epoch": 1.58, "learning_rate": 2.362661292703812e-05, "loss": 0.6829, "step": 142910 }, { "epoch": 1.58, "learning_rate": 2.362569019989961e-05, "loss": 0.6719, "step": 142915 }, { "epoch": 1.58, "learning_rate": 2.3624767472761096e-05, "loss": 0.6412, "step": 142920 }, { "epoch": 1.58, "learning_rate": 2.3623844745622584e-05, "loss": 0.6496, "step": 142925 }, { "epoch": 1.58, "learning_rate": 2.362292201848407e-05, "loss": 0.6419, "step": 142930 }, { "epoch": 1.58, "learning_rate": 2.362199929134556e-05, "loss": 0.6009, "step": 142935 }, { "epoch": 1.58, "learning_rate": 2.3621076564207044e-05, "loss": 0.6155, "step": 142940 }, { "epoch": 1.58, "learning_rate": 2.3620153837068536e-05, "loss": 0.6721, "step": 142945 }, { "epoch": 1.58, "learning_rate": 2.361923110993002e-05, "loss": 0.6873, "step": 142950 }, { "epoch": 1.58, "learning_rate": 2.3618308382791508e-05, "loss": 0.5992, "step": 142955 }, { "epoch": 1.58, "learning_rate": 2.3617385655652996e-05, "loss": 0.6224, "step": 142960 }, { "epoch": 1.58, "learning_rate": 2.3616462928514484e-05, "loss": 0.6618, "step": 142965 }, { "epoch": 1.58, "learning_rate": 2.361554020137597e-05, "loss": 0.5927, "step": 142970 }, { "epoch": 1.58, "learning_rate": 2.361461747423746e-05, "loss": 0.6299, "step": 142975 }, { "epoch": 1.58, "learning_rate": 2.3613694747098947e-05, "loss": 0.6299, "step": 142980 }, { "epoch": 1.58, "learning_rate": 2.3612772019960435e-05, "loss": 0.6522, "step": 142985 }, { "epoch": 1.58, "learning_rate": 2.3611849292821923e-05, "loss": 0.6153, "step": 142990 }, { "epoch": 1.58, "learning_rate": 2.361092656568341e-05, "loss": 0.6531, "step": 142995 }, { "epoch": 1.58, "learning_rate": 2.36100038385449e-05, "loss": 0.6179, "step": 143000 }, { "epoch": 1.58, "eval_loss": 0.6088712215423584, "eval_runtime": 69.129, "eval_samples_per_second": 28.931, "eval_steps_per_second": 14.466, "step": 143000 }, { "epoch": 1.58, "learning_rate": 2.3609081111406383e-05, "loss": 0.5977, "step": 143005 }, { "epoch": 1.58, "learning_rate": 2.3608158384267874e-05, "loss": 0.6181, "step": 143010 }, { "epoch": 1.58, "learning_rate": 2.360723565712936e-05, "loss": 0.6786, "step": 143015 }, { "epoch": 1.58, "learning_rate": 2.3606312929990847e-05, "loss": 0.6077, "step": 143020 }, { "epoch": 1.58, "learning_rate": 2.3605390202852335e-05, "loss": 0.6441, "step": 143025 }, { "epoch": 1.58, "learning_rate": 2.3604467475713822e-05, "loss": 0.6144, "step": 143030 }, { "epoch": 1.58, "learning_rate": 2.360354474857531e-05, "loss": 0.6139, "step": 143035 }, { "epoch": 1.58, "learning_rate": 2.3602622021436798e-05, "loss": 0.6555, "step": 143040 }, { "epoch": 1.58, "learning_rate": 2.3601699294298286e-05, "loss": 0.6378, "step": 143045 }, { "epoch": 1.58, "learning_rate": 2.3600776567159774e-05, "loss": 0.6436, "step": 143050 }, { "epoch": 1.58, "learning_rate": 2.359985384002126e-05, "loss": 0.6595, "step": 143055 }, { "epoch": 1.58, "learning_rate": 2.359893111288275e-05, "loss": 0.6627, "step": 143060 }, { "epoch": 1.58, "learning_rate": 2.3598008385744237e-05, "loss": 0.6617, "step": 143065 }, { "epoch": 1.58, "learning_rate": 2.3597085658605725e-05, "loss": 0.6714, "step": 143070 }, { "epoch": 1.58, "learning_rate": 2.359616293146721e-05, "loss": 0.6527, "step": 143075 }, { "epoch": 1.58, "learning_rate": 2.3595240204328697e-05, "loss": 0.6359, "step": 143080 }, { "epoch": 1.58, "learning_rate": 2.3594317477190185e-05, "loss": 0.6586, "step": 143085 }, { "epoch": 1.58, "learning_rate": 2.3593394750051673e-05, "loss": 0.5884, "step": 143090 }, { "epoch": 1.58, "learning_rate": 2.359247202291316e-05, "loss": 0.6427, "step": 143095 }, { "epoch": 1.58, "learning_rate": 2.359154929577465e-05, "loss": 0.6496, "step": 143100 }, { "epoch": 1.58, "learning_rate": 2.3590626568636137e-05, "loss": 0.6204, "step": 143105 }, { "epoch": 1.58, "learning_rate": 2.3589703841497625e-05, "loss": 0.671, "step": 143110 }, { "epoch": 1.58, "learning_rate": 2.3588781114359112e-05, "loss": 0.5807, "step": 143115 }, { "epoch": 1.58, "learning_rate": 2.35878583872206e-05, "loss": 0.6461, "step": 143120 }, { "epoch": 1.58, "learning_rate": 2.3586935660082088e-05, "loss": 0.6574, "step": 143125 }, { "epoch": 1.58, "learning_rate": 2.3586012932943573e-05, "loss": 0.6639, "step": 143130 }, { "epoch": 1.58, "learning_rate": 2.3585090205805064e-05, "loss": 0.6179, "step": 143135 }, { "epoch": 1.58, "learning_rate": 2.3584167478666548e-05, "loss": 0.6179, "step": 143140 }, { "epoch": 1.59, "learning_rate": 2.358324475152804e-05, "loss": 0.6223, "step": 143145 }, { "epoch": 1.59, "learning_rate": 2.3582322024389524e-05, "loss": 0.6476, "step": 143150 }, { "epoch": 1.59, "learning_rate": 2.3581399297251012e-05, "loss": 0.6287, "step": 143155 }, { "epoch": 1.59, "learning_rate": 2.35804765701125e-05, "loss": 0.6717, "step": 143160 }, { "epoch": 1.59, "learning_rate": 2.3579553842973988e-05, "loss": 0.5932, "step": 143165 }, { "epoch": 1.59, "learning_rate": 2.3578631115835475e-05, "loss": 0.6078, "step": 143170 }, { "epoch": 1.59, "learning_rate": 2.3577708388696963e-05, "loss": 0.6629, "step": 143175 }, { "epoch": 1.59, "learning_rate": 2.357678566155845e-05, "loss": 0.6437, "step": 143180 }, { "epoch": 1.59, "learning_rate": 2.3575862934419936e-05, "loss": 0.6658, "step": 143185 }, { "epoch": 1.59, "learning_rate": 2.3574940207281427e-05, "loss": 0.6046, "step": 143190 }, { "epoch": 1.59, "learning_rate": 2.357401748014291e-05, "loss": 0.6285, "step": 143195 }, { "epoch": 1.59, "learning_rate": 2.3573094753004402e-05, "loss": 0.6504, "step": 143200 }, { "epoch": 1.59, "learning_rate": 2.3572172025865887e-05, "loss": 0.6774, "step": 143205 }, { "epoch": 1.59, "learning_rate": 2.3571249298727378e-05, "loss": 0.6581, "step": 143210 }, { "epoch": 1.59, "learning_rate": 2.3570326571588863e-05, "loss": 0.6012, "step": 143215 }, { "epoch": 1.59, "learning_rate": 2.3569403844450354e-05, "loss": 0.5647, "step": 143220 }, { "epoch": 1.59, "learning_rate": 2.356848111731184e-05, "loss": 0.6662, "step": 143225 }, { "epoch": 1.59, "learning_rate": 2.3567558390173326e-05, "loss": 0.6022, "step": 143230 }, { "epoch": 1.59, "learning_rate": 2.3566635663034814e-05, "loss": 0.6482, "step": 143235 }, { "epoch": 1.59, "learning_rate": 2.3565712935896302e-05, "loss": 0.6389, "step": 143240 }, { "epoch": 1.59, "learning_rate": 2.356479020875779e-05, "loss": 0.6523, "step": 143245 }, { "epoch": 1.59, "learning_rate": 2.3563867481619274e-05, "loss": 0.6167, "step": 143250 }, { "epoch": 1.59, "learning_rate": 2.3562944754480765e-05, "loss": 0.6425, "step": 143255 }, { "epoch": 1.59, "learning_rate": 2.356202202734225e-05, "loss": 0.6228, "step": 143260 }, { "epoch": 1.59, "learning_rate": 2.356109930020374e-05, "loss": 0.6481, "step": 143265 }, { "epoch": 1.59, "learning_rate": 2.3560176573065226e-05, "loss": 0.6618, "step": 143270 }, { "epoch": 1.59, "learning_rate": 2.3559253845926717e-05, "loss": 0.639, "step": 143275 }, { "epoch": 1.59, "learning_rate": 2.35583311187882e-05, "loss": 0.6521, "step": 143280 }, { "epoch": 1.59, "learning_rate": 2.355740839164969e-05, "loss": 0.6008, "step": 143285 }, { "epoch": 1.59, "learning_rate": 2.3556485664511177e-05, "loss": 0.6475, "step": 143290 }, { "epoch": 1.59, "learning_rate": 2.3555562937372665e-05, "loss": 0.6994, "step": 143295 }, { "epoch": 1.59, "learning_rate": 2.3554640210234153e-05, "loss": 0.6354, "step": 143300 }, { "epoch": 1.59, "learning_rate": 2.3553717483095637e-05, "loss": 0.6451, "step": 143305 }, { "epoch": 1.59, "learning_rate": 2.355279475595713e-05, "loss": 0.5687, "step": 143310 }, { "epoch": 1.59, "learning_rate": 2.3551872028818613e-05, "loss": 0.5952, "step": 143315 }, { "epoch": 1.59, "learning_rate": 2.3550949301680104e-05, "loss": 0.6665, "step": 143320 }, { "epoch": 1.59, "learning_rate": 2.355002657454159e-05, "loss": 0.6644, "step": 143325 }, { "epoch": 1.59, "learning_rate": 2.354910384740308e-05, "loss": 0.6347, "step": 143330 }, { "epoch": 1.59, "learning_rate": 2.3548181120264564e-05, "loss": 0.6014, "step": 143335 }, { "epoch": 1.59, "learning_rate": 2.3547258393126052e-05, "loss": 0.6784, "step": 143340 }, { "epoch": 1.59, "learning_rate": 2.354633566598754e-05, "loss": 0.5731, "step": 143345 }, { "epoch": 1.59, "learning_rate": 2.3545412938849028e-05, "loss": 0.6065, "step": 143350 }, { "epoch": 1.59, "learning_rate": 2.3544490211710516e-05, "loss": 0.6438, "step": 143355 }, { "epoch": 1.59, "learning_rate": 2.3543567484572003e-05, "loss": 0.5921, "step": 143360 }, { "epoch": 1.59, "learning_rate": 2.354264475743349e-05, "loss": 0.6495, "step": 143365 }, { "epoch": 1.59, "learning_rate": 2.354172203029498e-05, "loss": 0.5715, "step": 143370 }, { "epoch": 1.59, "learning_rate": 2.3540799303156467e-05, "loss": 0.6057, "step": 143375 }, { "epoch": 1.59, "learning_rate": 2.353987657601795e-05, "loss": 0.6648, "step": 143380 }, { "epoch": 1.59, "learning_rate": 2.3538953848879443e-05, "loss": 0.6178, "step": 143385 }, { "epoch": 1.59, "learning_rate": 2.3538031121740927e-05, "loss": 0.6165, "step": 143390 }, { "epoch": 1.59, "learning_rate": 2.353710839460242e-05, "loss": 0.6254, "step": 143395 }, { "epoch": 1.59, "learning_rate": 2.3536185667463903e-05, "loss": 0.5991, "step": 143400 }, { "epoch": 1.59, "learning_rate": 2.353526294032539e-05, "loss": 0.6351, "step": 143405 }, { "epoch": 1.59, "learning_rate": 2.353434021318688e-05, "loss": 0.6266, "step": 143410 }, { "epoch": 1.59, "learning_rate": 2.3533417486048366e-05, "loss": 0.5804, "step": 143415 }, { "epoch": 1.59, "learning_rate": 2.3532494758909854e-05, "loss": 0.586, "step": 143420 }, { "epoch": 1.59, "learning_rate": 2.3531572031771342e-05, "loss": 0.6223, "step": 143425 }, { "epoch": 1.59, "learning_rate": 2.353064930463283e-05, "loss": 0.6147, "step": 143430 }, { "epoch": 1.59, "learning_rate": 2.3529726577494318e-05, "loss": 0.6375, "step": 143435 }, { "epoch": 1.59, "learning_rate": 2.3528803850355806e-05, "loss": 0.6492, "step": 143440 }, { "epoch": 1.59, "learning_rate": 2.3527881123217293e-05, "loss": 0.6217, "step": 143445 }, { "epoch": 1.59, "learning_rate": 2.352695839607878e-05, "loss": 0.6217, "step": 143450 }, { "epoch": 1.59, "learning_rate": 2.3526035668940266e-05, "loss": 0.6052, "step": 143455 }, { "epoch": 1.59, "learning_rate": 2.3525112941801754e-05, "loss": 0.6245, "step": 143460 }, { "epoch": 1.59, "learning_rate": 2.352419021466324e-05, "loss": 0.6441, "step": 143465 }, { "epoch": 1.59, "learning_rate": 2.352326748752473e-05, "loss": 0.6207, "step": 143470 }, { "epoch": 1.59, "learning_rate": 2.3522344760386217e-05, "loss": 0.6372, "step": 143475 }, { "epoch": 1.59, "learning_rate": 2.3521422033247705e-05, "loss": 0.6489, "step": 143480 }, { "epoch": 1.59, "learning_rate": 2.3520499306109193e-05, "loss": 0.6089, "step": 143485 }, { "epoch": 1.59, "learning_rate": 2.351957657897068e-05, "loss": 0.6099, "step": 143490 }, { "epoch": 1.59, "learning_rate": 2.351865385183217e-05, "loss": 0.6278, "step": 143495 }, { "epoch": 1.59, "learning_rate": 2.3517731124693656e-05, "loss": 0.6002, "step": 143500 }, { "epoch": 1.59, "learning_rate": 2.3516808397555144e-05, "loss": 0.6207, "step": 143505 }, { "epoch": 1.59, "learning_rate": 2.3515885670416632e-05, "loss": 0.6529, "step": 143510 }, { "epoch": 1.59, "learning_rate": 2.3514962943278117e-05, "loss": 0.5843, "step": 143515 }, { "epoch": 1.59, "learning_rate": 2.3514040216139608e-05, "loss": 0.6424, "step": 143520 }, { "epoch": 1.59, "learning_rate": 2.3513117489001092e-05, "loss": 0.6561, "step": 143525 }, { "epoch": 1.59, "learning_rate": 2.351219476186258e-05, "loss": 0.5864, "step": 143530 }, { "epoch": 1.59, "learning_rate": 2.3511272034724068e-05, "loss": 0.6175, "step": 143535 }, { "epoch": 1.59, "learning_rate": 2.3510349307585556e-05, "loss": 0.6544, "step": 143540 }, { "epoch": 1.59, "learning_rate": 2.3509426580447044e-05, "loss": 0.624, "step": 143545 }, { "epoch": 1.59, "learning_rate": 2.350850385330853e-05, "loss": 0.6579, "step": 143550 }, { "epoch": 1.59, "learning_rate": 2.350758112617002e-05, "loss": 0.5842, "step": 143555 }, { "epoch": 1.59, "learning_rate": 2.3506658399031507e-05, "loss": 0.6306, "step": 143560 }, { "epoch": 1.59, "learning_rate": 2.3505735671892995e-05, "loss": 0.6389, "step": 143565 }, { "epoch": 1.59, "learning_rate": 2.3504812944754483e-05, "loss": 0.6582, "step": 143570 }, { "epoch": 1.59, "learning_rate": 2.350389021761597e-05, "loss": 0.6774, "step": 143575 }, { "epoch": 1.59, "learning_rate": 2.3502967490477455e-05, "loss": 0.6357, "step": 143580 }, { "epoch": 1.59, "learning_rate": 2.3502044763338946e-05, "loss": 0.6064, "step": 143585 }, { "epoch": 1.59, "learning_rate": 2.350112203620043e-05, "loss": 0.6049, "step": 143590 }, { "epoch": 1.59, "learning_rate": 2.3500199309061922e-05, "loss": 0.6175, "step": 143595 }, { "epoch": 1.59, "learning_rate": 2.3499276581923407e-05, "loss": 0.632, "step": 143600 }, { "epoch": 1.59, "learning_rate": 2.3498353854784894e-05, "loss": 0.6031, "step": 143605 }, { "epoch": 1.59, "learning_rate": 2.3497431127646382e-05, "loss": 0.6464, "step": 143610 }, { "epoch": 1.59, "learning_rate": 2.349650840050787e-05, "loss": 0.6249, "step": 143615 }, { "epoch": 1.59, "learning_rate": 2.3495585673369358e-05, "loss": 0.6469, "step": 143620 }, { "epoch": 1.59, "learning_rate": 2.3494662946230846e-05, "loss": 0.7254, "step": 143625 }, { "epoch": 1.59, "learning_rate": 2.3493740219092334e-05, "loss": 0.6367, "step": 143630 }, { "epoch": 1.59, "learning_rate": 2.3492817491953818e-05, "loss": 0.5826, "step": 143635 }, { "epoch": 1.59, "learning_rate": 2.349189476481531e-05, "loss": 0.6008, "step": 143640 }, { "epoch": 1.59, "learning_rate": 2.3490972037676794e-05, "loss": 0.6876, "step": 143645 }, { "epoch": 1.59, "learning_rate": 2.3490049310538285e-05, "loss": 0.5714, "step": 143650 }, { "epoch": 1.59, "learning_rate": 2.348912658339977e-05, "loss": 0.6068, "step": 143655 }, { "epoch": 1.59, "learning_rate": 2.348820385626126e-05, "loss": 0.5999, "step": 143660 }, { "epoch": 1.59, "learning_rate": 2.3487281129122745e-05, "loss": 0.6218, "step": 143665 }, { "epoch": 1.59, "learning_rate": 2.3486358401984233e-05, "loss": 0.6823, "step": 143670 }, { "epoch": 1.59, "learning_rate": 2.348543567484572e-05, "loss": 0.6261, "step": 143675 }, { "epoch": 1.59, "learning_rate": 2.348451294770721e-05, "loss": 0.6614, "step": 143680 }, { "epoch": 1.59, "learning_rate": 2.3483590220568697e-05, "loss": 0.6676, "step": 143685 }, { "epoch": 1.59, "learning_rate": 2.348266749343018e-05, "loss": 0.6011, "step": 143690 }, { "epoch": 1.59, "learning_rate": 2.3481744766291672e-05, "loss": 0.656, "step": 143695 }, { "epoch": 1.59, "learning_rate": 2.3480822039153157e-05, "loss": 0.6981, "step": 143700 }, { "epoch": 1.59, "learning_rate": 2.3479899312014648e-05, "loss": 0.6127, "step": 143705 }, { "epoch": 1.59, "learning_rate": 2.3478976584876133e-05, "loss": 0.6406, "step": 143710 }, { "epoch": 1.59, "learning_rate": 2.3478053857737624e-05, "loss": 0.6687, "step": 143715 }, { "epoch": 1.59, "learning_rate": 2.3477131130599108e-05, "loss": 0.6364, "step": 143720 }, { "epoch": 1.59, "learning_rate": 2.34762084034606e-05, "loss": 0.6051, "step": 143725 }, { "epoch": 1.59, "learning_rate": 2.3475285676322084e-05, "loss": 0.6117, "step": 143730 }, { "epoch": 1.59, "learning_rate": 2.3474362949183572e-05, "loss": 0.6455, "step": 143735 }, { "epoch": 1.59, "learning_rate": 2.347344022204506e-05, "loss": 0.6144, "step": 143740 }, { "epoch": 1.59, "learning_rate": 2.3472517494906547e-05, "loss": 0.6284, "step": 143745 }, { "epoch": 1.59, "learning_rate": 2.3471594767768035e-05, "loss": 0.6298, "step": 143750 }, { "epoch": 1.59, "learning_rate": 2.3470672040629523e-05, "loss": 0.6518, "step": 143755 }, { "epoch": 1.59, "learning_rate": 2.346974931349101e-05, "loss": 0.6878, "step": 143760 }, { "epoch": 1.59, "learning_rate": 2.3468826586352495e-05, "loss": 0.6794, "step": 143765 }, { "epoch": 1.59, "learning_rate": 2.3467903859213987e-05, "loss": 0.5634, "step": 143770 }, { "epoch": 1.59, "learning_rate": 2.346698113207547e-05, "loss": 0.617, "step": 143775 }, { "epoch": 1.59, "learning_rate": 2.3466058404936962e-05, "loss": 0.6867, "step": 143780 }, { "epoch": 1.59, "learning_rate": 2.3465135677798447e-05, "loss": 0.5726, "step": 143785 }, { "epoch": 1.59, "learning_rate": 2.3464212950659935e-05, "loss": 0.6363, "step": 143790 }, { "epoch": 1.59, "learning_rate": 2.3463290223521423e-05, "loss": 0.6281, "step": 143795 }, { "epoch": 1.59, "learning_rate": 2.346236749638291e-05, "loss": 0.6295, "step": 143800 }, { "epoch": 1.59, "learning_rate": 2.3461444769244398e-05, "loss": 0.6252, "step": 143805 }, { "epoch": 1.59, "learning_rate": 2.3460522042105886e-05, "loss": 0.6427, "step": 143810 }, { "epoch": 1.59, "learning_rate": 2.3459599314967374e-05, "loss": 0.6164, "step": 143815 }, { "epoch": 1.59, "learning_rate": 2.3458676587828862e-05, "loss": 0.6156, "step": 143820 }, { "epoch": 1.59, "learning_rate": 2.345775386069035e-05, "loss": 0.6558, "step": 143825 }, { "epoch": 1.59, "learning_rate": 2.3456831133551838e-05, "loss": 0.6873, "step": 143830 }, { "epoch": 1.59, "learning_rate": 2.3455908406413325e-05, "loss": 0.6422, "step": 143835 }, { "epoch": 1.59, "learning_rate": 2.345498567927481e-05, "loss": 0.673, "step": 143840 }, { "epoch": 1.59, "learning_rate": 2.3454062952136298e-05, "loss": 0.5849, "step": 143845 }, { "epoch": 1.59, "learning_rate": 2.3453140224997786e-05, "loss": 0.6093, "step": 143850 }, { "epoch": 1.59, "learning_rate": 2.3452217497859273e-05, "loss": 0.6894, "step": 143855 }, { "epoch": 1.59, "learning_rate": 2.345129477072076e-05, "loss": 0.6151, "step": 143860 }, { "epoch": 1.59, "learning_rate": 2.345037204358225e-05, "loss": 0.5801, "step": 143865 }, { "epoch": 1.59, "learning_rate": 2.3449449316443737e-05, "loss": 0.6597, "step": 143870 }, { "epoch": 1.59, "learning_rate": 2.3448526589305225e-05, "loss": 0.7004, "step": 143875 }, { "epoch": 1.59, "learning_rate": 2.3447603862166713e-05, "loss": 0.6137, "step": 143880 }, { "epoch": 1.59, "learning_rate": 2.34466811350282e-05, "loss": 0.6399, "step": 143885 }, { "epoch": 1.59, "learning_rate": 2.344575840788969e-05, "loss": 0.6339, "step": 143890 }, { "epoch": 1.59, "learning_rate": 2.3444835680751176e-05, "loss": 0.6808, "step": 143895 }, { "epoch": 1.59, "learning_rate": 2.344391295361266e-05, "loss": 0.6426, "step": 143900 }, { "epoch": 1.59, "learning_rate": 2.3442990226474152e-05, "loss": 0.639, "step": 143905 }, { "epoch": 1.59, "learning_rate": 2.3442067499335636e-05, "loss": 0.6032, "step": 143910 }, { "epoch": 1.59, "learning_rate": 2.3441144772197124e-05, "loss": 0.6668, "step": 143915 }, { "epoch": 1.59, "learning_rate": 2.3440222045058612e-05, "loss": 0.6763, "step": 143920 }, { "epoch": 1.59, "learning_rate": 2.34392993179201e-05, "loss": 0.6456, "step": 143925 }, { "epoch": 1.59, "learning_rate": 2.3438376590781588e-05, "loss": 0.6322, "step": 143930 }, { "epoch": 1.59, "learning_rate": 2.3437453863643076e-05, "loss": 0.6106, "step": 143935 }, { "epoch": 1.59, "learning_rate": 2.3436531136504563e-05, "loss": 0.6224, "step": 143940 }, { "epoch": 1.59, "learning_rate": 2.343560840936605e-05, "loss": 0.6514, "step": 143945 }, { "epoch": 1.59, "learning_rate": 2.343468568222754e-05, "loss": 0.6616, "step": 143950 }, { "epoch": 1.59, "learning_rate": 2.3433762955089027e-05, "loss": 0.6357, "step": 143955 }, { "epoch": 1.59, "learning_rate": 2.3432840227950515e-05, "loss": 0.6175, "step": 143960 }, { "epoch": 1.59, "learning_rate": 2.3431917500812e-05, "loss": 0.6729, "step": 143965 }, { "epoch": 1.59, "learning_rate": 2.343099477367349e-05, "loss": 0.645, "step": 143970 }, { "epoch": 1.59, "learning_rate": 2.3430072046534975e-05, "loss": 0.6688, "step": 143975 }, { "epoch": 1.59, "learning_rate": 2.3429149319396466e-05, "loss": 0.6273, "step": 143980 }, { "epoch": 1.59, "learning_rate": 2.342822659225795e-05, "loss": 0.6375, "step": 143985 }, { "epoch": 1.59, "learning_rate": 2.342730386511944e-05, "loss": 0.5958, "step": 143990 }, { "epoch": 1.59, "learning_rate": 2.3426381137980926e-05, "loss": 0.6233, "step": 143995 }, { "epoch": 1.59, "learning_rate": 2.3425458410842414e-05, "loss": 0.6154, "step": 144000 }, { "epoch": 1.59, "eval_loss": 0.6352951526641846, "eval_runtime": 69.1546, "eval_samples_per_second": 28.921, "eval_steps_per_second": 14.46, "step": 144000 }, { "epoch": 1.59, "learning_rate": 2.3424535683703902e-05, "loss": 0.6657, "step": 144005 }, { "epoch": 1.59, "learning_rate": 2.342361295656539e-05, "loss": 0.6618, "step": 144010 }, { "epoch": 1.59, "learning_rate": 2.3422690229426878e-05, "loss": 0.6182, "step": 144015 }, { "epoch": 1.59, "learning_rate": 2.3421767502288362e-05, "loss": 0.5763, "step": 144020 }, { "epoch": 1.59, "learning_rate": 2.3420844775149853e-05, "loss": 0.6363, "step": 144025 }, { "epoch": 1.59, "learning_rate": 2.3419922048011338e-05, "loss": 0.6954, "step": 144030 }, { "epoch": 1.59, "learning_rate": 2.341899932087283e-05, "loss": 0.6258, "step": 144035 }, { "epoch": 1.59, "learning_rate": 2.3418076593734314e-05, "loss": 0.6655, "step": 144040 }, { "epoch": 1.59, "learning_rate": 2.3417153866595805e-05, "loss": 0.6856, "step": 144045 }, { "epoch": 1.6, "learning_rate": 2.341623113945729e-05, "loss": 0.6151, "step": 144050 }, { "epoch": 1.6, "learning_rate": 2.3415308412318777e-05, "loss": 0.6628, "step": 144055 }, { "epoch": 1.6, "learning_rate": 2.3414385685180265e-05, "loss": 0.595, "step": 144060 }, { "epoch": 1.6, "learning_rate": 2.3413462958041753e-05, "loss": 0.6172, "step": 144065 }, { "epoch": 1.6, "learning_rate": 2.341254023090324e-05, "loss": 0.6753, "step": 144070 }, { "epoch": 1.6, "learning_rate": 2.3411617503764725e-05, "loss": 0.644, "step": 144075 }, { "epoch": 1.6, "learning_rate": 2.3410694776626216e-05, "loss": 0.6235, "step": 144080 }, { "epoch": 1.6, "learning_rate": 2.34097720494877e-05, "loss": 0.6216, "step": 144085 }, { "epoch": 1.6, "learning_rate": 2.3408849322349192e-05, "loss": 0.652, "step": 144090 }, { "epoch": 1.6, "learning_rate": 2.3407926595210677e-05, "loss": 0.6102, "step": 144095 }, { "epoch": 1.6, "learning_rate": 2.3407003868072168e-05, "loss": 0.6858, "step": 144100 }, { "epoch": 1.6, "learning_rate": 2.3406081140933652e-05, "loss": 0.6622, "step": 144105 }, { "epoch": 1.6, "learning_rate": 2.3405158413795143e-05, "loss": 0.591, "step": 144110 }, { "epoch": 1.6, "learning_rate": 2.3404235686656628e-05, "loss": 0.6186, "step": 144115 }, { "epoch": 1.6, "learning_rate": 2.3403312959518116e-05, "loss": 0.6648, "step": 144120 }, { "epoch": 1.6, "learning_rate": 2.3402390232379604e-05, "loss": 0.6141, "step": 144125 }, { "epoch": 1.6, "learning_rate": 2.340146750524109e-05, "loss": 0.5918, "step": 144130 }, { "epoch": 1.6, "learning_rate": 2.340054477810258e-05, "loss": 0.6709, "step": 144135 }, { "epoch": 1.6, "learning_rate": 2.3399622050964064e-05, "loss": 0.6839, "step": 144140 }, { "epoch": 1.6, "learning_rate": 2.3398699323825555e-05, "loss": 0.6171, "step": 144145 }, { "epoch": 1.6, "learning_rate": 2.339777659668704e-05, "loss": 0.6275, "step": 144150 }, { "epoch": 1.6, "learning_rate": 2.339685386954853e-05, "loss": 0.6631, "step": 144155 }, { "epoch": 1.6, "learning_rate": 2.3395931142410015e-05, "loss": 0.6264, "step": 144160 }, { "epoch": 1.6, "learning_rate": 2.3395008415271506e-05, "loss": 0.6363, "step": 144165 }, { "epoch": 1.6, "learning_rate": 2.339408568813299e-05, "loss": 0.5979, "step": 144170 }, { "epoch": 1.6, "learning_rate": 2.339316296099448e-05, "loss": 0.598, "step": 144175 }, { "epoch": 1.6, "learning_rate": 2.3392240233855967e-05, "loss": 0.5928, "step": 144180 }, { "epoch": 1.6, "learning_rate": 2.3391317506717454e-05, "loss": 0.6604, "step": 144185 }, { "epoch": 1.6, "learning_rate": 2.3390394779578942e-05, "loss": 0.7182, "step": 144190 }, { "epoch": 1.6, "learning_rate": 2.338947205244043e-05, "loss": 0.7109, "step": 144195 }, { "epoch": 1.6, "learning_rate": 2.3388549325301918e-05, "loss": 0.6432, "step": 144200 }, { "epoch": 1.6, "learning_rate": 2.3387626598163406e-05, "loss": 0.658, "step": 144205 }, { "epoch": 1.6, "learning_rate": 2.3386703871024894e-05, "loss": 0.6156, "step": 144210 }, { "epoch": 1.6, "learning_rate": 2.3385781143886378e-05, "loss": 0.628, "step": 144215 }, { "epoch": 1.6, "learning_rate": 2.338485841674787e-05, "loss": 0.6775, "step": 144220 }, { "epoch": 1.6, "learning_rate": 2.3383935689609354e-05, "loss": 0.6815, "step": 144225 }, { "epoch": 1.6, "learning_rate": 2.338301296247084e-05, "loss": 0.595, "step": 144230 }, { "epoch": 1.6, "learning_rate": 2.338209023533233e-05, "loss": 0.6235, "step": 144235 }, { "epoch": 1.6, "learning_rate": 2.3381167508193817e-05, "loss": 0.6311, "step": 144240 }, { "epoch": 1.6, "learning_rate": 2.3380244781055305e-05, "loss": 0.5838, "step": 144245 }, { "epoch": 1.6, "learning_rate": 2.3379322053916793e-05, "loss": 0.6073, "step": 144250 }, { "epoch": 1.6, "learning_rate": 2.337839932677828e-05, "loss": 0.6989, "step": 144255 }, { "epoch": 1.6, "learning_rate": 2.337747659963977e-05, "loss": 0.6049, "step": 144260 }, { "epoch": 1.6, "learning_rate": 2.3376553872501257e-05, "loss": 0.5618, "step": 144265 }, { "epoch": 1.6, "learning_rate": 2.3375631145362744e-05, "loss": 0.578, "step": 144270 }, { "epoch": 1.6, "learning_rate": 2.3374708418224232e-05, "loss": 0.6557, "step": 144275 }, { "epoch": 1.6, "learning_rate": 2.337378569108572e-05, "loss": 0.6026, "step": 144280 }, { "epoch": 1.6, "learning_rate": 2.3372862963947205e-05, "loss": 0.6275, "step": 144285 }, { "epoch": 1.6, "learning_rate": 2.3371940236808692e-05, "loss": 0.6154, "step": 144290 }, { "epoch": 1.6, "learning_rate": 2.337101750967018e-05, "loss": 0.6464, "step": 144295 }, { "epoch": 1.6, "learning_rate": 2.3370094782531668e-05, "loss": 0.6248, "step": 144300 }, { "epoch": 1.6, "learning_rate": 2.3369172055393156e-05, "loss": 0.6315, "step": 144305 }, { "epoch": 1.6, "learning_rate": 2.3368249328254644e-05, "loss": 0.6431, "step": 144310 }, { "epoch": 1.6, "learning_rate": 2.3367326601116132e-05, "loss": 0.6765, "step": 144315 }, { "epoch": 1.6, "learning_rate": 2.336640387397762e-05, "loss": 0.6557, "step": 144320 }, { "epoch": 1.6, "learning_rate": 2.3365481146839107e-05, "loss": 0.6594, "step": 144325 }, { "epoch": 1.6, "learning_rate": 2.3364558419700595e-05, "loss": 0.6474, "step": 144330 }, { "epoch": 1.6, "learning_rate": 2.3363635692562083e-05, "loss": 0.6278, "step": 144335 }, { "epoch": 1.6, "learning_rate": 2.336271296542357e-05, "loss": 0.6433, "step": 144340 }, { "epoch": 1.6, "learning_rate": 2.336179023828506e-05, "loss": 0.6561, "step": 144345 }, { "epoch": 1.6, "learning_rate": 2.3360867511146543e-05, "loss": 0.6401, "step": 144350 }, { "epoch": 1.6, "learning_rate": 2.3359944784008035e-05, "loss": 0.6856, "step": 144355 }, { "epoch": 1.6, "learning_rate": 2.335902205686952e-05, "loss": 0.6558, "step": 144360 }, { "epoch": 1.6, "learning_rate": 2.3358099329731007e-05, "loss": 0.6274, "step": 144365 }, { "epoch": 1.6, "learning_rate": 2.3357176602592495e-05, "loss": 0.6215, "step": 144370 }, { "epoch": 1.6, "learning_rate": 2.3356253875453983e-05, "loss": 0.6375, "step": 144375 }, { "epoch": 1.6, "learning_rate": 2.335533114831547e-05, "loss": 0.5963, "step": 144380 }, { "epoch": 1.6, "learning_rate": 2.3354408421176958e-05, "loss": 0.6558, "step": 144385 }, { "epoch": 1.6, "learning_rate": 2.3353485694038446e-05, "loss": 0.5817, "step": 144390 }, { "epoch": 1.6, "learning_rate": 2.3352562966899934e-05, "loss": 0.6578, "step": 144395 }, { "epoch": 1.6, "learning_rate": 2.3351640239761422e-05, "loss": 0.6005, "step": 144400 }, { "epoch": 1.6, "learning_rate": 2.3350717512622906e-05, "loss": 0.6845, "step": 144405 }, { "epoch": 1.6, "learning_rate": 2.3349794785484397e-05, "loss": 0.6334, "step": 144410 }, { "epoch": 1.6, "learning_rate": 2.3348872058345882e-05, "loss": 0.6125, "step": 144415 }, { "epoch": 1.6, "learning_rate": 2.3347949331207373e-05, "loss": 0.6094, "step": 144420 }, { "epoch": 1.6, "learning_rate": 2.3347026604068858e-05, "loss": 0.5985, "step": 144425 }, { "epoch": 1.6, "learning_rate": 2.334610387693035e-05, "loss": 0.6296, "step": 144430 }, { "epoch": 1.6, "learning_rate": 2.3345181149791833e-05, "loss": 0.6542, "step": 144435 }, { "epoch": 1.6, "learning_rate": 2.334425842265332e-05, "loss": 0.6158, "step": 144440 }, { "epoch": 1.6, "learning_rate": 2.334333569551481e-05, "loss": 0.6514, "step": 144445 }, { "epoch": 1.6, "learning_rate": 2.3342412968376297e-05, "loss": 0.624, "step": 144450 }, { "epoch": 1.6, "learning_rate": 2.3341490241237785e-05, "loss": 0.6555, "step": 144455 }, { "epoch": 1.6, "learning_rate": 2.334056751409927e-05, "loss": 0.6422, "step": 144460 }, { "epoch": 1.6, "learning_rate": 2.333964478696076e-05, "loss": 0.6267, "step": 144465 }, { "epoch": 1.6, "learning_rate": 2.3338722059822245e-05, "loss": 0.6219, "step": 144470 }, { "epoch": 1.6, "learning_rate": 2.3337799332683736e-05, "loss": 0.6108, "step": 144475 }, { "epoch": 1.6, "learning_rate": 2.333687660554522e-05, "loss": 0.6307, "step": 144480 }, { "epoch": 1.6, "learning_rate": 2.3335953878406712e-05, "loss": 0.6315, "step": 144485 }, { "epoch": 1.6, "learning_rate": 2.3335031151268196e-05, "loss": 0.6265, "step": 144490 }, { "epoch": 1.6, "learning_rate": 2.3334108424129688e-05, "loss": 0.6004, "step": 144495 }, { "epoch": 1.6, "learning_rate": 2.3333185696991172e-05, "loss": 0.6221, "step": 144500 }, { "epoch": 1.6, "learning_rate": 2.333226296985266e-05, "loss": 0.6177, "step": 144505 }, { "epoch": 1.6, "learning_rate": 2.3331340242714148e-05, "loss": 0.6454, "step": 144510 }, { "epoch": 1.6, "learning_rate": 2.3330417515575636e-05, "loss": 0.6453, "step": 144515 }, { "epoch": 1.6, "learning_rate": 2.3329494788437123e-05, "loss": 0.5818, "step": 144520 }, { "epoch": 1.6, "learning_rate": 2.3328572061298608e-05, "loss": 0.6043, "step": 144525 }, { "epoch": 1.6, "learning_rate": 2.33276493341601e-05, "loss": 0.5983, "step": 144530 }, { "epoch": 1.6, "learning_rate": 2.3326726607021584e-05, "loss": 0.6542, "step": 144535 }, { "epoch": 1.6, "learning_rate": 2.3325803879883075e-05, "loss": 0.5815, "step": 144540 }, { "epoch": 1.6, "learning_rate": 2.332488115274456e-05, "loss": 0.6934, "step": 144545 }, { "epoch": 1.6, "learning_rate": 2.332395842560605e-05, "loss": 0.6463, "step": 144550 }, { "epoch": 1.6, "learning_rate": 2.3323035698467535e-05, "loss": 0.598, "step": 144555 }, { "epoch": 1.6, "learning_rate": 2.3322112971329023e-05, "loss": 0.607, "step": 144560 }, { "epoch": 1.6, "learning_rate": 2.332119024419051e-05, "loss": 0.6104, "step": 144565 }, { "epoch": 1.6, "learning_rate": 2.3320267517052e-05, "loss": 0.6723, "step": 144570 }, { "epoch": 1.6, "learning_rate": 2.3319344789913486e-05, "loss": 0.6227, "step": 144575 }, { "epoch": 1.6, "learning_rate": 2.3318422062774974e-05, "loss": 0.6326, "step": 144580 }, { "epoch": 1.6, "learning_rate": 2.3317499335636462e-05, "loss": 0.6331, "step": 144585 }, { "epoch": 1.6, "learning_rate": 2.331657660849795e-05, "loss": 0.6697, "step": 144590 }, { "epoch": 1.6, "learning_rate": 2.3315653881359438e-05, "loss": 0.7041, "step": 144595 }, { "epoch": 1.6, "learning_rate": 2.3314731154220922e-05, "loss": 0.672, "step": 144600 }, { "epoch": 1.6, "learning_rate": 2.3313808427082413e-05, "loss": 0.6699, "step": 144605 }, { "epoch": 1.6, "learning_rate": 2.3312885699943898e-05, "loss": 0.5516, "step": 144610 }, { "epoch": 1.6, "learning_rate": 2.3311962972805386e-05, "loss": 0.6255, "step": 144615 }, { "epoch": 1.6, "learning_rate": 2.3311040245666874e-05, "loss": 0.6416, "step": 144620 }, { "epoch": 1.6, "learning_rate": 2.331011751852836e-05, "loss": 0.6088, "step": 144625 }, { "epoch": 1.6, "learning_rate": 2.330919479138985e-05, "loss": 0.6373, "step": 144630 }, { "epoch": 1.6, "learning_rate": 2.3308272064251337e-05, "loss": 0.5829, "step": 144635 }, { "epoch": 1.6, "learning_rate": 2.3307349337112825e-05, "loss": 0.5934, "step": 144640 }, { "epoch": 1.6, "learning_rate": 2.3306426609974313e-05, "loss": 0.6509, "step": 144645 }, { "epoch": 1.6, "learning_rate": 2.33055038828358e-05, "loss": 0.6723, "step": 144650 }, { "epoch": 1.6, "learning_rate": 2.330458115569729e-05, "loss": 0.6368, "step": 144655 }, { "epoch": 1.6, "learning_rate": 2.3303658428558776e-05, "loss": 0.6373, "step": 144660 }, { "epoch": 1.6, "learning_rate": 2.3302735701420264e-05, "loss": 0.5934, "step": 144665 }, { "epoch": 1.6, "learning_rate": 2.3301812974281752e-05, "loss": 0.6223, "step": 144670 }, { "epoch": 1.6, "learning_rate": 2.3300890247143237e-05, "loss": 0.6458, "step": 144675 }, { "epoch": 1.6, "learning_rate": 2.3299967520004724e-05, "loss": 0.5968, "step": 144680 }, { "epoch": 1.6, "learning_rate": 2.3299044792866212e-05, "loss": 0.6077, "step": 144685 }, { "epoch": 1.6, "learning_rate": 2.32981220657277e-05, "loss": 0.6569, "step": 144690 }, { "epoch": 1.6, "learning_rate": 2.3297199338589188e-05, "loss": 0.6471, "step": 144695 }, { "epoch": 1.6, "learning_rate": 2.3296276611450676e-05, "loss": 0.6375, "step": 144700 }, { "epoch": 1.6, "learning_rate": 2.3295353884312164e-05, "loss": 0.5833, "step": 144705 }, { "epoch": 1.6, "learning_rate": 2.329443115717365e-05, "loss": 0.643, "step": 144710 }, { "epoch": 1.6, "learning_rate": 2.329350843003514e-05, "loss": 0.5738, "step": 144715 }, { "epoch": 1.6, "learning_rate": 2.3292585702896627e-05, "loss": 0.6003, "step": 144720 }, { "epoch": 1.6, "learning_rate": 2.3291662975758115e-05, "loss": 0.5998, "step": 144725 }, { "epoch": 1.6, "learning_rate": 2.3290740248619603e-05, "loss": 0.6226, "step": 144730 }, { "epoch": 1.6, "learning_rate": 2.3289817521481087e-05, "loss": 0.6309, "step": 144735 }, { "epoch": 1.6, "learning_rate": 2.328889479434258e-05, "loss": 0.5831, "step": 144740 }, { "epoch": 1.6, "learning_rate": 2.3287972067204063e-05, "loss": 0.6495, "step": 144745 }, { "epoch": 1.6, "learning_rate": 2.328704934006555e-05, "loss": 0.6495, "step": 144750 }, { "epoch": 1.6, "learning_rate": 2.328612661292704e-05, "loss": 0.5874, "step": 144755 }, { "epoch": 1.6, "learning_rate": 2.3285203885788527e-05, "loss": 0.6112, "step": 144760 }, { "epoch": 1.6, "learning_rate": 2.3284281158650014e-05, "loss": 0.6091, "step": 144765 }, { "epoch": 1.6, "learning_rate": 2.3283358431511502e-05, "loss": 0.5924, "step": 144770 }, { "epoch": 1.6, "learning_rate": 2.328243570437299e-05, "loss": 0.6636, "step": 144775 }, { "epoch": 1.6, "learning_rate": 2.3281512977234478e-05, "loss": 0.6271, "step": 144780 }, { "epoch": 1.6, "learning_rate": 2.3280590250095966e-05, "loss": 0.628, "step": 144785 }, { "epoch": 1.6, "learning_rate": 2.327966752295745e-05, "loss": 0.6631, "step": 144790 }, { "epoch": 1.6, "learning_rate": 2.327874479581894e-05, "loss": 0.6983, "step": 144795 }, { "epoch": 1.6, "learning_rate": 2.3277822068680426e-05, "loss": 0.6675, "step": 144800 }, { "epoch": 1.6, "learning_rate": 2.3276899341541917e-05, "loss": 0.6947, "step": 144805 }, { "epoch": 1.6, "learning_rate": 2.32759766144034e-05, "loss": 0.6595, "step": 144810 }, { "epoch": 1.6, "learning_rate": 2.3275053887264893e-05, "loss": 0.6935, "step": 144815 }, { "epoch": 1.6, "learning_rate": 2.3274131160126377e-05, "loss": 0.6387, "step": 144820 }, { "epoch": 1.6, "learning_rate": 2.3273208432987865e-05, "loss": 0.6517, "step": 144825 }, { "epoch": 1.6, "learning_rate": 2.3272285705849353e-05, "loss": 0.6489, "step": 144830 }, { "epoch": 1.6, "learning_rate": 2.327136297871084e-05, "loss": 0.6235, "step": 144835 }, { "epoch": 1.6, "learning_rate": 2.327044025157233e-05, "loss": 0.6347, "step": 144840 }, { "epoch": 1.6, "learning_rate": 2.3269517524433813e-05, "loss": 0.586, "step": 144845 }, { "epoch": 1.6, "learning_rate": 2.3268594797295304e-05, "loss": 0.6115, "step": 144850 }, { "epoch": 1.6, "learning_rate": 2.326767207015679e-05, "loss": 0.5963, "step": 144855 }, { "epoch": 1.6, "learning_rate": 2.326674934301828e-05, "loss": 0.6052, "step": 144860 }, { "epoch": 1.6, "learning_rate": 2.3265826615879765e-05, "loss": 0.6369, "step": 144865 }, { "epoch": 1.6, "learning_rate": 2.3264903888741256e-05, "loss": 0.6538, "step": 144870 }, { "epoch": 1.6, "learning_rate": 2.326398116160274e-05, "loss": 0.6146, "step": 144875 }, { "epoch": 1.6, "learning_rate": 2.326305843446423e-05, "loss": 0.6741, "step": 144880 }, { "epoch": 1.6, "learning_rate": 2.3262135707325716e-05, "loss": 0.6327, "step": 144885 }, { "epoch": 1.6, "learning_rate": 2.3261212980187204e-05, "loss": 0.6004, "step": 144890 }, { "epoch": 1.6, "learning_rate": 2.326029025304869e-05, "loss": 0.6476, "step": 144895 }, { "epoch": 1.6, "learning_rate": 2.325936752591018e-05, "loss": 0.6563, "step": 144900 }, { "epoch": 1.6, "learning_rate": 2.3258444798771667e-05, "loss": 0.6151, "step": 144905 }, { "epoch": 1.6, "learning_rate": 2.3257522071633152e-05, "loss": 0.6323, "step": 144910 }, { "epoch": 1.6, "learning_rate": 2.3256599344494643e-05, "loss": 0.6057, "step": 144915 }, { "epoch": 1.6, "learning_rate": 2.3255676617356128e-05, "loss": 0.6112, "step": 144920 }, { "epoch": 1.6, "learning_rate": 2.325475389021762e-05, "loss": 0.6276, "step": 144925 }, { "epoch": 1.6, "learning_rate": 2.3253831163079103e-05, "loss": 0.6724, "step": 144930 }, { "epoch": 1.6, "learning_rate": 2.3252908435940594e-05, "loss": 0.5914, "step": 144935 }, { "epoch": 1.6, "learning_rate": 2.325198570880208e-05, "loss": 0.6465, "step": 144940 }, { "epoch": 1.6, "learning_rate": 2.3251062981663567e-05, "loss": 0.6356, "step": 144945 }, { "epoch": 1.6, "learning_rate": 2.3250140254525055e-05, "loss": 0.6238, "step": 144950 }, { "epoch": 1.61, "learning_rate": 2.3249217527386542e-05, "loss": 0.6402, "step": 144955 }, { "epoch": 1.61, "learning_rate": 2.324829480024803e-05, "loss": 0.6148, "step": 144960 }, { "epoch": 1.61, "learning_rate": 2.3247372073109518e-05, "loss": 0.6335, "step": 144965 }, { "epoch": 1.61, "learning_rate": 2.3246449345971006e-05, "loss": 0.6284, "step": 144970 }, { "epoch": 1.61, "learning_rate": 2.324552661883249e-05, "loss": 0.6125, "step": 144975 }, { "epoch": 1.61, "learning_rate": 2.3244603891693982e-05, "loss": 0.6524, "step": 144980 }, { "epoch": 1.61, "learning_rate": 2.3243681164555466e-05, "loss": 0.6331, "step": 144985 }, { "epoch": 1.61, "learning_rate": 2.3242758437416957e-05, "loss": 0.6226, "step": 144990 }, { "epoch": 1.61, "learning_rate": 2.3241835710278442e-05, "loss": 0.6468, "step": 144995 }, { "epoch": 1.61, "learning_rate": 2.324091298313993e-05, "loss": 0.6298, "step": 145000 }, { "epoch": 1.61, "eval_loss": 0.5997066497802734, "eval_runtime": 69.1091, "eval_samples_per_second": 28.94, "eval_steps_per_second": 14.47, "step": 145000 }, { "epoch": 1.61, "learning_rate": 2.3239990256001418e-05, "loss": 0.5808, "step": 145005 }, { "epoch": 1.61, "learning_rate": 2.3239067528862905e-05, "loss": 0.6238, "step": 145010 }, { "epoch": 1.61, "learning_rate": 2.3238144801724393e-05, "loss": 0.5738, "step": 145015 }, { "epoch": 1.61, "learning_rate": 2.323722207458588e-05, "loss": 0.6878, "step": 145020 }, { "epoch": 1.61, "learning_rate": 2.323629934744737e-05, "loss": 0.6753, "step": 145025 }, { "epoch": 1.61, "learning_rate": 2.3235376620308857e-05, "loss": 0.5871, "step": 145030 }, { "epoch": 1.61, "learning_rate": 2.3234453893170345e-05, "loss": 0.6179, "step": 145035 }, { "epoch": 1.61, "learning_rate": 2.3233531166031833e-05, "loss": 0.6973, "step": 145040 }, { "epoch": 1.61, "learning_rate": 2.323260843889332e-05, "loss": 0.6544, "step": 145045 }, { "epoch": 1.61, "learning_rate": 2.3231685711754805e-05, "loss": 0.638, "step": 145050 }, { "epoch": 1.61, "learning_rate": 2.3230762984616296e-05, "loss": 0.5924, "step": 145055 }, { "epoch": 1.61, "learning_rate": 2.322984025747778e-05, "loss": 0.6411, "step": 145060 }, { "epoch": 1.61, "learning_rate": 2.322891753033927e-05, "loss": 0.642, "step": 145065 }, { "epoch": 1.61, "learning_rate": 2.3227994803200756e-05, "loss": 0.6452, "step": 145070 }, { "epoch": 1.61, "learning_rate": 2.3227072076062244e-05, "loss": 0.6921, "step": 145075 }, { "epoch": 1.61, "learning_rate": 2.3226149348923732e-05, "loss": 0.6101, "step": 145080 }, { "epoch": 1.61, "learning_rate": 2.322522662178522e-05, "loss": 0.6553, "step": 145085 }, { "epoch": 1.61, "learning_rate": 2.3224303894646708e-05, "loss": 0.6407, "step": 145090 }, { "epoch": 1.61, "learning_rate": 2.3223381167508195e-05, "loss": 0.6546, "step": 145095 }, { "epoch": 1.61, "learning_rate": 2.3222458440369683e-05, "loss": 0.6429, "step": 145100 }, { "epoch": 1.61, "learning_rate": 2.322153571323117e-05, "loss": 0.6128, "step": 145105 }, { "epoch": 1.61, "learning_rate": 2.322061298609266e-05, "loss": 0.64, "step": 145110 }, { "epoch": 1.61, "learning_rate": 2.3219690258954147e-05, "loss": 0.6448, "step": 145115 }, { "epoch": 1.61, "learning_rate": 2.321876753181563e-05, "loss": 0.6708, "step": 145120 }, { "epoch": 1.61, "learning_rate": 2.321784480467712e-05, "loss": 0.6271, "step": 145125 }, { "epoch": 1.61, "learning_rate": 2.3216922077538607e-05, "loss": 0.5972, "step": 145130 }, { "epoch": 1.61, "learning_rate": 2.3215999350400095e-05, "loss": 0.6325, "step": 145135 }, { "epoch": 1.61, "learning_rate": 2.3215076623261583e-05, "loss": 0.6535, "step": 145140 }, { "epoch": 1.61, "learning_rate": 2.321415389612307e-05, "loss": 0.5827, "step": 145145 }, { "epoch": 1.61, "learning_rate": 2.321323116898456e-05, "loss": 0.6928, "step": 145150 }, { "epoch": 1.61, "learning_rate": 2.3212308441846046e-05, "loss": 0.6267, "step": 145155 }, { "epoch": 1.61, "learning_rate": 2.3211385714707534e-05, "loss": 0.6187, "step": 145160 }, { "epoch": 1.61, "learning_rate": 2.3210462987569022e-05, "loss": 0.5689, "step": 145165 }, { "epoch": 1.61, "learning_rate": 2.320954026043051e-05, "loss": 0.6295, "step": 145170 }, { "epoch": 1.61, "learning_rate": 2.3208617533291994e-05, "loss": 0.6435, "step": 145175 }, { "epoch": 1.61, "learning_rate": 2.3207694806153486e-05, "loss": 0.6359, "step": 145180 }, { "epoch": 1.61, "learning_rate": 2.320677207901497e-05, "loss": 0.6205, "step": 145185 }, { "epoch": 1.61, "learning_rate": 2.320584935187646e-05, "loss": 0.595, "step": 145190 }, { "epoch": 1.61, "learning_rate": 2.3204926624737946e-05, "loss": 0.5573, "step": 145195 }, { "epoch": 1.61, "learning_rate": 2.3204003897599434e-05, "loss": 0.6578, "step": 145200 }, { "epoch": 1.61, "learning_rate": 2.320308117046092e-05, "loss": 0.6457, "step": 145205 }, { "epoch": 1.61, "learning_rate": 2.320215844332241e-05, "loss": 0.5969, "step": 145210 }, { "epoch": 1.61, "learning_rate": 2.3201235716183897e-05, "loss": 0.7112, "step": 145215 }, { "epoch": 1.61, "learning_rate": 2.3200312989045385e-05, "loss": 0.6303, "step": 145220 }, { "epoch": 1.61, "learning_rate": 2.3199390261906873e-05, "loss": 0.5593, "step": 145225 }, { "epoch": 1.61, "learning_rate": 2.3198467534768357e-05, "loss": 0.6762, "step": 145230 }, { "epoch": 1.61, "learning_rate": 2.319754480762985e-05, "loss": 0.6152, "step": 145235 }, { "epoch": 1.61, "learning_rate": 2.3196622080491333e-05, "loss": 0.6474, "step": 145240 }, { "epoch": 1.61, "learning_rate": 2.3195699353352824e-05, "loss": 0.6457, "step": 145245 }, { "epoch": 1.61, "learning_rate": 2.319477662621431e-05, "loss": 0.6668, "step": 145250 }, { "epoch": 1.61, "learning_rate": 2.31938538990758e-05, "loss": 0.6696, "step": 145255 }, { "epoch": 1.61, "learning_rate": 2.3192931171937284e-05, "loss": 0.6594, "step": 145260 }, { "epoch": 1.61, "learning_rate": 2.3192008444798776e-05, "loss": 0.6106, "step": 145265 }, { "epoch": 1.61, "learning_rate": 2.319108571766026e-05, "loss": 0.5943, "step": 145270 }, { "epoch": 1.61, "learning_rate": 2.3190162990521748e-05, "loss": 0.5917, "step": 145275 }, { "epoch": 1.61, "learning_rate": 2.3189240263383236e-05, "loss": 0.646, "step": 145280 }, { "epoch": 1.61, "learning_rate": 2.3188317536244724e-05, "loss": 0.5939, "step": 145285 }, { "epoch": 1.61, "learning_rate": 2.318739480910621e-05, "loss": 0.5922, "step": 145290 }, { "epoch": 1.61, "learning_rate": 2.3186472081967696e-05, "loss": 0.6626, "step": 145295 }, { "epoch": 1.61, "learning_rate": 2.3185549354829187e-05, "loss": 0.6441, "step": 145300 }, { "epoch": 1.61, "learning_rate": 2.318462662769067e-05, "loss": 0.5734, "step": 145305 }, { "epoch": 1.61, "learning_rate": 2.3183703900552163e-05, "loss": 0.6402, "step": 145310 }, { "epoch": 1.61, "learning_rate": 2.3182781173413647e-05, "loss": 0.62, "step": 145315 }, { "epoch": 1.61, "learning_rate": 2.318185844627514e-05, "loss": 0.6699, "step": 145320 }, { "epoch": 1.61, "learning_rate": 2.3180935719136623e-05, "loss": 0.6047, "step": 145325 }, { "epoch": 1.61, "learning_rate": 2.318001299199811e-05, "loss": 0.6396, "step": 145330 }, { "epoch": 1.61, "learning_rate": 2.31790902648596e-05, "loss": 0.5658, "step": 145335 }, { "epoch": 1.61, "learning_rate": 2.3178167537721086e-05, "loss": 0.6378, "step": 145340 }, { "epoch": 1.61, "learning_rate": 2.3177244810582574e-05, "loss": 0.6613, "step": 145345 }, { "epoch": 1.61, "learning_rate": 2.3176322083444062e-05, "loss": 0.653, "step": 145350 }, { "epoch": 1.61, "learning_rate": 2.317539935630555e-05, "loss": 0.6112, "step": 145355 }, { "epoch": 1.61, "learning_rate": 2.3174476629167035e-05, "loss": 0.5379, "step": 145360 }, { "epoch": 1.61, "learning_rate": 2.3173553902028526e-05, "loss": 0.6657, "step": 145365 }, { "epoch": 1.61, "learning_rate": 2.317263117489001e-05, "loss": 0.6324, "step": 145370 }, { "epoch": 1.61, "learning_rate": 2.31717084477515e-05, "loss": 0.6809, "step": 145375 }, { "epoch": 1.61, "learning_rate": 2.3170785720612986e-05, "loss": 0.6486, "step": 145380 }, { "epoch": 1.61, "learning_rate": 2.3169862993474474e-05, "loss": 0.604, "step": 145385 }, { "epoch": 1.61, "learning_rate": 2.316894026633596e-05, "loss": 0.695, "step": 145390 }, { "epoch": 1.61, "learning_rate": 2.316801753919745e-05, "loss": 0.6499, "step": 145395 }, { "epoch": 1.61, "learning_rate": 2.3167094812058937e-05, "loss": 0.6299, "step": 145400 }, { "epoch": 1.61, "learning_rate": 2.3166172084920425e-05, "loss": 0.6708, "step": 145405 }, { "epoch": 1.61, "learning_rate": 2.3165249357781913e-05, "loss": 0.6612, "step": 145410 }, { "epoch": 1.61, "learning_rate": 2.31643266306434e-05, "loss": 0.6314, "step": 145415 }, { "epoch": 1.61, "learning_rate": 2.316340390350489e-05, "loss": 0.6229, "step": 145420 }, { "epoch": 1.61, "learning_rate": 2.3162481176366377e-05, "loss": 0.6648, "step": 145425 }, { "epoch": 1.61, "learning_rate": 2.3161558449227864e-05, "loss": 0.6239, "step": 145430 }, { "epoch": 1.61, "learning_rate": 2.316063572208935e-05, "loss": 0.6224, "step": 145435 }, { "epoch": 1.61, "learning_rate": 2.315971299495084e-05, "loss": 0.6149, "step": 145440 }, { "epoch": 1.61, "learning_rate": 2.3158790267812325e-05, "loss": 0.6549, "step": 145445 }, { "epoch": 1.61, "learning_rate": 2.3157867540673812e-05, "loss": 0.6754, "step": 145450 }, { "epoch": 1.61, "learning_rate": 2.31569448135353e-05, "loss": 0.6615, "step": 145455 }, { "epoch": 1.61, "learning_rate": 2.3156022086396788e-05, "loss": 0.6114, "step": 145460 }, { "epoch": 1.61, "learning_rate": 2.3155099359258276e-05, "loss": 0.6216, "step": 145465 }, { "epoch": 1.61, "learning_rate": 2.3154176632119764e-05, "loss": 0.6554, "step": 145470 }, { "epoch": 1.61, "learning_rate": 2.315325390498125e-05, "loss": 0.6901, "step": 145475 }, { "epoch": 1.61, "learning_rate": 2.315233117784274e-05, "loss": 0.6746, "step": 145480 }, { "epoch": 1.61, "learning_rate": 2.3151408450704227e-05, "loss": 0.6718, "step": 145485 }, { "epoch": 1.61, "learning_rate": 2.3150485723565715e-05, "loss": 0.6028, "step": 145490 }, { "epoch": 1.61, "learning_rate": 2.3149562996427203e-05, "loss": 0.5975, "step": 145495 }, { "epoch": 1.61, "learning_rate": 2.314864026928869e-05, "loss": 0.5939, "step": 145500 }, { "epoch": 1.61, "learning_rate": 2.3147717542150175e-05, "loss": 0.6501, "step": 145505 }, { "epoch": 1.61, "learning_rate": 2.3146794815011663e-05, "loss": 0.6563, "step": 145510 }, { "epoch": 1.61, "learning_rate": 2.314587208787315e-05, "loss": 0.6417, "step": 145515 }, { "epoch": 1.61, "learning_rate": 2.314494936073464e-05, "loss": 0.6199, "step": 145520 }, { "epoch": 1.61, "learning_rate": 2.3144026633596127e-05, "loss": 0.669, "step": 145525 }, { "epoch": 1.61, "learning_rate": 2.3143103906457615e-05, "loss": 0.6653, "step": 145530 }, { "epoch": 1.61, "learning_rate": 2.3142181179319102e-05, "loss": 0.6022, "step": 145535 }, { "epoch": 1.61, "learning_rate": 2.314125845218059e-05, "loss": 0.6955, "step": 145540 }, { "epoch": 1.61, "learning_rate": 2.3140335725042078e-05, "loss": 0.6655, "step": 145545 }, { "epoch": 1.61, "learning_rate": 2.3139412997903566e-05, "loss": 0.6459, "step": 145550 }, { "epoch": 1.61, "learning_rate": 2.3138490270765054e-05, "loss": 0.6111, "step": 145555 }, { "epoch": 1.61, "learning_rate": 2.3137567543626538e-05, "loss": 0.6371, "step": 145560 }, { "epoch": 1.61, "learning_rate": 2.313664481648803e-05, "loss": 0.5821, "step": 145565 }, { "epoch": 1.61, "learning_rate": 2.3135722089349514e-05, "loss": 0.6365, "step": 145570 }, { "epoch": 1.61, "learning_rate": 2.3134799362211005e-05, "loss": 0.6042, "step": 145575 }, { "epoch": 1.61, "learning_rate": 2.313387663507249e-05, "loss": 0.6275, "step": 145580 }, { "epoch": 1.61, "learning_rate": 2.3132953907933978e-05, "loss": 0.6092, "step": 145585 }, { "epoch": 1.61, "learning_rate": 2.3132031180795465e-05, "loss": 0.5614, "step": 145590 }, { "epoch": 1.61, "learning_rate": 2.3131108453656953e-05, "loss": 0.628, "step": 145595 }, { "epoch": 1.61, "learning_rate": 2.313018572651844e-05, "loss": 0.6637, "step": 145600 }, { "epoch": 1.61, "learning_rate": 2.312926299937993e-05, "loss": 0.6155, "step": 145605 }, { "epoch": 1.61, "learning_rate": 2.3128340272241417e-05, "loss": 0.6082, "step": 145610 }, { "epoch": 1.61, "learning_rate": 2.31274175451029e-05, "loss": 0.6206, "step": 145615 }, { "epoch": 1.61, "learning_rate": 2.3126494817964392e-05, "loss": 0.6387, "step": 145620 }, { "epoch": 1.61, "learning_rate": 2.3125572090825877e-05, "loss": 0.6682, "step": 145625 }, { "epoch": 1.61, "learning_rate": 2.3124649363687368e-05, "loss": 0.6277, "step": 145630 }, { "epoch": 1.61, "learning_rate": 2.3123726636548853e-05, "loss": 0.6234, "step": 145635 }, { "epoch": 1.61, "learning_rate": 2.3122803909410344e-05, "loss": 0.5764, "step": 145640 }, { "epoch": 1.61, "learning_rate": 2.312188118227183e-05, "loss": 0.6556, "step": 145645 }, { "epoch": 1.61, "learning_rate": 2.312095845513332e-05, "loss": 0.6573, "step": 145650 }, { "epoch": 1.61, "learning_rate": 2.3120035727994804e-05, "loss": 0.6788, "step": 145655 }, { "epoch": 1.61, "learning_rate": 2.3119113000856292e-05, "loss": 0.6512, "step": 145660 }, { "epoch": 1.61, "learning_rate": 2.311819027371778e-05, "loss": 0.6136, "step": 145665 }, { "epoch": 1.61, "learning_rate": 2.3117267546579268e-05, "loss": 0.6215, "step": 145670 }, { "epoch": 1.61, "learning_rate": 2.3116344819440755e-05, "loss": 0.662, "step": 145675 }, { "epoch": 1.61, "learning_rate": 2.311542209230224e-05, "loss": 0.5953, "step": 145680 }, { "epoch": 1.61, "learning_rate": 2.311449936516373e-05, "loss": 0.588, "step": 145685 }, { "epoch": 1.61, "learning_rate": 2.3113576638025216e-05, "loss": 0.6501, "step": 145690 }, { "epoch": 1.61, "learning_rate": 2.3112653910886707e-05, "loss": 0.6172, "step": 145695 }, { "epoch": 1.61, "learning_rate": 2.311173118374819e-05, "loss": 0.6201, "step": 145700 }, { "epoch": 1.61, "learning_rate": 2.3110808456609683e-05, "loss": 0.6527, "step": 145705 }, { "epoch": 1.61, "learning_rate": 2.3109885729471167e-05, "loss": 0.6337, "step": 145710 }, { "epoch": 1.61, "learning_rate": 2.3108963002332655e-05, "loss": 0.6612, "step": 145715 }, { "epoch": 1.61, "learning_rate": 2.3108040275194143e-05, "loss": 0.6205, "step": 145720 }, { "epoch": 1.61, "learning_rate": 2.310711754805563e-05, "loss": 0.6165, "step": 145725 }, { "epoch": 1.61, "learning_rate": 2.310619482091712e-05, "loss": 0.6343, "step": 145730 }, { "epoch": 1.61, "learning_rate": 2.3105272093778603e-05, "loss": 0.6968, "step": 145735 }, { "epoch": 1.61, "learning_rate": 2.3104349366640094e-05, "loss": 0.6416, "step": 145740 }, { "epoch": 1.61, "learning_rate": 2.310342663950158e-05, "loss": 0.5699, "step": 145745 }, { "epoch": 1.61, "learning_rate": 2.310250391236307e-05, "loss": 0.6326, "step": 145750 }, { "epoch": 1.61, "learning_rate": 2.3101581185224554e-05, "loss": 0.6062, "step": 145755 }, { "epoch": 1.61, "learning_rate": 2.3100658458086045e-05, "loss": 0.6831, "step": 145760 }, { "epoch": 1.61, "learning_rate": 2.309973573094753e-05, "loss": 0.5863, "step": 145765 }, { "epoch": 1.61, "learning_rate": 2.309881300380902e-05, "loss": 0.6156, "step": 145770 }, { "epoch": 1.61, "learning_rate": 2.3097890276670506e-05, "loss": 0.6349, "step": 145775 }, { "epoch": 1.61, "learning_rate": 2.3096967549531993e-05, "loss": 0.5878, "step": 145780 }, { "epoch": 1.61, "learning_rate": 2.309604482239348e-05, "loss": 0.6873, "step": 145785 }, { "epoch": 1.61, "learning_rate": 2.309512209525497e-05, "loss": 0.7032, "step": 145790 }, { "epoch": 1.61, "learning_rate": 2.3094199368116457e-05, "loss": 0.6526, "step": 145795 }, { "epoch": 1.61, "learning_rate": 2.3093276640977945e-05, "loss": 0.6214, "step": 145800 }, { "epoch": 1.61, "learning_rate": 2.3092353913839433e-05, "loss": 0.6582, "step": 145805 }, { "epoch": 1.61, "learning_rate": 2.3091431186700917e-05, "loss": 0.6776, "step": 145810 }, { "epoch": 1.61, "learning_rate": 2.309050845956241e-05, "loss": 0.5851, "step": 145815 }, { "epoch": 1.61, "learning_rate": 2.3089585732423893e-05, "loss": 0.6932, "step": 145820 }, { "epoch": 1.61, "learning_rate": 2.3088663005285384e-05, "loss": 0.5993, "step": 145825 }, { "epoch": 1.61, "learning_rate": 2.308774027814687e-05, "loss": 0.6709, "step": 145830 }, { "epoch": 1.61, "learning_rate": 2.3086817551008356e-05, "loss": 0.6279, "step": 145835 }, { "epoch": 1.61, "learning_rate": 2.3085894823869844e-05, "loss": 0.6309, "step": 145840 }, { "epoch": 1.61, "learning_rate": 2.3084972096731332e-05, "loss": 0.6212, "step": 145845 }, { "epoch": 1.61, "learning_rate": 2.308404936959282e-05, "loss": 0.6394, "step": 145850 }, { "epoch": 1.62, "learning_rate": 2.3083126642454308e-05, "loss": 0.6935, "step": 145855 }, { "epoch": 1.62, "learning_rate": 2.3082203915315796e-05, "loss": 0.6435, "step": 145860 }, { "epoch": 1.62, "learning_rate": 2.3081281188177284e-05, "loss": 0.6442, "step": 145865 }, { "epoch": 1.62, "learning_rate": 2.308035846103877e-05, "loss": 0.5849, "step": 145870 }, { "epoch": 1.62, "learning_rate": 2.307943573390026e-05, "loss": 0.6058, "step": 145875 }, { "epoch": 1.62, "learning_rate": 2.3078513006761747e-05, "loss": 0.6222, "step": 145880 }, { "epoch": 1.62, "learning_rate": 2.307759027962323e-05, "loss": 0.6301, "step": 145885 }, { "epoch": 1.62, "learning_rate": 2.307666755248472e-05, "loss": 0.6058, "step": 145890 }, { "epoch": 1.62, "learning_rate": 2.3075744825346207e-05, "loss": 0.6839, "step": 145895 }, { "epoch": 1.62, "learning_rate": 2.3074822098207695e-05, "loss": 0.636, "step": 145900 }, { "epoch": 1.62, "learning_rate": 2.3073899371069183e-05, "loss": 0.6239, "step": 145905 }, { "epoch": 1.62, "learning_rate": 2.307297664393067e-05, "loss": 0.6413, "step": 145910 }, { "epoch": 1.62, "learning_rate": 2.307205391679216e-05, "loss": 0.6636, "step": 145915 }, { "epoch": 1.62, "learning_rate": 2.3071131189653646e-05, "loss": 0.6435, "step": 145920 }, { "epoch": 1.62, "learning_rate": 2.3070208462515134e-05, "loss": 0.6186, "step": 145925 }, { "epoch": 1.62, "learning_rate": 2.3069285735376622e-05, "loss": 0.6375, "step": 145930 }, { "epoch": 1.62, "learning_rate": 2.306836300823811e-05, "loss": 0.6657, "step": 145935 }, { "epoch": 1.62, "learning_rate": 2.3067440281099598e-05, "loss": 0.6557, "step": 145940 }, { "epoch": 1.62, "learning_rate": 2.3066517553961082e-05, "loss": 0.6388, "step": 145945 }, { "epoch": 1.62, "learning_rate": 2.3065594826822574e-05, "loss": 0.6408, "step": 145950 }, { "epoch": 1.62, "learning_rate": 2.3064672099684058e-05, "loss": 0.6707, "step": 145955 }, { "epoch": 1.62, "learning_rate": 2.3063749372545546e-05, "loss": 0.6322, "step": 145960 }, { "epoch": 1.62, "learning_rate": 2.3062826645407034e-05, "loss": 0.6921, "step": 145965 }, { "epoch": 1.62, "learning_rate": 2.306190391826852e-05, "loss": 0.6622, "step": 145970 }, { "epoch": 1.62, "learning_rate": 2.306098119113001e-05, "loss": 0.6345, "step": 145975 }, { "epoch": 1.62, "learning_rate": 2.3060058463991497e-05, "loss": 0.6501, "step": 145980 }, { "epoch": 1.62, "learning_rate": 2.3059135736852985e-05, "loss": 0.5912, "step": 145985 }, { "epoch": 1.62, "learning_rate": 2.3058213009714473e-05, "loss": 0.5783, "step": 145990 }, { "epoch": 1.62, "learning_rate": 2.305729028257596e-05, "loss": 0.6024, "step": 145995 }, { "epoch": 1.62, "learning_rate": 2.305636755543745e-05, "loss": 0.5623, "step": 146000 }, { "epoch": 1.62, "eval_loss": 0.5974382162094116, "eval_runtime": 69.1984, "eval_samples_per_second": 28.902, "eval_steps_per_second": 14.451, "step": 146000 }, { "epoch": 1.62, "learning_rate": 2.3055444828298936e-05, "loss": 0.6147, "step": 146005 }, { "epoch": 1.62, "learning_rate": 2.305452210116042e-05, "loss": 0.6706, "step": 146010 }, { "epoch": 1.62, "learning_rate": 2.3053599374021912e-05, "loss": 0.6014, "step": 146015 }, { "epoch": 1.62, "learning_rate": 2.3052676646883397e-05, "loss": 0.6057, "step": 146020 }, { "epoch": 1.62, "learning_rate": 2.3051753919744888e-05, "loss": 0.7136, "step": 146025 }, { "epoch": 1.62, "learning_rate": 2.3050831192606372e-05, "loss": 0.6274, "step": 146030 }, { "epoch": 1.62, "learning_rate": 2.304990846546786e-05, "loss": 0.5778, "step": 146035 }, { "epoch": 1.62, "learning_rate": 2.3048985738329348e-05, "loss": 0.6304, "step": 146040 }, { "epoch": 1.62, "learning_rate": 2.3048063011190836e-05, "loss": 0.6079, "step": 146045 }, { "epoch": 1.62, "learning_rate": 2.3047140284052324e-05, "loss": 0.6699, "step": 146050 }, { "epoch": 1.62, "learning_rate": 2.304621755691381e-05, "loss": 0.5974, "step": 146055 }, { "epoch": 1.62, "learning_rate": 2.30452948297753e-05, "loss": 0.6616, "step": 146060 }, { "epoch": 1.62, "learning_rate": 2.3044372102636784e-05, "loss": 0.6388, "step": 146065 }, { "epoch": 1.62, "learning_rate": 2.3043449375498275e-05, "loss": 0.6484, "step": 146070 }, { "epoch": 1.62, "learning_rate": 2.304252664835976e-05, "loss": 0.6327, "step": 146075 }, { "epoch": 1.62, "learning_rate": 2.304160392122125e-05, "loss": 0.6037, "step": 146080 }, { "epoch": 1.62, "learning_rate": 2.3040681194082735e-05, "loss": 0.6713, "step": 146085 }, { "epoch": 1.62, "learning_rate": 2.3039758466944227e-05, "loss": 0.6505, "step": 146090 }, { "epoch": 1.62, "learning_rate": 2.303883573980571e-05, "loss": 0.6469, "step": 146095 }, { "epoch": 1.62, "learning_rate": 2.30379130126672e-05, "loss": 0.6658, "step": 146100 }, { "epoch": 1.62, "learning_rate": 2.3036990285528687e-05, "loss": 0.6158, "step": 146105 }, { "epoch": 1.62, "learning_rate": 2.3036067558390175e-05, "loss": 0.636, "step": 146110 }, { "epoch": 1.62, "learning_rate": 2.3035144831251662e-05, "loss": 0.6328, "step": 146115 }, { "epoch": 1.62, "learning_rate": 2.3034222104113147e-05, "loss": 0.6138, "step": 146120 }, { "epoch": 1.62, "learning_rate": 2.3033299376974638e-05, "loss": 0.5813, "step": 146125 }, { "epoch": 1.62, "learning_rate": 2.3032376649836123e-05, "loss": 0.696, "step": 146130 }, { "epoch": 1.62, "learning_rate": 2.3031453922697614e-05, "loss": 0.5927, "step": 146135 }, { "epoch": 1.62, "learning_rate": 2.3030531195559098e-05, "loss": 0.6292, "step": 146140 }, { "epoch": 1.62, "learning_rate": 2.302960846842059e-05, "loss": 0.598, "step": 146145 }, { "epoch": 1.62, "learning_rate": 2.3028685741282074e-05, "loss": 0.6491, "step": 146150 }, { "epoch": 1.62, "learning_rate": 2.3027763014143565e-05, "loss": 0.6187, "step": 146155 }, { "epoch": 1.62, "learning_rate": 2.302684028700505e-05, "loss": 0.6501, "step": 146160 }, { "epoch": 1.62, "learning_rate": 2.3025917559866537e-05, "loss": 0.6223, "step": 146165 }, { "epoch": 1.62, "learning_rate": 2.3024994832728025e-05, "loss": 0.6858, "step": 146170 }, { "epoch": 1.62, "learning_rate": 2.3024072105589513e-05, "loss": 0.6288, "step": 146175 }, { "epoch": 1.62, "learning_rate": 2.3023149378451e-05, "loss": 0.6404, "step": 146180 }, { "epoch": 1.62, "learning_rate": 2.302222665131249e-05, "loss": 0.6784, "step": 146185 }, { "epoch": 1.62, "learning_rate": 2.3021303924173977e-05, "loss": 0.6679, "step": 146190 }, { "epoch": 1.62, "learning_rate": 2.302038119703546e-05, "loss": 0.5899, "step": 146195 }, { "epoch": 1.62, "learning_rate": 2.3019458469896952e-05, "loss": 0.5988, "step": 146200 }, { "epoch": 1.62, "learning_rate": 2.3018535742758437e-05, "loss": 0.6074, "step": 146205 }, { "epoch": 1.62, "learning_rate": 2.3017613015619928e-05, "loss": 0.6876, "step": 146210 }, { "epoch": 1.62, "learning_rate": 2.3016690288481413e-05, "loss": 0.6246, "step": 146215 }, { "epoch": 1.62, "learning_rate": 2.30157675613429e-05, "loss": 0.667, "step": 146220 }, { "epoch": 1.62, "learning_rate": 2.3014844834204388e-05, "loss": 0.6119, "step": 146225 }, { "epoch": 1.62, "learning_rate": 2.3013922107065876e-05, "loss": 0.6537, "step": 146230 }, { "epoch": 1.62, "learning_rate": 2.3012999379927364e-05, "loss": 0.612, "step": 146235 }, { "epoch": 1.62, "learning_rate": 2.3012076652788852e-05, "loss": 0.5636, "step": 146240 }, { "epoch": 1.62, "learning_rate": 2.301115392565034e-05, "loss": 0.6552, "step": 146245 }, { "epoch": 1.62, "learning_rate": 2.3010231198511828e-05, "loss": 0.6571, "step": 146250 }, { "epoch": 1.62, "learning_rate": 2.3009308471373315e-05, "loss": 0.5899, "step": 146255 }, { "epoch": 1.62, "learning_rate": 2.3008385744234803e-05, "loss": 0.6612, "step": 146260 }, { "epoch": 1.62, "learning_rate": 2.300746301709629e-05, "loss": 0.6712, "step": 146265 }, { "epoch": 1.62, "learning_rate": 2.3006540289957776e-05, "loss": 0.6368, "step": 146270 }, { "epoch": 1.62, "learning_rate": 2.3005617562819263e-05, "loss": 0.5832, "step": 146275 }, { "epoch": 1.62, "learning_rate": 2.300469483568075e-05, "loss": 0.5887, "step": 146280 }, { "epoch": 1.62, "learning_rate": 2.300377210854224e-05, "loss": 0.603, "step": 146285 }, { "epoch": 1.62, "learning_rate": 2.3002849381403727e-05, "loss": 0.649, "step": 146290 }, { "epoch": 1.62, "learning_rate": 2.3001926654265215e-05, "loss": 0.6405, "step": 146295 }, { "epoch": 1.62, "learning_rate": 2.3001003927126703e-05, "loss": 0.6005, "step": 146300 }, { "epoch": 1.62, "learning_rate": 2.300008119998819e-05, "loss": 0.6435, "step": 146305 }, { "epoch": 1.62, "learning_rate": 2.299915847284968e-05, "loss": 0.6404, "step": 146310 }, { "epoch": 1.62, "learning_rate": 2.2998235745711166e-05, "loss": 0.6874, "step": 146315 }, { "epoch": 1.62, "learning_rate": 2.2997313018572654e-05, "loss": 0.6753, "step": 146320 }, { "epoch": 1.62, "learning_rate": 2.2996390291434142e-05, "loss": 0.6556, "step": 146325 }, { "epoch": 1.62, "learning_rate": 2.2995467564295626e-05, "loss": 0.6657, "step": 146330 }, { "epoch": 1.62, "learning_rate": 2.2994544837157118e-05, "loss": 0.589, "step": 146335 }, { "epoch": 1.62, "learning_rate": 2.2993622110018602e-05, "loss": 0.642, "step": 146340 }, { "epoch": 1.62, "learning_rate": 2.299269938288009e-05, "loss": 0.6475, "step": 146345 }, { "epoch": 1.62, "learning_rate": 2.2991776655741578e-05, "loss": 0.6164, "step": 146350 }, { "epoch": 1.62, "learning_rate": 2.2990853928603066e-05, "loss": 0.5975, "step": 146355 }, { "epoch": 1.62, "learning_rate": 2.2989931201464553e-05, "loss": 0.6309, "step": 146360 }, { "epoch": 1.62, "learning_rate": 2.298900847432604e-05, "loss": 0.6662, "step": 146365 }, { "epoch": 1.62, "learning_rate": 2.298808574718753e-05, "loss": 0.6783, "step": 146370 }, { "epoch": 1.62, "learning_rate": 2.2987163020049017e-05, "loss": 0.6271, "step": 146375 }, { "epoch": 1.62, "learning_rate": 2.2986240292910505e-05, "loss": 0.6603, "step": 146380 }, { "epoch": 1.62, "learning_rate": 2.2985317565771993e-05, "loss": 0.6293, "step": 146385 }, { "epoch": 1.62, "learning_rate": 2.298439483863348e-05, "loss": 0.6727, "step": 146390 }, { "epoch": 1.62, "learning_rate": 2.2983472111494965e-05, "loss": 0.6576, "step": 146395 }, { "epoch": 1.62, "learning_rate": 2.2982549384356456e-05, "loss": 0.5947, "step": 146400 }, { "epoch": 1.62, "learning_rate": 2.298162665721794e-05, "loss": 0.6477, "step": 146405 }, { "epoch": 1.62, "learning_rate": 2.2980703930079432e-05, "loss": 0.6205, "step": 146410 }, { "epoch": 1.62, "learning_rate": 2.2979781202940916e-05, "loss": 0.6167, "step": 146415 }, { "epoch": 1.62, "learning_rate": 2.2978858475802404e-05, "loss": 0.6093, "step": 146420 }, { "epoch": 1.62, "learning_rate": 2.2977935748663892e-05, "loss": 0.7053, "step": 146425 }, { "epoch": 1.62, "learning_rate": 2.297701302152538e-05, "loss": 0.6761, "step": 146430 }, { "epoch": 1.62, "learning_rate": 2.2976090294386868e-05, "loss": 0.6082, "step": 146435 }, { "epoch": 1.62, "learning_rate": 2.2975167567248356e-05, "loss": 0.6468, "step": 146440 }, { "epoch": 1.62, "learning_rate": 2.2974244840109843e-05, "loss": 0.6794, "step": 146445 }, { "epoch": 1.62, "learning_rate": 2.2973322112971328e-05, "loss": 0.6066, "step": 146450 }, { "epoch": 1.62, "learning_rate": 2.297239938583282e-05, "loss": 0.6501, "step": 146455 }, { "epoch": 1.62, "learning_rate": 2.2971476658694304e-05, "loss": 0.6367, "step": 146460 }, { "epoch": 1.62, "learning_rate": 2.2970553931555795e-05, "loss": 0.6593, "step": 146465 }, { "epoch": 1.62, "learning_rate": 2.296963120441728e-05, "loss": 0.6288, "step": 146470 }, { "epoch": 1.62, "learning_rate": 2.296870847727877e-05, "loss": 0.6235, "step": 146475 }, { "epoch": 1.62, "learning_rate": 2.2967785750140255e-05, "loss": 0.6083, "step": 146480 }, { "epoch": 1.62, "learning_rate": 2.2966863023001743e-05, "loss": 0.623, "step": 146485 }, { "epoch": 1.62, "learning_rate": 2.296594029586323e-05, "loss": 0.6297, "step": 146490 }, { "epoch": 1.62, "learning_rate": 2.296501756872472e-05, "loss": 0.6569, "step": 146495 }, { "epoch": 1.62, "learning_rate": 2.2964094841586206e-05, "loss": 0.6483, "step": 146500 }, { "epoch": 1.62, "learning_rate": 2.296317211444769e-05, "loss": 0.6065, "step": 146505 }, { "epoch": 1.62, "learning_rate": 2.2962249387309182e-05, "loss": 0.6375, "step": 146510 }, { "epoch": 1.62, "learning_rate": 2.2961326660170667e-05, "loss": 0.6167, "step": 146515 }, { "epoch": 1.62, "learning_rate": 2.2960403933032158e-05, "loss": 0.6629, "step": 146520 }, { "epoch": 1.62, "learning_rate": 2.2959481205893642e-05, "loss": 0.6447, "step": 146525 }, { "epoch": 1.62, "learning_rate": 2.2958558478755133e-05, "loss": 0.6167, "step": 146530 }, { "epoch": 1.62, "learning_rate": 2.2957635751616618e-05, "loss": 0.6273, "step": 146535 }, { "epoch": 1.62, "learning_rate": 2.295671302447811e-05, "loss": 0.6538, "step": 146540 }, { "epoch": 1.62, "learning_rate": 2.2955790297339594e-05, "loss": 0.6559, "step": 146545 }, { "epoch": 1.62, "learning_rate": 2.295486757020108e-05, "loss": 0.6294, "step": 146550 }, { "epoch": 1.62, "learning_rate": 2.295394484306257e-05, "loss": 0.6237, "step": 146555 }, { "epoch": 1.62, "learning_rate": 2.2953022115924057e-05, "loss": 0.664, "step": 146560 }, { "epoch": 1.62, "learning_rate": 2.2952099388785545e-05, "loss": 0.6549, "step": 146565 }, { "epoch": 1.62, "learning_rate": 2.295117666164703e-05, "loss": 0.634, "step": 146570 }, { "epoch": 1.62, "learning_rate": 2.295025393450852e-05, "loss": 0.6846, "step": 146575 }, { "epoch": 1.62, "learning_rate": 2.2949331207370005e-05, "loss": 0.5991, "step": 146580 }, { "epoch": 1.62, "learning_rate": 2.2948408480231496e-05, "loss": 0.6355, "step": 146585 }, { "epoch": 1.62, "learning_rate": 2.294748575309298e-05, "loss": 0.6013, "step": 146590 }, { "epoch": 1.62, "learning_rate": 2.2946563025954472e-05, "loss": 0.604, "step": 146595 }, { "epoch": 1.62, "learning_rate": 2.2945640298815957e-05, "loss": 0.6269, "step": 146600 }, { "epoch": 1.62, "learning_rate": 2.2944717571677444e-05, "loss": 0.6534, "step": 146605 }, { "epoch": 1.62, "learning_rate": 2.2943794844538932e-05, "loss": 0.608, "step": 146610 }, { "epoch": 1.62, "learning_rate": 2.294287211740042e-05, "loss": 0.6589, "step": 146615 }, { "epoch": 1.62, "learning_rate": 2.2941949390261908e-05, "loss": 0.6127, "step": 146620 }, { "epoch": 1.62, "learning_rate": 2.2941026663123396e-05, "loss": 0.6245, "step": 146625 }, { "epoch": 1.62, "learning_rate": 2.2940103935984884e-05, "loss": 0.7042, "step": 146630 }, { "epoch": 1.62, "learning_rate": 2.293918120884637e-05, "loss": 0.6852, "step": 146635 }, { "epoch": 1.62, "learning_rate": 2.293825848170786e-05, "loss": 0.6211, "step": 146640 }, { "epoch": 1.62, "learning_rate": 2.2937335754569344e-05, "loss": 0.572, "step": 146645 }, { "epoch": 1.62, "learning_rate": 2.2936413027430835e-05, "loss": 0.6227, "step": 146650 }, { "epoch": 1.62, "learning_rate": 2.293549030029232e-05, "loss": 0.6323, "step": 146655 }, { "epoch": 1.62, "learning_rate": 2.2934567573153807e-05, "loss": 0.6138, "step": 146660 }, { "epoch": 1.62, "learning_rate": 2.2933644846015295e-05, "loss": 0.6304, "step": 146665 }, { "epoch": 1.62, "learning_rate": 2.2932722118876783e-05, "loss": 0.6216, "step": 146670 }, { "epoch": 1.62, "learning_rate": 2.293179939173827e-05, "loss": 0.6406, "step": 146675 }, { "epoch": 1.62, "learning_rate": 2.293087666459976e-05, "loss": 0.6258, "step": 146680 }, { "epoch": 1.62, "learning_rate": 2.2929953937461247e-05, "loss": 0.6582, "step": 146685 }, { "epoch": 1.62, "learning_rate": 2.2929031210322734e-05, "loss": 0.6228, "step": 146690 }, { "epoch": 1.62, "learning_rate": 2.2928108483184222e-05, "loss": 0.5928, "step": 146695 }, { "epoch": 1.62, "learning_rate": 2.292718575604571e-05, "loss": 0.6189, "step": 146700 }, { "epoch": 1.62, "learning_rate": 2.2926263028907198e-05, "loss": 0.6223, "step": 146705 }, { "epoch": 1.62, "learning_rate": 2.2925340301768686e-05, "loss": 0.6308, "step": 146710 }, { "epoch": 1.62, "learning_rate": 2.292441757463017e-05, "loss": 0.6057, "step": 146715 }, { "epoch": 1.62, "learning_rate": 2.2923494847491658e-05, "loss": 0.7133, "step": 146720 }, { "epoch": 1.62, "learning_rate": 2.2922572120353146e-05, "loss": 0.6291, "step": 146725 }, { "epoch": 1.62, "learning_rate": 2.2921649393214634e-05, "loss": 0.6681, "step": 146730 }, { "epoch": 1.62, "learning_rate": 2.2920726666076122e-05, "loss": 0.6626, "step": 146735 }, { "epoch": 1.62, "learning_rate": 2.291980393893761e-05, "loss": 0.6439, "step": 146740 }, { "epoch": 1.62, "learning_rate": 2.2918881211799097e-05, "loss": 0.6124, "step": 146745 }, { "epoch": 1.62, "learning_rate": 2.2917958484660585e-05, "loss": 0.6466, "step": 146750 }, { "epoch": 1.62, "learning_rate": 2.2917035757522073e-05, "loss": 0.6303, "step": 146755 }, { "epoch": 1.63, "learning_rate": 2.291611303038356e-05, "loss": 0.6506, "step": 146760 }, { "epoch": 1.63, "learning_rate": 2.291519030324505e-05, "loss": 0.6618, "step": 146765 }, { "epoch": 1.63, "learning_rate": 2.2914267576106537e-05, "loss": 0.5949, "step": 146770 }, { "epoch": 1.63, "learning_rate": 2.2913344848968025e-05, "loss": 0.6342, "step": 146775 }, { "epoch": 1.63, "learning_rate": 2.291242212182951e-05, "loss": 0.6376, "step": 146780 }, { "epoch": 1.63, "learning_rate": 2.2911499394691e-05, "loss": 0.644, "step": 146785 }, { "epoch": 1.63, "learning_rate": 2.2910576667552485e-05, "loss": 0.6591, "step": 146790 }, { "epoch": 1.63, "learning_rate": 2.2909653940413973e-05, "loss": 0.6529, "step": 146795 }, { "epoch": 1.63, "learning_rate": 2.290873121327546e-05, "loss": 0.6526, "step": 146800 }, { "epoch": 1.63, "learning_rate": 2.2907808486136948e-05, "loss": 0.6313, "step": 146805 }, { "epoch": 1.63, "learning_rate": 2.2906885758998436e-05, "loss": 0.5957, "step": 146810 }, { "epoch": 1.63, "learning_rate": 2.2905963031859924e-05, "loss": 0.6585, "step": 146815 }, { "epoch": 1.63, "learning_rate": 2.2905040304721412e-05, "loss": 0.6103, "step": 146820 }, { "epoch": 1.63, "learning_rate": 2.29041175775829e-05, "loss": 0.5915, "step": 146825 }, { "epoch": 1.63, "learning_rate": 2.2903194850444387e-05, "loss": 0.6168, "step": 146830 }, { "epoch": 1.63, "learning_rate": 2.2902272123305872e-05, "loss": 0.6765, "step": 146835 }, { "epoch": 1.63, "learning_rate": 2.2901349396167363e-05, "loss": 0.6374, "step": 146840 }, { "epoch": 1.63, "learning_rate": 2.2900426669028848e-05, "loss": 0.622, "step": 146845 }, { "epoch": 1.63, "learning_rate": 2.289950394189034e-05, "loss": 0.645, "step": 146850 }, { "epoch": 1.63, "learning_rate": 2.2898581214751823e-05, "loss": 0.6086, "step": 146855 }, { "epoch": 1.63, "learning_rate": 2.2897658487613315e-05, "loss": 0.6092, "step": 146860 }, { "epoch": 1.63, "learning_rate": 2.28967357604748e-05, "loss": 0.6112, "step": 146865 }, { "epoch": 1.63, "learning_rate": 2.2895813033336287e-05, "loss": 0.6219, "step": 146870 }, { "epoch": 1.63, "learning_rate": 2.2894890306197775e-05, "loss": 0.6786, "step": 146875 }, { "epoch": 1.63, "learning_rate": 2.2893967579059263e-05, "loss": 0.6864, "step": 146880 }, { "epoch": 1.63, "learning_rate": 2.289304485192075e-05, "loss": 0.6441, "step": 146885 }, { "epoch": 1.63, "learning_rate": 2.2892122124782235e-05, "loss": 0.6212, "step": 146890 }, { "epoch": 1.63, "learning_rate": 2.2891199397643726e-05, "loss": 0.6105, "step": 146895 }, { "epoch": 1.63, "learning_rate": 2.289027667050521e-05, "loss": 0.6355, "step": 146900 }, { "epoch": 1.63, "learning_rate": 2.2889353943366702e-05, "loss": 0.6359, "step": 146905 }, { "epoch": 1.63, "learning_rate": 2.2888431216228186e-05, "loss": 0.6607, "step": 146910 }, { "epoch": 1.63, "learning_rate": 2.2887508489089678e-05, "loss": 0.638, "step": 146915 }, { "epoch": 1.63, "learning_rate": 2.2886585761951162e-05, "loss": 0.6418, "step": 146920 }, { "epoch": 1.63, "learning_rate": 2.2885663034812653e-05, "loss": 0.6699, "step": 146925 }, { "epoch": 1.63, "learning_rate": 2.2884740307674138e-05, "loss": 0.6354, "step": 146930 }, { "epoch": 1.63, "learning_rate": 2.2883817580535626e-05, "loss": 0.6346, "step": 146935 }, { "epoch": 1.63, "learning_rate": 2.2882894853397113e-05, "loss": 0.6874, "step": 146940 }, { "epoch": 1.63, "learning_rate": 2.28819721262586e-05, "loss": 0.6358, "step": 146945 }, { "epoch": 1.63, "learning_rate": 2.288104939912009e-05, "loss": 0.6516, "step": 146950 }, { "epoch": 1.63, "learning_rate": 2.2880126671981574e-05, "loss": 0.6227, "step": 146955 }, { "epoch": 1.63, "learning_rate": 2.2879203944843065e-05, "loss": 0.6725, "step": 146960 }, { "epoch": 1.63, "learning_rate": 2.287828121770455e-05, "loss": 0.6638, "step": 146965 }, { "epoch": 1.63, "learning_rate": 2.287735849056604e-05, "loss": 0.6397, "step": 146970 }, { "epoch": 1.63, "learning_rate": 2.2876435763427525e-05, "loss": 0.6499, "step": 146975 }, { "epoch": 1.63, "learning_rate": 2.2875513036289016e-05, "loss": 0.6307, "step": 146980 }, { "epoch": 1.63, "learning_rate": 2.28745903091505e-05, "loss": 0.639, "step": 146985 }, { "epoch": 1.63, "learning_rate": 2.287366758201199e-05, "loss": 0.629, "step": 146990 }, { "epoch": 1.63, "learning_rate": 2.2872744854873476e-05, "loss": 0.6342, "step": 146995 }, { "epoch": 1.63, "learning_rate": 2.2871822127734964e-05, "loss": 0.5787, "step": 147000 }, { "epoch": 1.63, "eval_loss": 0.6164979338645935, "eval_runtime": 69.1416, "eval_samples_per_second": 28.926, "eval_steps_per_second": 14.463, "step": 147000 }, { "epoch": 1.63, "learning_rate": 2.2870899400596452e-05, "loss": 0.6506, "step": 147005 }, { "epoch": 1.63, "learning_rate": 2.286997667345794e-05, "loss": 0.6631, "step": 147010 }, { "epoch": 1.63, "learning_rate": 2.2869053946319428e-05, "loss": 0.584, "step": 147015 }, { "epoch": 1.63, "learning_rate": 2.2868131219180916e-05, "loss": 0.6251, "step": 147020 }, { "epoch": 1.63, "learning_rate": 2.2867208492042403e-05, "loss": 0.5905, "step": 147025 }, { "epoch": 1.63, "learning_rate": 2.2866285764903888e-05, "loss": 0.6335, "step": 147030 }, { "epoch": 1.63, "learning_rate": 2.286536303776538e-05, "loss": 0.6045, "step": 147035 }, { "epoch": 1.63, "learning_rate": 2.2864440310626864e-05, "loss": 0.669, "step": 147040 }, { "epoch": 1.63, "learning_rate": 2.286351758348835e-05, "loss": 0.5954, "step": 147045 }, { "epoch": 1.63, "learning_rate": 2.286259485634984e-05, "loss": 0.6566, "step": 147050 }, { "epoch": 1.63, "learning_rate": 2.2861672129211327e-05, "loss": 0.6866, "step": 147055 }, { "epoch": 1.63, "learning_rate": 2.2860749402072815e-05, "loss": 0.6324, "step": 147060 }, { "epoch": 1.63, "learning_rate": 2.2859826674934303e-05, "loss": 0.6137, "step": 147065 }, { "epoch": 1.63, "learning_rate": 2.285890394779579e-05, "loss": 0.6144, "step": 147070 }, { "epoch": 1.63, "learning_rate": 2.285798122065728e-05, "loss": 0.6437, "step": 147075 }, { "epoch": 1.63, "learning_rate": 2.2857058493518766e-05, "loss": 0.5952, "step": 147080 }, { "epoch": 1.63, "learning_rate": 2.2856135766380254e-05, "loss": 0.6275, "step": 147085 }, { "epoch": 1.63, "learning_rate": 2.2855213039241742e-05, "loss": 0.6314, "step": 147090 }, { "epoch": 1.63, "learning_rate": 2.285429031210323e-05, "loss": 0.6314, "step": 147095 }, { "epoch": 1.63, "learning_rate": 2.2853367584964718e-05, "loss": 0.6107, "step": 147100 }, { "epoch": 1.63, "learning_rate": 2.2852444857826202e-05, "loss": 0.593, "step": 147105 }, { "epoch": 1.63, "learning_rate": 2.285152213068769e-05, "loss": 0.6743, "step": 147110 }, { "epoch": 1.63, "learning_rate": 2.2850599403549178e-05, "loss": 0.6164, "step": 147115 }, { "epoch": 1.63, "learning_rate": 2.2849676676410666e-05, "loss": 0.6567, "step": 147120 }, { "epoch": 1.63, "learning_rate": 2.2848753949272154e-05, "loss": 0.6112, "step": 147125 }, { "epoch": 1.63, "learning_rate": 2.284783122213364e-05, "loss": 0.6169, "step": 147130 }, { "epoch": 1.63, "learning_rate": 2.284690849499513e-05, "loss": 0.6497, "step": 147135 }, { "epoch": 1.63, "learning_rate": 2.2845985767856617e-05, "loss": 0.603, "step": 147140 }, { "epoch": 1.63, "learning_rate": 2.2845063040718105e-05, "loss": 0.6209, "step": 147145 }, { "epoch": 1.63, "learning_rate": 2.2844140313579593e-05, "loss": 0.6074, "step": 147150 }, { "epoch": 1.63, "learning_rate": 2.284321758644108e-05, "loss": 0.641, "step": 147155 }, { "epoch": 1.63, "learning_rate": 2.284229485930257e-05, "loss": 0.5812, "step": 147160 }, { "epoch": 1.63, "learning_rate": 2.2841372132164053e-05, "loss": 0.6737, "step": 147165 }, { "epoch": 1.63, "learning_rate": 2.2840449405025544e-05, "loss": 0.6631, "step": 147170 }, { "epoch": 1.63, "learning_rate": 2.283952667788703e-05, "loss": 0.6299, "step": 147175 }, { "epoch": 1.63, "learning_rate": 2.2838603950748517e-05, "loss": 0.593, "step": 147180 }, { "epoch": 1.63, "learning_rate": 2.2837681223610004e-05, "loss": 0.5615, "step": 147185 }, { "epoch": 1.63, "learning_rate": 2.2836758496471492e-05, "loss": 0.5792, "step": 147190 }, { "epoch": 1.63, "learning_rate": 2.283583576933298e-05, "loss": 0.6586, "step": 147195 }, { "epoch": 1.63, "learning_rate": 2.2834913042194468e-05, "loss": 0.5924, "step": 147200 }, { "epoch": 1.63, "learning_rate": 2.2833990315055956e-05, "loss": 0.6093, "step": 147205 }, { "epoch": 1.63, "learning_rate": 2.2833067587917444e-05, "loss": 0.6046, "step": 147210 }, { "epoch": 1.63, "learning_rate": 2.283214486077893e-05, "loss": 0.6744, "step": 147215 }, { "epoch": 1.63, "learning_rate": 2.2831222133640416e-05, "loss": 0.6693, "step": 147220 }, { "epoch": 1.63, "learning_rate": 2.2830299406501907e-05, "loss": 0.6971, "step": 147225 }, { "epoch": 1.63, "learning_rate": 2.282937667936339e-05, "loss": 0.6134, "step": 147230 }, { "epoch": 1.63, "learning_rate": 2.2828453952224883e-05, "loss": 0.5805, "step": 147235 }, { "epoch": 1.63, "learning_rate": 2.2827531225086367e-05, "loss": 0.6101, "step": 147240 }, { "epoch": 1.63, "learning_rate": 2.282660849794786e-05, "loss": 0.6308, "step": 147245 }, { "epoch": 1.63, "learning_rate": 2.2825685770809343e-05, "loss": 0.6242, "step": 147250 }, { "epoch": 1.63, "learning_rate": 2.282476304367083e-05, "loss": 0.583, "step": 147255 }, { "epoch": 1.63, "learning_rate": 2.282384031653232e-05, "loss": 0.6206, "step": 147260 }, { "epoch": 1.63, "learning_rate": 2.2822917589393807e-05, "loss": 0.6285, "step": 147265 }, { "epoch": 1.63, "learning_rate": 2.2821994862255294e-05, "loss": 0.6338, "step": 147270 }, { "epoch": 1.63, "learning_rate": 2.282107213511678e-05, "loss": 0.5951, "step": 147275 }, { "epoch": 1.63, "learning_rate": 2.282014940797827e-05, "loss": 0.5944, "step": 147280 }, { "epoch": 1.63, "learning_rate": 2.2819226680839755e-05, "loss": 0.547, "step": 147285 }, { "epoch": 1.63, "learning_rate": 2.2818303953701246e-05, "loss": 0.6321, "step": 147290 }, { "epoch": 1.63, "learning_rate": 2.281738122656273e-05, "loss": 0.6206, "step": 147295 }, { "epoch": 1.63, "learning_rate": 2.281645849942422e-05, "loss": 0.6252, "step": 147300 }, { "epoch": 1.63, "learning_rate": 2.2815535772285706e-05, "loss": 0.6368, "step": 147305 }, { "epoch": 1.63, "learning_rate": 2.2814613045147197e-05, "loss": 0.6357, "step": 147310 }, { "epoch": 1.63, "learning_rate": 2.281369031800868e-05, "loss": 0.6256, "step": 147315 }, { "epoch": 1.63, "learning_rate": 2.281276759087017e-05, "loss": 0.6368, "step": 147320 }, { "epoch": 1.63, "learning_rate": 2.2811844863731657e-05, "loss": 0.6225, "step": 147325 }, { "epoch": 1.63, "learning_rate": 2.2810922136593145e-05, "loss": 0.5653, "step": 147330 }, { "epoch": 1.63, "learning_rate": 2.2809999409454633e-05, "loss": 0.62, "step": 147335 }, { "epoch": 1.63, "learning_rate": 2.2809076682316118e-05, "loss": 0.6431, "step": 147340 }, { "epoch": 1.63, "learning_rate": 2.280815395517761e-05, "loss": 0.6336, "step": 147345 }, { "epoch": 1.63, "learning_rate": 2.2807231228039093e-05, "loss": 0.6196, "step": 147350 }, { "epoch": 1.63, "learning_rate": 2.2806308500900584e-05, "loss": 0.6284, "step": 147355 }, { "epoch": 1.63, "learning_rate": 2.280538577376207e-05, "loss": 0.6036, "step": 147360 }, { "epoch": 1.63, "learning_rate": 2.280446304662356e-05, "loss": 0.568, "step": 147365 }, { "epoch": 1.63, "learning_rate": 2.2803540319485045e-05, "loss": 0.5943, "step": 147370 }, { "epoch": 1.63, "learning_rate": 2.2802617592346532e-05, "loss": 0.6755, "step": 147375 }, { "epoch": 1.63, "learning_rate": 2.280169486520802e-05, "loss": 0.5923, "step": 147380 }, { "epoch": 1.63, "learning_rate": 2.2800772138069508e-05, "loss": 0.6564, "step": 147385 }, { "epoch": 1.63, "learning_rate": 2.2799849410930996e-05, "loss": 0.6146, "step": 147390 }, { "epoch": 1.63, "learning_rate": 2.2798926683792484e-05, "loss": 0.6597, "step": 147395 }, { "epoch": 1.63, "learning_rate": 2.2798003956653972e-05, "loss": 0.6487, "step": 147400 }, { "epoch": 1.63, "learning_rate": 2.2797081229515456e-05, "loss": 0.6961, "step": 147405 }, { "epoch": 1.63, "learning_rate": 2.2796158502376947e-05, "loss": 0.6359, "step": 147410 }, { "epoch": 1.63, "learning_rate": 2.2795235775238432e-05, "loss": 0.5787, "step": 147415 }, { "epoch": 1.63, "learning_rate": 2.2794313048099923e-05, "loss": 0.5912, "step": 147420 }, { "epoch": 1.63, "learning_rate": 2.2793390320961408e-05, "loss": 0.631, "step": 147425 }, { "epoch": 1.63, "learning_rate": 2.2792467593822895e-05, "loss": 0.6769, "step": 147430 }, { "epoch": 1.63, "learning_rate": 2.2791544866684383e-05, "loss": 0.658, "step": 147435 }, { "epoch": 1.63, "learning_rate": 2.279062213954587e-05, "loss": 0.6323, "step": 147440 }, { "epoch": 1.63, "learning_rate": 2.278969941240736e-05, "loss": 0.6419, "step": 147445 }, { "epoch": 1.63, "learning_rate": 2.2788776685268847e-05, "loss": 0.6651, "step": 147450 }, { "epoch": 1.63, "learning_rate": 2.2787853958130335e-05, "loss": 0.5717, "step": 147455 }, { "epoch": 1.63, "learning_rate": 2.2786931230991823e-05, "loss": 0.6446, "step": 147460 }, { "epoch": 1.63, "learning_rate": 2.278600850385331e-05, "loss": 0.6515, "step": 147465 }, { "epoch": 1.63, "learning_rate": 2.2785085776714798e-05, "loss": 0.6614, "step": 147470 }, { "epoch": 1.63, "learning_rate": 2.2784163049576286e-05, "loss": 0.6058, "step": 147475 }, { "epoch": 1.63, "learning_rate": 2.278324032243777e-05, "loss": 0.6481, "step": 147480 }, { "epoch": 1.63, "learning_rate": 2.2782317595299262e-05, "loss": 0.5977, "step": 147485 }, { "epoch": 1.63, "learning_rate": 2.2781394868160746e-05, "loss": 0.6261, "step": 147490 }, { "epoch": 1.63, "learning_rate": 2.2780472141022234e-05, "loss": 0.6303, "step": 147495 }, { "epoch": 1.63, "learning_rate": 2.2779549413883722e-05, "loss": 0.6552, "step": 147500 }, { "epoch": 1.63, "learning_rate": 2.277862668674521e-05, "loss": 0.619, "step": 147505 }, { "epoch": 1.63, "learning_rate": 2.2777703959606698e-05, "loss": 0.6072, "step": 147510 }, { "epoch": 1.63, "learning_rate": 2.2776781232468185e-05, "loss": 0.5814, "step": 147515 }, { "epoch": 1.63, "learning_rate": 2.2775858505329673e-05, "loss": 0.6227, "step": 147520 }, { "epoch": 1.63, "learning_rate": 2.277493577819116e-05, "loss": 0.6444, "step": 147525 }, { "epoch": 1.63, "learning_rate": 2.277401305105265e-05, "loss": 0.591, "step": 147530 }, { "epoch": 1.63, "learning_rate": 2.2773090323914137e-05, "loss": 0.6683, "step": 147535 }, { "epoch": 1.63, "learning_rate": 2.2772167596775625e-05, "loss": 0.6394, "step": 147540 }, { "epoch": 1.63, "learning_rate": 2.2771244869637113e-05, "loss": 0.6218, "step": 147545 }, { "epoch": 1.63, "learning_rate": 2.2770322142498597e-05, "loss": 0.618, "step": 147550 }, { "epoch": 1.63, "learning_rate": 2.2769399415360085e-05, "loss": 0.5962, "step": 147555 }, { "epoch": 1.63, "learning_rate": 2.2768476688221573e-05, "loss": 0.624, "step": 147560 }, { "epoch": 1.63, "learning_rate": 2.276755396108306e-05, "loss": 0.6638, "step": 147565 }, { "epoch": 1.63, "learning_rate": 2.276663123394455e-05, "loss": 0.6549, "step": 147570 }, { "epoch": 1.63, "learning_rate": 2.2765708506806036e-05, "loss": 0.6118, "step": 147575 }, { "epoch": 1.63, "learning_rate": 2.2764785779667524e-05, "loss": 0.5771, "step": 147580 }, { "epoch": 1.63, "learning_rate": 2.2763863052529012e-05, "loss": 0.6597, "step": 147585 }, { "epoch": 1.63, "learning_rate": 2.27629403253905e-05, "loss": 0.6288, "step": 147590 }, { "epoch": 1.63, "learning_rate": 2.2762017598251988e-05, "loss": 0.6188, "step": 147595 }, { "epoch": 1.63, "learning_rate": 2.2761094871113476e-05, "loss": 0.6354, "step": 147600 }, { "epoch": 1.63, "learning_rate": 2.276017214397496e-05, "loss": 0.5979, "step": 147605 }, { "epoch": 1.63, "learning_rate": 2.275924941683645e-05, "loss": 0.6455, "step": 147610 }, { "epoch": 1.63, "learning_rate": 2.2758326689697936e-05, "loss": 0.6277, "step": 147615 }, { "epoch": 1.63, "learning_rate": 2.2757403962559427e-05, "loss": 0.6283, "step": 147620 }, { "epoch": 1.63, "learning_rate": 2.275648123542091e-05, "loss": 0.6535, "step": 147625 }, { "epoch": 1.63, "learning_rate": 2.27555585082824e-05, "loss": 0.6489, "step": 147630 }, { "epoch": 1.63, "learning_rate": 2.2754635781143887e-05, "loss": 0.6286, "step": 147635 }, { "epoch": 1.63, "learning_rate": 2.2753713054005375e-05, "loss": 0.6348, "step": 147640 }, { "epoch": 1.63, "learning_rate": 2.2752790326866863e-05, "loss": 0.6034, "step": 147645 }, { "epoch": 1.63, "learning_rate": 2.275186759972835e-05, "loss": 0.6286, "step": 147650 }, { "epoch": 1.63, "learning_rate": 2.275094487258984e-05, "loss": 0.6534, "step": 147655 }, { "epoch": 1.63, "learning_rate": 2.2750022145451323e-05, "loss": 0.6578, "step": 147660 }, { "epoch": 1.64, "learning_rate": 2.2749099418312814e-05, "loss": 0.6461, "step": 147665 }, { "epoch": 1.64, "learning_rate": 2.27481766911743e-05, "loss": 0.6316, "step": 147670 }, { "epoch": 1.64, "learning_rate": 2.274725396403579e-05, "loss": 0.6106, "step": 147675 }, { "epoch": 1.64, "learning_rate": 2.2746331236897274e-05, "loss": 0.6411, "step": 147680 }, { "epoch": 1.64, "learning_rate": 2.2745408509758766e-05, "loss": 0.6116, "step": 147685 }, { "epoch": 1.64, "learning_rate": 2.274448578262025e-05, "loss": 0.6293, "step": 147690 }, { "epoch": 1.64, "learning_rate": 2.274356305548174e-05, "loss": 0.6572, "step": 147695 }, { "epoch": 1.64, "learning_rate": 2.2742640328343226e-05, "loss": 0.6144, "step": 147700 }, { "epoch": 1.64, "learning_rate": 2.2741717601204714e-05, "loss": 0.6348, "step": 147705 }, { "epoch": 1.64, "learning_rate": 2.27407948740662e-05, "loss": 0.5963, "step": 147710 }, { "epoch": 1.64, "learning_rate": 2.273987214692769e-05, "loss": 0.6079, "step": 147715 }, { "epoch": 1.64, "learning_rate": 2.2738949419789177e-05, "loss": 0.6115, "step": 147720 }, { "epoch": 1.64, "learning_rate": 2.273802669265066e-05, "loss": 0.6466, "step": 147725 }, { "epoch": 1.64, "learning_rate": 2.2737103965512153e-05, "loss": 0.5944, "step": 147730 }, { "epoch": 1.64, "learning_rate": 2.2736181238373637e-05, "loss": 0.6599, "step": 147735 }, { "epoch": 1.64, "learning_rate": 2.273525851123513e-05, "loss": 0.663, "step": 147740 }, { "epoch": 1.64, "learning_rate": 2.2734335784096613e-05, "loss": 0.6544, "step": 147745 }, { "epoch": 1.64, "learning_rate": 2.2733413056958104e-05, "loss": 0.6877, "step": 147750 }, { "epoch": 1.64, "learning_rate": 2.273249032981959e-05, "loss": 0.6454, "step": 147755 }, { "epoch": 1.64, "learning_rate": 2.2731567602681077e-05, "loss": 0.6397, "step": 147760 }, { "epoch": 1.64, "learning_rate": 2.2730644875542564e-05, "loss": 0.6118, "step": 147765 }, { "epoch": 1.64, "learning_rate": 2.2729722148404052e-05, "loss": 0.6348, "step": 147770 }, { "epoch": 1.64, "learning_rate": 2.272879942126554e-05, "loss": 0.6862, "step": 147775 }, { "epoch": 1.64, "learning_rate": 2.2727876694127025e-05, "loss": 0.6605, "step": 147780 }, { "epoch": 1.64, "learning_rate": 2.2726953966988516e-05, "loss": 0.6762, "step": 147785 }, { "epoch": 1.64, "learning_rate": 2.272603123985e-05, "loss": 0.7327, "step": 147790 }, { "epoch": 1.64, "learning_rate": 2.272510851271149e-05, "loss": 0.6747, "step": 147795 }, { "epoch": 1.64, "learning_rate": 2.2724185785572976e-05, "loss": 0.7159, "step": 147800 }, { "epoch": 1.64, "learning_rate": 2.2723263058434467e-05, "loss": 0.6796, "step": 147805 }, { "epoch": 1.64, "learning_rate": 2.272234033129595e-05, "loss": 0.6119, "step": 147810 }, { "epoch": 1.64, "learning_rate": 2.272141760415744e-05, "loss": 0.5877, "step": 147815 }, { "epoch": 1.64, "learning_rate": 2.2720494877018927e-05, "loss": 0.6361, "step": 147820 }, { "epoch": 1.64, "learning_rate": 2.2719572149880415e-05, "loss": 0.5977, "step": 147825 }, { "epoch": 1.64, "learning_rate": 2.2718649422741903e-05, "loss": 0.6501, "step": 147830 }, { "epoch": 1.64, "learning_rate": 2.271772669560339e-05, "loss": 0.6339, "step": 147835 }, { "epoch": 1.64, "learning_rate": 2.271680396846488e-05, "loss": 0.6235, "step": 147840 }, { "epoch": 1.64, "learning_rate": 2.2715881241326367e-05, "loss": 0.6832, "step": 147845 }, { "epoch": 1.64, "learning_rate": 2.2714958514187854e-05, "loss": 0.6598, "step": 147850 }, { "epoch": 1.64, "learning_rate": 2.2714035787049342e-05, "loss": 0.6618, "step": 147855 }, { "epoch": 1.64, "learning_rate": 2.271311305991083e-05, "loss": 0.6248, "step": 147860 }, { "epoch": 1.64, "learning_rate": 2.2712190332772315e-05, "loss": 0.6091, "step": 147865 }, { "epoch": 1.64, "learning_rate": 2.2711267605633806e-05, "loss": 0.607, "step": 147870 }, { "epoch": 1.64, "learning_rate": 2.271034487849529e-05, "loss": 0.7065, "step": 147875 }, { "epoch": 1.64, "learning_rate": 2.2709422151356778e-05, "loss": 0.6641, "step": 147880 }, { "epoch": 1.64, "learning_rate": 2.2708499424218266e-05, "loss": 0.6292, "step": 147885 }, { "epoch": 1.64, "learning_rate": 2.2707576697079754e-05, "loss": 0.6749, "step": 147890 }, { "epoch": 1.64, "learning_rate": 2.270665396994124e-05, "loss": 0.6383, "step": 147895 }, { "epoch": 1.64, "learning_rate": 2.270573124280273e-05, "loss": 0.6573, "step": 147900 }, { "epoch": 1.64, "learning_rate": 2.2704808515664217e-05, "loss": 0.6098, "step": 147905 }, { "epoch": 1.64, "learning_rate": 2.2703885788525705e-05, "loss": 0.6402, "step": 147910 }, { "epoch": 1.64, "learning_rate": 2.2702963061387193e-05, "loss": 0.6259, "step": 147915 }, { "epoch": 1.64, "learning_rate": 2.270204033424868e-05, "loss": 0.6561, "step": 147920 }, { "epoch": 1.64, "learning_rate": 2.270111760711017e-05, "loss": 0.6995, "step": 147925 }, { "epoch": 1.64, "learning_rate": 2.2700194879971657e-05, "loss": 0.6442, "step": 147930 }, { "epoch": 1.64, "learning_rate": 2.269927215283314e-05, "loss": 0.6166, "step": 147935 }, { "epoch": 1.64, "learning_rate": 2.269834942569463e-05, "loss": 0.6342, "step": 147940 }, { "epoch": 1.64, "learning_rate": 2.2697426698556117e-05, "loss": 0.5991, "step": 147945 }, { "epoch": 1.64, "learning_rate": 2.2696503971417605e-05, "loss": 0.592, "step": 147950 }, { "epoch": 1.64, "learning_rate": 2.2695581244279092e-05, "loss": 0.6487, "step": 147955 }, { "epoch": 1.64, "learning_rate": 2.269465851714058e-05, "loss": 0.6414, "step": 147960 }, { "epoch": 1.64, "learning_rate": 2.2693735790002068e-05, "loss": 0.6186, "step": 147965 }, { "epoch": 1.64, "learning_rate": 2.2692813062863556e-05, "loss": 0.6902, "step": 147970 }, { "epoch": 1.64, "learning_rate": 2.2691890335725044e-05, "loss": 0.6485, "step": 147975 }, { "epoch": 1.64, "learning_rate": 2.269096760858653e-05, "loss": 0.6078, "step": 147980 }, { "epoch": 1.64, "learning_rate": 2.269004488144802e-05, "loss": 0.681, "step": 147985 }, { "epoch": 1.64, "learning_rate": 2.2689122154309504e-05, "loss": 0.6025, "step": 147990 }, { "epoch": 1.64, "learning_rate": 2.2688199427170995e-05, "loss": 0.6429, "step": 147995 }, { "epoch": 1.64, "learning_rate": 2.268727670003248e-05, "loss": 0.6099, "step": 148000 }, { "epoch": 1.64, "eval_loss": 0.6246272921562195, "eval_runtime": 69.1072, "eval_samples_per_second": 28.941, "eval_steps_per_second": 14.47, "step": 148000 }, { "epoch": 1.64, "learning_rate": 2.268635397289397e-05, "loss": 0.7096, "step": 148005 }, { "epoch": 1.64, "learning_rate": 2.2685431245755455e-05, "loss": 0.6056, "step": 148010 }, { "epoch": 1.64, "learning_rate": 2.2684508518616943e-05, "loss": 0.6176, "step": 148015 }, { "epoch": 1.64, "learning_rate": 2.268358579147843e-05, "loss": 0.6724, "step": 148020 }, { "epoch": 1.64, "learning_rate": 2.268266306433992e-05, "loss": 0.6185, "step": 148025 }, { "epoch": 1.64, "learning_rate": 2.2681740337201407e-05, "loss": 0.6475, "step": 148030 }, { "epoch": 1.64, "learning_rate": 2.2680817610062895e-05, "loss": 0.674, "step": 148035 }, { "epoch": 1.64, "learning_rate": 2.2679894882924382e-05, "loss": 0.6682, "step": 148040 }, { "epoch": 1.64, "learning_rate": 2.267897215578587e-05, "loss": 0.6139, "step": 148045 }, { "epoch": 1.64, "learning_rate": 2.2678049428647358e-05, "loss": 0.6675, "step": 148050 }, { "epoch": 1.64, "learning_rate": 2.2677126701508843e-05, "loss": 0.6921, "step": 148055 }, { "epoch": 1.64, "learning_rate": 2.2676203974370334e-05, "loss": 0.6144, "step": 148060 }, { "epoch": 1.64, "learning_rate": 2.267528124723182e-05, "loss": 0.6281, "step": 148065 }, { "epoch": 1.64, "learning_rate": 2.267435852009331e-05, "loss": 0.6831, "step": 148070 }, { "epoch": 1.64, "learning_rate": 2.2673435792954794e-05, "loss": 0.687, "step": 148075 }, { "epoch": 1.64, "learning_rate": 2.2672513065816285e-05, "loss": 0.6372, "step": 148080 }, { "epoch": 1.64, "learning_rate": 2.267159033867777e-05, "loss": 0.6562, "step": 148085 }, { "epoch": 1.64, "learning_rate": 2.2670667611539258e-05, "loss": 0.6407, "step": 148090 }, { "epoch": 1.64, "learning_rate": 2.2669744884400745e-05, "loss": 0.6377, "step": 148095 }, { "epoch": 1.64, "learning_rate": 2.2668822157262233e-05, "loss": 0.6554, "step": 148100 }, { "epoch": 1.64, "learning_rate": 2.266789943012372e-05, "loss": 0.6375, "step": 148105 }, { "epoch": 1.64, "learning_rate": 2.2666976702985206e-05, "loss": 0.6353, "step": 148110 }, { "epoch": 1.64, "learning_rate": 2.2666053975846697e-05, "loss": 0.6003, "step": 148115 }, { "epoch": 1.64, "learning_rate": 2.266513124870818e-05, "loss": 0.6503, "step": 148120 }, { "epoch": 1.64, "learning_rate": 2.2664208521569673e-05, "loss": 0.6259, "step": 148125 }, { "epoch": 1.64, "learning_rate": 2.2663285794431157e-05, "loss": 0.6157, "step": 148130 }, { "epoch": 1.64, "learning_rate": 2.2662363067292648e-05, "loss": 0.6379, "step": 148135 }, { "epoch": 1.64, "learning_rate": 2.2661440340154133e-05, "loss": 0.6383, "step": 148140 }, { "epoch": 1.64, "learning_rate": 2.266051761301562e-05, "loss": 0.6844, "step": 148145 }, { "epoch": 1.64, "learning_rate": 2.265959488587711e-05, "loss": 0.6659, "step": 148150 }, { "epoch": 1.64, "learning_rate": 2.2658672158738596e-05, "loss": 0.6218, "step": 148155 }, { "epoch": 1.64, "learning_rate": 2.2657749431600084e-05, "loss": 0.6369, "step": 148160 }, { "epoch": 1.64, "learning_rate": 2.265682670446157e-05, "loss": 0.6166, "step": 148165 }, { "epoch": 1.64, "learning_rate": 2.265590397732306e-05, "loss": 0.5937, "step": 148170 }, { "epoch": 1.64, "learning_rate": 2.2654981250184544e-05, "loss": 0.6201, "step": 148175 }, { "epoch": 1.64, "learning_rate": 2.2654058523046035e-05, "loss": 0.6335, "step": 148180 }, { "epoch": 1.64, "learning_rate": 2.265313579590752e-05, "loss": 0.596, "step": 148185 }, { "epoch": 1.64, "learning_rate": 2.265221306876901e-05, "loss": 0.6691, "step": 148190 }, { "epoch": 1.64, "learning_rate": 2.2651290341630496e-05, "loss": 0.6326, "step": 148195 }, { "epoch": 1.64, "learning_rate": 2.2650367614491987e-05, "loss": 0.6731, "step": 148200 }, { "epoch": 1.64, "learning_rate": 2.264944488735347e-05, "loss": 0.6041, "step": 148205 }, { "epoch": 1.64, "learning_rate": 2.264852216021496e-05, "loss": 0.6385, "step": 148210 }, { "epoch": 1.64, "learning_rate": 2.2647599433076447e-05, "loss": 0.5752, "step": 148215 }, { "epoch": 1.64, "learning_rate": 2.2646676705937935e-05, "loss": 0.6321, "step": 148220 }, { "epoch": 1.64, "learning_rate": 2.2645753978799423e-05, "loss": 0.6327, "step": 148225 }, { "epoch": 1.64, "learning_rate": 2.264483125166091e-05, "loss": 0.6661, "step": 148230 }, { "epoch": 1.64, "learning_rate": 2.26439085245224e-05, "loss": 0.6501, "step": 148235 }, { "epoch": 1.64, "learning_rate": 2.2642985797383883e-05, "loss": 0.6064, "step": 148240 }, { "epoch": 1.64, "learning_rate": 2.2642063070245374e-05, "loss": 0.6727, "step": 148245 }, { "epoch": 1.64, "learning_rate": 2.264114034310686e-05, "loss": 0.5986, "step": 148250 }, { "epoch": 1.64, "learning_rate": 2.264021761596835e-05, "loss": 0.6888, "step": 148255 }, { "epoch": 1.64, "learning_rate": 2.2639294888829834e-05, "loss": 0.644, "step": 148260 }, { "epoch": 1.64, "learning_rate": 2.2638372161691322e-05, "loss": 0.6842, "step": 148265 }, { "epoch": 1.64, "learning_rate": 2.263744943455281e-05, "loss": 0.6053, "step": 148270 }, { "epoch": 1.64, "learning_rate": 2.2636526707414298e-05, "loss": 0.7015, "step": 148275 }, { "epoch": 1.64, "learning_rate": 2.2635603980275786e-05, "loss": 0.6457, "step": 148280 }, { "epoch": 1.64, "learning_rate": 2.2634681253137274e-05, "loss": 0.6517, "step": 148285 }, { "epoch": 1.64, "learning_rate": 2.263375852599876e-05, "loss": 0.5876, "step": 148290 }, { "epoch": 1.64, "learning_rate": 2.263283579886025e-05, "loss": 0.6831, "step": 148295 }, { "epoch": 1.64, "learning_rate": 2.2631913071721737e-05, "loss": 0.6765, "step": 148300 }, { "epoch": 1.64, "learning_rate": 2.2630990344583225e-05, "loss": 0.6392, "step": 148305 }, { "epoch": 1.64, "learning_rate": 2.2630067617444713e-05, "loss": 0.6644, "step": 148310 }, { "epoch": 1.64, "learning_rate": 2.2629144890306197e-05, "loss": 0.6459, "step": 148315 }, { "epoch": 1.64, "learning_rate": 2.2628222163167685e-05, "loss": 0.6333, "step": 148320 }, { "epoch": 1.64, "learning_rate": 2.2627299436029173e-05, "loss": 0.695, "step": 148325 }, { "epoch": 1.64, "learning_rate": 2.262637670889066e-05, "loss": 0.6574, "step": 148330 }, { "epoch": 1.64, "learning_rate": 2.262545398175215e-05, "loss": 0.6245, "step": 148335 }, { "epoch": 1.64, "learning_rate": 2.2624531254613636e-05, "loss": 0.6087, "step": 148340 }, { "epoch": 1.64, "learning_rate": 2.2623608527475124e-05, "loss": 0.6266, "step": 148345 }, { "epoch": 1.64, "learning_rate": 2.2622685800336612e-05, "loss": 0.6318, "step": 148350 }, { "epoch": 1.64, "learning_rate": 2.26217630731981e-05, "loss": 0.6439, "step": 148355 }, { "epoch": 1.64, "learning_rate": 2.2620840346059588e-05, "loss": 0.6471, "step": 148360 }, { "epoch": 1.64, "learning_rate": 2.2619917618921076e-05, "loss": 0.666, "step": 148365 }, { "epoch": 1.64, "learning_rate": 2.2618994891782564e-05, "loss": 0.5823, "step": 148370 }, { "epoch": 1.64, "learning_rate": 2.2618072164644048e-05, "loss": 0.6263, "step": 148375 }, { "epoch": 1.64, "learning_rate": 2.261714943750554e-05, "loss": 0.675, "step": 148380 }, { "epoch": 1.64, "learning_rate": 2.2616226710367024e-05, "loss": 0.6172, "step": 148385 }, { "epoch": 1.64, "learning_rate": 2.261530398322851e-05, "loss": 0.6154, "step": 148390 }, { "epoch": 1.64, "learning_rate": 2.261438125609e-05, "loss": 0.6518, "step": 148395 }, { "epoch": 1.64, "learning_rate": 2.2613458528951487e-05, "loss": 0.6237, "step": 148400 }, { "epoch": 1.64, "learning_rate": 2.2612535801812975e-05, "loss": 0.6475, "step": 148405 }, { "epoch": 1.64, "learning_rate": 2.2611613074674463e-05, "loss": 0.5887, "step": 148410 }, { "epoch": 1.64, "learning_rate": 2.261069034753595e-05, "loss": 0.6374, "step": 148415 }, { "epoch": 1.64, "learning_rate": 2.260976762039744e-05, "loss": 0.6106, "step": 148420 }, { "epoch": 1.64, "learning_rate": 2.2608844893258927e-05, "loss": 0.6336, "step": 148425 }, { "epoch": 1.64, "learning_rate": 2.2607922166120414e-05, "loss": 0.7097, "step": 148430 }, { "epoch": 1.64, "learning_rate": 2.2606999438981902e-05, "loss": 0.6217, "step": 148435 }, { "epoch": 1.64, "learning_rate": 2.2606076711843387e-05, "loss": 0.5748, "step": 148440 }, { "epoch": 1.64, "learning_rate": 2.2605153984704878e-05, "loss": 0.655, "step": 148445 }, { "epoch": 1.64, "learning_rate": 2.2604231257566362e-05, "loss": 0.6197, "step": 148450 }, { "epoch": 1.64, "learning_rate": 2.2603308530427854e-05, "loss": 0.6343, "step": 148455 }, { "epoch": 1.64, "learning_rate": 2.2602385803289338e-05, "loss": 0.6361, "step": 148460 }, { "epoch": 1.64, "learning_rate": 2.2601463076150826e-05, "loss": 0.7031, "step": 148465 }, { "epoch": 1.64, "learning_rate": 2.2600540349012314e-05, "loss": 0.6654, "step": 148470 }, { "epoch": 1.64, "learning_rate": 2.25996176218738e-05, "loss": 0.6128, "step": 148475 }, { "epoch": 1.64, "learning_rate": 2.259869489473529e-05, "loss": 0.6943, "step": 148480 }, { "epoch": 1.64, "learning_rate": 2.2597772167596777e-05, "loss": 0.5674, "step": 148485 }, { "epoch": 1.64, "learning_rate": 2.2596849440458265e-05, "loss": 0.6396, "step": 148490 }, { "epoch": 1.64, "learning_rate": 2.259592671331975e-05, "loss": 0.6344, "step": 148495 }, { "epoch": 1.64, "learning_rate": 2.259500398618124e-05, "loss": 0.6099, "step": 148500 }, { "epoch": 1.64, "learning_rate": 2.2594081259042725e-05, "loss": 0.6491, "step": 148505 }, { "epoch": 1.64, "learning_rate": 2.2593158531904217e-05, "loss": 0.5899, "step": 148510 }, { "epoch": 1.64, "learning_rate": 2.25922358047657e-05, "loss": 0.642, "step": 148515 }, { "epoch": 1.64, "learning_rate": 2.2591313077627192e-05, "loss": 0.6253, "step": 148520 }, { "epoch": 1.64, "learning_rate": 2.2590390350488677e-05, "loss": 0.5987, "step": 148525 }, { "epoch": 1.64, "learning_rate": 2.2589467623350165e-05, "loss": 0.6277, "step": 148530 }, { "epoch": 1.64, "learning_rate": 2.2588544896211652e-05, "loss": 0.595, "step": 148535 }, { "epoch": 1.64, "learning_rate": 2.258762216907314e-05, "loss": 0.6084, "step": 148540 }, { "epoch": 1.64, "learning_rate": 2.2586699441934628e-05, "loss": 0.6181, "step": 148545 }, { "epoch": 1.64, "learning_rate": 2.2585776714796113e-05, "loss": 0.5569, "step": 148550 }, { "epoch": 1.64, "learning_rate": 2.2584853987657604e-05, "loss": 0.6158, "step": 148555 }, { "epoch": 1.64, "learning_rate": 2.2583931260519088e-05, "loss": 0.6273, "step": 148560 }, { "epoch": 1.65, "learning_rate": 2.258300853338058e-05, "loss": 0.5969, "step": 148565 }, { "epoch": 1.65, "learning_rate": 2.2582085806242064e-05, "loss": 0.6125, "step": 148570 }, { "epoch": 1.65, "learning_rate": 2.2581163079103555e-05, "loss": 0.6023, "step": 148575 }, { "epoch": 1.65, "learning_rate": 2.258024035196504e-05, "loss": 0.6139, "step": 148580 }, { "epoch": 1.65, "learning_rate": 2.257931762482653e-05, "loss": 0.6005, "step": 148585 }, { "epoch": 1.65, "learning_rate": 2.2578394897688015e-05, "loss": 0.6284, "step": 148590 }, { "epoch": 1.65, "learning_rate": 2.2577472170549503e-05, "loss": 0.5898, "step": 148595 }, { "epoch": 1.65, "learning_rate": 2.257654944341099e-05, "loss": 0.632, "step": 148600 }, { "epoch": 1.65, "learning_rate": 2.257562671627248e-05, "loss": 0.6285, "step": 148605 }, { "epoch": 1.65, "learning_rate": 2.2574703989133967e-05, "loss": 0.6243, "step": 148610 }, { "epoch": 1.65, "learning_rate": 2.257378126199545e-05, "loss": 0.5728, "step": 148615 }, { "epoch": 1.65, "learning_rate": 2.2572858534856942e-05, "loss": 0.6274, "step": 148620 }, { "epoch": 1.65, "learning_rate": 2.2571935807718427e-05, "loss": 0.6363, "step": 148625 }, { "epoch": 1.65, "learning_rate": 2.2571013080579918e-05, "loss": 0.6734, "step": 148630 }, { "epoch": 1.65, "learning_rate": 2.2570090353441403e-05, "loss": 0.6149, "step": 148635 }, { "epoch": 1.65, "learning_rate": 2.2569167626302894e-05, "loss": 0.6097, "step": 148640 }, { "epoch": 1.65, "learning_rate": 2.2568244899164378e-05, "loss": 0.6518, "step": 148645 }, { "epoch": 1.65, "learning_rate": 2.2567322172025866e-05, "loss": 0.6065, "step": 148650 }, { "epoch": 1.65, "learning_rate": 2.2566399444887354e-05, "loss": 0.643, "step": 148655 }, { "epoch": 1.65, "learning_rate": 2.2565476717748842e-05, "loss": 0.6419, "step": 148660 }, { "epoch": 1.65, "learning_rate": 2.256455399061033e-05, "loss": 0.5693, "step": 148665 }, { "epoch": 1.65, "learning_rate": 2.2563631263471818e-05, "loss": 0.6424, "step": 148670 }, { "epoch": 1.65, "learning_rate": 2.2562708536333305e-05, "loss": 0.6531, "step": 148675 }, { "epoch": 1.65, "learning_rate": 2.2561785809194793e-05, "loss": 0.6197, "step": 148680 }, { "epoch": 1.65, "learning_rate": 2.256086308205628e-05, "loss": 0.6017, "step": 148685 }, { "epoch": 1.65, "learning_rate": 2.2559940354917766e-05, "loss": 0.6548, "step": 148690 }, { "epoch": 1.65, "learning_rate": 2.2559017627779257e-05, "loss": 0.6356, "step": 148695 }, { "epoch": 1.65, "learning_rate": 2.255809490064074e-05, "loss": 0.6448, "step": 148700 }, { "epoch": 1.65, "learning_rate": 2.255717217350223e-05, "loss": 0.5968, "step": 148705 }, { "epoch": 1.65, "learning_rate": 2.2556249446363717e-05, "loss": 0.6534, "step": 148710 }, { "epoch": 1.65, "learning_rate": 2.2555326719225205e-05, "loss": 0.6133, "step": 148715 }, { "epoch": 1.65, "learning_rate": 2.2554403992086693e-05, "loss": 0.6201, "step": 148720 }, { "epoch": 1.65, "learning_rate": 2.255348126494818e-05, "loss": 0.6388, "step": 148725 }, { "epoch": 1.65, "learning_rate": 2.255255853780967e-05, "loss": 0.6477, "step": 148730 }, { "epoch": 1.65, "learning_rate": 2.2551635810671156e-05, "loss": 0.6406, "step": 148735 }, { "epoch": 1.65, "learning_rate": 2.2550713083532644e-05, "loss": 0.677, "step": 148740 }, { "epoch": 1.65, "learning_rate": 2.2549790356394132e-05, "loss": 0.6533, "step": 148745 }, { "epoch": 1.65, "learning_rate": 2.254886762925562e-05, "loss": 0.6608, "step": 148750 }, { "epoch": 1.65, "learning_rate": 2.2547944902117108e-05, "loss": 0.6123, "step": 148755 }, { "epoch": 1.65, "learning_rate": 2.2547022174978592e-05, "loss": 0.6577, "step": 148760 }, { "epoch": 1.65, "learning_rate": 2.2546099447840083e-05, "loss": 0.5638, "step": 148765 }, { "epoch": 1.65, "learning_rate": 2.2545176720701568e-05, "loss": 0.5913, "step": 148770 }, { "epoch": 1.65, "learning_rate": 2.2544253993563056e-05, "loss": 0.6024, "step": 148775 }, { "epoch": 1.65, "learning_rate": 2.2543331266424543e-05, "loss": 0.623, "step": 148780 }, { "epoch": 1.65, "learning_rate": 2.254240853928603e-05, "loss": 0.6432, "step": 148785 }, { "epoch": 1.65, "learning_rate": 2.254148581214752e-05, "loss": 0.6337, "step": 148790 }, { "epoch": 1.65, "learning_rate": 2.2540563085009007e-05, "loss": 0.5535, "step": 148795 }, { "epoch": 1.65, "learning_rate": 2.2539640357870495e-05, "loss": 0.6496, "step": 148800 }, { "epoch": 1.65, "learning_rate": 2.2538717630731983e-05, "loss": 0.6082, "step": 148805 }, { "epoch": 1.65, "learning_rate": 2.253779490359347e-05, "loss": 0.6423, "step": 148810 }, { "epoch": 1.65, "learning_rate": 2.253687217645496e-05, "loss": 0.6163, "step": 148815 }, { "epoch": 1.65, "learning_rate": 2.2535949449316446e-05, "loss": 0.6593, "step": 148820 }, { "epoch": 1.65, "learning_rate": 2.253502672217793e-05, "loss": 0.6313, "step": 148825 }, { "epoch": 1.65, "learning_rate": 2.2534103995039422e-05, "loss": 0.6061, "step": 148830 }, { "epoch": 1.65, "learning_rate": 2.2533181267900906e-05, "loss": 0.6239, "step": 148835 }, { "epoch": 1.65, "learning_rate": 2.2532258540762398e-05, "loss": 0.6035, "step": 148840 }, { "epoch": 1.65, "learning_rate": 2.2531335813623882e-05, "loss": 0.6309, "step": 148845 }, { "epoch": 1.65, "learning_rate": 2.253041308648537e-05, "loss": 0.6319, "step": 148850 }, { "epoch": 1.65, "learning_rate": 2.2529490359346858e-05, "loss": 0.6064, "step": 148855 }, { "epoch": 1.65, "learning_rate": 2.2528567632208346e-05, "loss": 0.61, "step": 148860 }, { "epoch": 1.65, "learning_rate": 2.2527644905069833e-05, "loss": 0.6469, "step": 148865 }, { "epoch": 1.65, "learning_rate": 2.252672217793132e-05, "loss": 0.5995, "step": 148870 }, { "epoch": 1.65, "learning_rate": 2.252579945079281e-05, "loss": 0.7116, "step": 148875 }, { "epoch": 1.65, "learning_rate": 2.2524876723654294e-05, "loss": 0.6307, "step": 148880 }, { "epoch": 1.65, "learning_rate": 2.2523953996515785e-05, "loss": 0.5448, "step": 148885 }, { "epoch": 1.65, "learning_rate": 2.252303126937727e-05, "loss": 0.5333, "step": 148890 }, { "epoch": 1.65, "learning_rate": 2.252210854223876e-05, "loss": 0.625, "step": 148895 }, { "epoch": 1.65, "learning_rate": 2.2521185815100245e-05, "loss": 0.7094, "step": 148900 }, { "epoch": 1.65, "learning_rate": 2.2520263087961736e-05, "loss": 0.6161, "step": 148905 }, { "epoch": 1.65, "learning_rate": 2.251934036082322e-05, "loss": 0.5998, "step": 148910 }, { "epoch": 1.65, "learning_rate": 2.251841763368471e-05, "loss": 0.6966, "step": 148915 }, { "epoch": 1.65, "learning_rate": 2.2517494906546196e-05, "loss": 0.6967, "step": 148920 }, { "epoch": 1.65, "learning_rate": 2.2516572179407684e-05, "loss": 0.6297, "step": 148925 }, { "epoch": 1.65, "learning_rate": 2.2515649452269172e-05, "loss": 0.6493, "step": 148930 }, { "epoch": 1.65, "learning_rate": 2.2514726725130657e-05, "loss": 0.658, "step": 148935 }, { "epoch": 1.65, "learning_rate": 2.2513803997992148e-05, "loss": 0.6625, "step": 148940 }, { "epoch": 1.65, "learning_rate": 2.2512881270853632e-05, "loss": 0.6049, "step": 148945 }, { "epoch": 1.65, "learning_rate": 2.2511958543715124e-05, "loss": 0.6336, "step": 148950 }, { "epoch": 1.65, "learning_rate": 2.2511035816576608e-05, "loss": 0.5995, "step": 148955 }, { "epoch": 1.65, "learning_rate": 2.25101130894381e-05, "loss": 0.6363, "step": 148960 }, { "epoch": 1.65, "learning_rate": 2.2509190362299584e-05, "loss": 0.6231, "step": 148965 }, { "epoch": 1.65, "learning_rate": 2.2508267635161075e-05, "loss": 0.6241, "step": 148970 }, { "epoch": 1.65, "learning_rate": 2.250734490802256e-05, "loss": 0.6575, "step": 148975 }, { "epoch": 1.65, "learning_rate": 2.2506422180884047e-05, "loss": 0.6474, "step": 148980 }, { "epoch": 1.65, "learning_rate": 2.2505499453745535e-05, "loss": 0.6598, "step": 148985 }, { "epoch": 1.65, "learning_rate": 2.2504576726607023e-05, "loss": 0.6519, "step": 148990 }, { "epoch": 1.65, "learning_rate": 2.250365399946851e-05, "loss": 0.6599, "step": 148995 }, { "epoch": 1.65, "learning_rate": 2.2502731272329995e-05, "loss": 0.658, "step": 149000 }, { "epoch": 1.65, "eval_loss": 0.6115843653678894, "eval_runtime": 69.1015, "eval_samples_per_second": 28.943, "eval_steps_per_second": 14.471, "step": 149000 }, { "epoch": 1.65, "learning_rate": 2.2501808545191486e-05, "loss": 0.6121, "step": 149005 }, { "epoch": 1.65, "learning_rate": 2.250088581805297e-05, "loss": 0.622, "step": 149010 }, { "epoch": 1.65, "learning_rate": 2.2499963090914462e-05, "loss": 0.6121, "step": 149015 }, { "epoch": 1.65, "learning_rate": 2.2499040363775947e-05, "loss": 0.6362, "step": 149020 }, { "epoch": 1.65, "learning_rate": 2.2498117636637438e-05, "loss": 0.7015, "step": 149025 }, { "epoch": 1.65, "learning_rate": 2.2497194909498922e-05, "loss": 0.6444, "step": 149030 }, { "epoch": 1.65, "learning_rate": 2.249627218236041e-05, "loss": 0.6598, "step": 149035 }, { "epoch": 1.65, "learning_rate": 2.2495349455221898e-05, "loss": 0.5979, "step": 149040 }, { "epoch": 1.65, "learning_rate": 2.2494426728083386e-05, "loss": 0.6434, "step": 149045 }, { "epoch": 1.65, "learning_rate": 2.2493504000944874e-05, "loss": 0.6167, "step": 149050 }, { "epoch": 1.65, "learning_rate": 2.249258127380636e-05, "loss": 0.5987, "step": 149055 }, { "epoch": 1.65, "learning_rate": 2.249165854666785e-05, "loss": 0.5973, "step": 149060 }, { "epoch": 1.65, "learning_rate": 2.2490735819529337e-05, "loss": 0.6367, "step": 149065 }, { "epoch": 1.65, "learning_rate": 2.2489813092390825e-05, "loss": 0.6336, "step": 149070 }, { "epoch": 1.65, "learning_rate": 2.248889036525231e-05, "loss": 0.666, "step": 149075 }, { "epoch": 1.65, "learning_rate": 2.24879676381138e-05, "loss": 0.6056, "step": 149080 }, { "epoch": 1.65, "learning_rate": 2.2487044910975285e-05, "loss": 0.5898, "step": 149085 }, { "epoch": 1.65, "learning_rate": 2.2486122183836773e-05, "loss": 0.6192, "step": 149090 }, { "epoch": 1.65, "learning_rate": 2.248519945669826e-05, "loss": 0.6366, "step": 149095 }, { "epoch": 1.65, "learning_rate": 2.248427672955975e-05, "loss": 0.6948, "step": 149100 }, { "epoch": 1.65, "learning_rate": 2.2483354002421237e-05, "loss": 0.6521, "step": 149105 }, { "epoch": 1.65, "learning_rate": 2.2482431275282724e-05, "loss": 0.71, "step": 149110 }, { "epoch": 1.65, "learning_rate": 2.2481508548144212e-05, "loss": 0.6345, "step": 149115 }, { "epoch": 1.65, "learning_rate": 2.24805858210057e-05, "loss": 0.6424, "step": 149120 }, { "epoch": 1.65, "learning_rate": 2.2479663093867188e-05, "loss": 0.6444, "step": 149125 }, { "epoch": 1.65, "learning_rate": 2.2478740366728676e-05, "loss": 0.6308, "step": 149130 }, { "epoch": 1.65, "learning_rate": 2.2477817639590164e-05, "loss": 0.666, "step": 149135 }, { "epoch": 1.65, "learning_rate": 2.247689491245165e-05, "loss": 0.6158, "step": 149140 }, { "epoch": 1.65, "learning_rate": 2.247597218531314e-05, "loss": 0.6006, "step": 149145 }, { "epoch": 1.65, "learning_rate": 2.2475049458174624e-05, "loss": 0.6146, "step": 149150 }, { "epoch": 1.65, "learning_rate": 2.2474126731036112e-05, "loss": 0.6339, "step": 149155 }, { "epoch": 1.65, "learning_rate": 2.24732040038976e-05, "loss": 0.6241, "step": 149160 }, { "epoch": 1.65, "learning_rate": 2.2472281276759087e-05, "loss": 0.5986, "step": 149165 }, { "epoch": 1.65, "learning_rate": 2.2471358549620575e-05, "loss": 0.6367, "step": 149170 }, { "epoch": 1.65, "learning_rate": 2.2470435822482063e-05, "loss": 0.6463, "step": 149175 }, { "epoch": 1.65, "learning_rate": 2.246951309534355e-05, "loss": 0.6002, "step": 149180 }, { "epoch": 1.65, "learning_rate": 2.246859036820504e-05, "loss": 0.6407, "step": 149185 }, { "epoch": 1.65, "learning_rate": 2.2467667641066527e-05, "loss": 0.6465, "step": 149190 }, { "epoch": 1.65, "learning_rate": 2.2466744913928015e-05, "loss": 0.628, "step": 149195 }, { "epoch": 1.65, "learning_rate": 2.2465822186789502e-05, "loss": 0.6452, "step": 149200 }, { "epoch": 1.65, "learning_rate": 2.246489945965099e-05, "loss": 0.6236, "step": 149205 }, { "epoch": 1.65, "learning_rate": 2.2463976732512475e-05, "loss": 0.6642, "step": 149210 }, { "epoch": 1.65, "learning_rate": 2.2463054005373966e-05, "loss": 0.6205, "step": 149215 }, { "epoch": 1.65, "learning_rate": 2.246213127823545e-05, "loss": 0.6299, "step": 149220 }, { "epoch": 1.65, "learning_rate": 2.2461208551096938e-05, "loss": 0.6292, "step": 149225 }, { "epoch": 1.65, "learning_rate": 2.2460285823958426e-05, "loss": 0.652, "step": 149230 }, { "epoch": 1.65, "learning_rate": 2.2459363096819914e-05, "loss": 0.6448, "step": 149235 }, { "epoch": 1.65, "learning_rate": 2.2458440369681402e-05, "loss": 0.6828, "step": 149240 }, { "epoch": 1.65, "learning_rate": 2.245751764254289e-05, "loss": 0.6822, "step": 149245 }, { "epoch": 1.65, "learning_rate": 2.2456594915404377e-05, "loss": 0.6447, "step": 149250 }, { "epoch": 1.65, "learning_rate": 2.2455672188265865e-05, "loss": 0.6185, "step": 149255 }, { "epoch": 1.65, "learning_rate": 2.2454749461127353e-05, "loss": 0.6467, "step": 149260 }, { "epoch": 1.65, "learning_rate": 2.2453826733988838e-05, "loss": 0.6486, "step": 149265 }, { "epoch": 1.65, "learning_rate": 2.245290400685033e-05, "loss": 0.6404, "step": 149270 }, { "epoch": 1.65, "learning_rate": 2.2451981279711813e-05, "loss": 0.6425, "step": 149275 }, { "epoch": 1.65, "learning_rate": 2.2451058552573305e-05, "loss": 0.6598, "step": 149280 }, { "epoch": 1.65, "learning_rate": 2.245013582543479e-05, "loss": 0.6098, "step": 149285 }, { "epoch": 1.65, "learning_rate": 2.244921309829628e-05, "loss": 0.6017, "step": 149290 }, { "epoch": 1.65, "learning_rate": 2.2448290371157765e-05, "loss": 0.6067, "step": 149295 }, { "epoch": 1.65, "learning_rate": 2.2447367644019253e-05, "loss": 0.6536, "step": 149300 }, { "epoch": 1.65, "learning_rate": 2.244644491688074e-05, "loss": 0.6395, "step": 149305 }, { "epoch": 1.65, "learning_rate": 2.2445522189742228e-05, "loss": 0.6417, "step": 149310 }, { "epoch": 1.65, "learning_rate": 2.2444599462603716e-05, "loss": 0.6025, "step": 149315 }, { "epoch": 1.65, "learning_rate": 2.24436767354652e-05, "loss": 0.6631, "step": 149320 }, { "epoch": 1.65, "learning_rate": 2.2442754008326692e-05, "loss": 0.6347, "step": 149325 }, { "epoch": 1.65, "learning_rate": 2.2441831281188176e-05, "loss": 0.6377, "step": 149330 }, { "epoch": 1.65, "learning_rate": 2.2440908554049668e-05, "loss": 0.6247, "step": 149335 }, { "epoch": 1.65, "learning_rate": 2.2439985826911152e-05, "loss": 0.6101, "step": 149340 }, { "epoch": 1.65, "learning_rate": 2.2439063099772643e-05, "loss": 0.6181, "step": 149345 }, { "epoch": 1.65, "learning_rate": 2.2438140372634128e-05, "loss": 0.6607, "step": 149350 }, { "epoch": 1.65, "learning_rate": 2.243721764549562e-05, "loss": 0.6913, "step": 149355 }, { "epoch": 1.65, "learning_rate": 2.2436294918357103e-05, "loss": 0.6535, "step": 149360 }, { "epoch": 1.65, "learning_rate": 2.243537219121859e-05, "loss": 0.6661, "step": 149365 }, { "epoch": 1.65, "learning_rate": 2.243444946408008e-05, "loss": 0.6289, "step": 149370 }, { "epoch": 1.65, "learning_rate": 2.2433526736941567e-05, "loss": 0.6404, "step": 149375 }, { "epoch": 1.65, "learning_rate": 2.2432604009803055e-05, "loss": 0.6691, "step": 149380 }, { "epoch": 1.65, "learning_rate": 2.243168128266454e-05, "loss": 0.6421, "step": 149385 }, { "epoch": 1.65, "learning_rate": 2.243075855552603e-05, "loss": 0.6206, "step": 149390 }, { "epoch": 1.65, "learning_rate": 2.2429835828387515e-05, "loss": 0.6706, "step": 149395 }, { "epoch": 1.65, "learning_rate": 2.2428913101249006e-05, "loss": 0.6331, "step": 149400 }, { "epoch": 1.65, "learning_rate": 2.242799037411049e-05, "loss": 0.6051, "step": 149405 }, { "epoch": 1.65, "learning_rate": 2.2427067646971982e-05, "loss": 0.647, "step": 149410 }, { "epoch": 1.65, "learning_rate": 2.2426144919833466e-05, "loss": 0.6503, "step": 149415 }, { "epoch": 1.65, "learning_rate": 2.2425222192694954e-05, "loss": 0.6199, "step": 149420 }, { "epoch": 1.65, "learning_rate": 2.2424299465556442e-05, "loss": 0.6542, "step": 149425 }, { "epoch": 1.65, "learning_rate": 2.242337673841793e-05, "loss": 0.6542, "step": 149430 }, { "epoch": 1.65, "learning_rate": 2.2422454011279418e-05, "loss": 0.6195, "step": 149435 }, { "epoch": 1.65, "learning_rate": 2.2421531284140906e-05, "loss": 0.6348, "step": 149440 }, { "epoch": 1.65, "learning_rate": 2.2420608557002393e-05, "loss": 0.608, "step": 149445 }, { "epoch": 1.65, "learning_rate": 2.2419685829863878e-05, "loss": 0.6153, "step": 149450 }, { "epoch": 1.65, "learning_rate": 2.241876310272537e-05, "loss": 0.6382, "step": 149455 }, { "epoch": 1.65, "learning_rate": 2.2417840375586854e-05, "loss": 0.6342, "step": 149460 }, { "epoch": 1.65, "learning_rate": 2.2416917648448345e-05, "loss": 0.6567, "step": 149465 }, { "epoch": 1.66, "learning_rate": 2.241599492130983e-05, "loss": 0.6388, "step": 149470 }, { "epoch": 1.66, "learning_rate": 2.2415072194171317e-05, "loss": 0.5809, "step": 149475 }, { "epoch": 1.66, "learning_rate": 2.2414149467032805e-05, "loss": 0.6516, "step": 149480 }, { "epoch": 1.66, "learning_rate": 2.2413226739894293e-05, "loss": 0.6418, "step": 149485 }, { "epoch": 1.66, "learning_rate": 2.241230401275578e-05, "loss": 0.6797, "step": 149490 }, { "epoch": 1.66, "learning_rate": 2.241138128561727e-05, "loss": 0.6498, "step": 149495 }, { "epoch": 1.66, "learning_rate": 2.2410458558478756e-05, "loss": 0.6122, "step": 149500 }, { "epoch": 1.66, "learning_rate": 2.2409535831340244e-05, "loss": 0.6159, "step": 149505 }, { "epoch": 1.66, "learning_rate": 2.2408613104201732e-05, "loss": 0.6117, "step": 149510 }, { "epoch": 1.66, "learning_rate": 2.240769037706322e-05, "loss": 0.6315, "step": 149515 }, { "epoch": 1.66, "learning_rate": 2.2406767649924708e-05, "loss": 0.623, "step": 149520 }, { "epoch": 1.66, "learning_rate": 2.2405844922786192e-05, "loss": 0.6372, "step": 149525 }, { "epoch": 1.66, "learning_rate": 2.2404922195647683e-05, "loss": 0.6228, "step": 149530 }, { "epoch": 1.66, "learning_rate": 2.2403999468509168e-05, "loss": 0.6294, "step": 149535 }, { "epoch": 1.66, "learning_rate": 2.2403076741370656e-05, "loss": 0.6121, "step": 149540 }, { "epoch": 1.66, "learning_rate": 2.2402154014232144e-05, "loss": 0.6404, "step": 149545 }, { "epoch": 1.66, "learning_rate": 2.240123128709363e-05, "loss": 0.6479, "step": 149550 }, { "epoch": 1.66, "learning_rate": 2.240030855995512e-05, "loss": 0.6394, "step": 149555 }, { "epoch": 1.66, "learning_rate": 2.2399385832816607e-05, "loss": 0.6167, "step": 149560 }, { "epoch": 1.66, "learning_rate": 2.2398463105678095e-05, "loss": 0.6876, "step": 149565 }, { "epoch": 1.66, "learning_rate": 2.2397540378539583e-05, "loss": 0.573, "step": 149570 }, { "epoch": 1.66, "learning_rate": 2.239661765140107e-05, "loss": 0.5946, "step": 149575 }, { "epoch": 1.66, "learning_rate": 2.239569492426256e-05, "loss": 0.5519, "step": 149580 }, { "epoch": 1.66, "learning_rate": 2.2394772197124046e-05, "loss": 0.6499, "step": 149585 }, { "epoch": 1.66, "learning_rate": 2.2393849469985534e-05, "loss": 0.6465, "step": 149590 }, { "epoch": 1.66, "learning_rate": 2.239292674284702e-05, "loss": 0.6532, "step": 149595 }, { "epoch": 1.66, "learning_rate": 2.239200401570851e-05, "loss": 0.6555, "step": 149600 }, { "epoch": 1.66, "learning_rate": 2.2391081288569994e-05, "loss": 0.6318, "step": 149605 }, { "epoch": 1.66, "learning_rate": 2.2390158561431482e-05, "loss": 0.6427, "step": 149610 }, { "epoch": 1.66, "learning_rate": 2.238923583429297e-05, "loss": 0.6202, "step": 149615 }, { "epoch": 1.66, "learning_rate": 2.2388313107154458e-05, "loss": 0.6591, "step": 149620 }, { "epoch": 1.66, "learning_rate": 2.2387390380015946e-05, "loss": 0.6633, "step": 149625 }, { "epoch": 1.66, "learning_rate": 2.2386467652877434e-05, "loss": 0.6741, "step": 149630 }, { "epoch": 1.66, "learning_rate": 2.238554492573892e-05, "loss": 0.6619, "step": 149635 }, { "epoch": 1.66, "learning_rate": 2.238462219860041e-05, "loss": 0.6816, "step": 149640 }, { "epoch": 1.66, "learning_rate": 2.2383699471461897e-05, "loss": 0.6223, "step": 149645 }, { "epoch": 1.66, "learning_rate": 2.238277674432338e-05, "loss": 0.6238, "step": 149650 }, { "epoch": 1.66, "learning_rate": 2.2381854017184873e-05, "loss": 0.6287, "step": 149655 }, { "epoch": 1.66, "learning_rate": 2.2380931290046357e-05, "loss": 0.6513, "step": 149660 }, { "epoch": 1.66, "learning_rate": 2.238000856290785e-05, "loss": 0.5881, "step": 149665 }, { "epoch": 1.66, "learning_rate": 2.2379085835769333e-05, "loss": 0.5768, "step": 149670 }, { "epoch": 1.66, "learning_rate": 2.2378163108630824e-05, "loss": 0.6309, "step": 149675 }, { "epoch": 1.66, "learning_rate": 2.237724038149231e-05, "loss": 0.5973, "step": 149680 }, { "epoch": 1.66, "learning_rate": 2.2376317654353797e-05, "loss": 0.6198, "step": 149685 }, { "epoch": 1.66, "learning_rate": 2.2375394927215284e-05, "loss": 0.6081, "step": 149690 }, { "epoch": 1.66, "learning_rate": 2.2374472200076772e-05, "loss": 0.6355, "step": 149695 }, { "epoch": 1.66, "learning_rate": 2.237354947293826e-05, "loss": 0.659, "step": 149700 }, { "epoch": 1.66, "learning_rate": 2.2372626745799745e-05, "loss": 0.6908, "step": 149705 }, { "epoch": 1.66, "learning_rate": 2.2371704018661236e-05, "loss": 0.6051, "step": 149710 }, { "epoch": 1.66, "learning_rate": 2.237078129152272e-05, "loss": 0.6609, "step": 149715 }, { "epoch": 1.66, "learning_rate": 2.236985856438421e-05, "loss": 0.6495, "step": 149720 }, { "epoch": 1.66, "learning_rate": 2.2368935837245696e-05, "loss": 0.6203, "step": 149725 }, { "epoch": 1.66, "learning_rate": 2.2368013110107187e-05, "loss": 0.653, "step": 149730 }, { "epoch": 1.66, "learning_rate": 2.2367090382968672e-05, "loss": 0.652, "step": 149735 }, { "epoch": 1.66, "learning_rate": 2.2366167655830163e-05, "loss": 0.6005, "step": 149740 }, { "epoch": 1.66, "learning_rate": 2.2365244928691647e-05, "loss": 0.6485, "step": 149745 }, { "epoch": 1.66, "learning_rate": 2.2364322201553135e-05, "loss": 0.6245, "step": 149750 }, { "epoch": 1.66, "learning_rate": 2.2363399474414623e-05, "loss": 0.6838, "step": 149755 }, { "epoch": 1.66, "learning_rate": 2.236247674727611e-05, "loss": 0.6364, "step": 149760 }, { "epoch": 1.66, "learning_rate": 2.23615540201376e-05, "loss": 0.655, "step": 149765 }, { "epoch": 1.66, "learning_rate": 2.2360631292999083e-05, "loss": 0.5766, "step": 149770 }, { "epoch": 1.66, "learning_rate": 2.2359708565860574e-05, "loss": 0.5988, "step": 149775 }, { "epoch": 1.66, "learning_rate": 2.235878583872206e-05, "loss": 0.6586, "step": 149780 }, { "epoch": 1.66, "learning_rate": 2.235786311158355e-05, "loss": 0.7105, "step": 149785 }, { "epoch": 1.66, "learning_rate": 2.2356940384445035e-05, "loss": 0.6233, "step": 149790 }, { "epoch": 1.66, "learning_rate": 2.2356017657306526e-05, "loss": 0.61, "step": 149795 }, { "epoch": 1.66, "learning_rate": 2.235509493016801e-05, "loss": 0.5915, "step": 149800 }, { "epoch": 1.66, "learning_rate": 2.2354172203029498e-05, "loss": 0.6601, "step": 149805 }, { "epoch": 1.66, "learning_rate": 2.2353249475890986e-05, "loss": 0.6379, "step": 149810 }, { "epoch": 1.66, "learning_rate": 2.2352326748752474e-05, "loss": 0.5816, "step": 149815 }, { "epoch": 1.66, "learning_rate": 2.2351404021613962e-05, "loss": 0.6379, "step": 149820 }, { "epoch": 1.66, "learning_rate": 2.235048129447545e-05, "loss": 0.6749, "step": 149825 }, { "epoch": 1.66, "learning_rate": 2.2349558567336937e-05, "loss": 0.6389, "step": 149830 }, { "epoch": 1.66, "learning_rate": 2.2348635840198422e-05, "loss": 0.5941, "step": 149835 }, { "epoch": 1.66, "learning_rate": 2.2347713113059913e-05, "loss": 0.5767, "step": 149840 }, { "epoch": 1.66, "learning_rate": 2.2346790385921398e-05, "loss": 0.6832, "step": 149845 }, { "epoch": 1.66, "learning_rate": 2.234586765878289e-05, "loss": 0.6653, "step": 149850 }, { "epoch": 1.66, "learning_rate": 2.2344944931644373e-05, "loss": 0.6384, "step": 149855 }, { "epoch": 1.66, "learning_rate": 2.234402220450586e-05, "loss": 0.6742, "step": 149860 }, { "epoch": 1.66, "learning_rate": 2.234309947736735e-05, "loss": 0.6722, "step": 149865 }, { "epoch": 1.66, "learning_rate": 2.2342176750228837e-05, "loss": 0.5888, "step": 149870 }, { "epoch": 1.66, "learning_rate": 2.2341254023090325e-05, "loss": 0.6498, "step": 149875 }, { "epoch": 1.66, "learning_rate": 2.2340331295951813e-05, "loss": 0.6538, "step": 149880 }, { "epoch": 1.66, "learning_rate": 2.23394085688133e-05, "loss": 0.6674, "step": 149885 }, { "epoch": 1.66, "learning_rate": 2.2338485841674788e-05, "loss": 0.6726, "step": 149890 }, { "epoch": 1.66, "learning_rate": 2.2337563114536276e-05, "loss": 0.6308, "step": 149895 }, { "epoch": 1.66, "learning_rate": 2.2336640387397764e-05, "loss": 0.6314, "step": 149900 }, { "epoch": 1.66, "learning_rate": 2.2335717660259252e-05, "loss": 0.6093, "step": 149905 }, { "epoch": 1.66, "learning_rate": 2.2334794933120736e-05, "loss": 0.6298, "step": 149910 }, { "epoch": 1.66, "learning_rate": 2.2333872205982227e-05, "loss": 0.5971, "step": 149915 }, { "epoch": 1.66, "learning_rate": 2.2332949478843712e-05, "loss": 0.6556, "step": 149920 }, { "epoch": 1.66, "learning_rate": 2.23320267517052e-05, "loss": 0.6711, "step": 149925 }, { "epoch": 1.66, "learning_rate": 2.2331104024566688e-05, "loss": 0.6233, "step": 149930 }, { "epoch": 1.66, "learning_rate": 2.2330181297428175e-05, "loss": 0.6297, "step": 149935 }, { "epoch": 1.66, "learning_rate": 2.2329258570289663e-05, "loss": 0.6204, "step": 149940 }, { "epoch": 1.66, "learning_rate": 2.232833584315115e-05, "loss": 0.6286, "step": 149945 }, { "epoch": 1.66, "learning_rate": 2.232741311601264e-05, "loss": 0.6663, "step": 149950 }, { "epoch": 1.66, "learning_rate": 2.2326490388874127e-05, "loss": 0.6349, "step": 149955 }, { "epoch": 1.66, "learning_rate": 2.2325567661735615e-05, "loss": 0.6683, "step": 149960 }, { "epoch": 1.66, "learning_rate": 2.2324644934597103e-05, "loss": 0.6076, "step": 149965 }, { "epoch": 1.66, "learning_rate": 2.232372220745859e-05, "loss": 0.6431, "step": 149970 }, { "epoch": 1.66, "learning_rate": 2.2322799480320078e-05, "loss": 0.6169, "step": 149975 }, { "epoch": 1.66, "learning_rate": 2.2321876753181563e-05, "loss": 0.6557, "step": 149980 }, { "epoch": 1.66, "learning_rate": 2.232095402604305e-05, "loss": 0.6219, "step": 149985 }, { "epoch": 1.66, "learning_rate": 2.232003129890454e-05, "loss": 0.556, "step": 149990 }, { "epoch": 1.66, "learning_rate": 2.2319108571766026e-05, "loss": 0.6781, "step": 149995 }, { "epoch": 1.66, "learning_rate": 2.2318185844627514e-05, "loss": 0.6567, "step": 150000 }, { "epoch": 1.66, "eval_loss": 0.5938353538513184, "eval_runtime": 69.223, "eval_samples_per_second": 28.892, "eval_steps_per_second": 14.446, "step": 150000 }, { "epoch": 1.66, "learning_rate": 2.2317263117489002e-05, "loss": 0.6526, "step": 150005 }, { "epoch": 1.66, "learning_rate": 2.231634039035049e-05, "loss": 0.6175, "step": 150010 }, { "epoch": 1.66, "learning_rate": 2.2315417663211978e-05, "loss": 0.6163, "step": 150015 }, { "epoch": 1.66, "learning_rate": 2.2314494936073466e-05, "loss": 0.6538, "step": 150020 }, { "epoch": 1.66, "learning_rate": 2.2313572208934953e-05, "loss": 0.6702, "step": 150025 }, { "epoch": 1.66, "learning_rate": 2.231264948179644e-05, "loss": 0.6194, "step": 150030 }, { "epoch": 1.66, "learning_rate": 2.2311726754657926e-05, "loss": 0.599, "step": 150035 }, { "epoch": 1.66, "learning_rate": 2.2310804027519417e-05, "loss": 0.6248, "step": 150040 }, { "epoch": 1.66, "learning_rate": 2.23098813003809e-05, "loss": 0.6848, "step": 150045 }, { "epoch": 1.66, "learning_rate": 2.2308958573242393e-05, "loss": 0.656, "step": 150050 }, { "epoch": 1.66, "learning_rate": 2.2308035846103877e-05, "loss": 0.6089, "step": 150055 }, { "epoch": 1.66, "learning_rate": 2.2307113118965365e-05, "loss": 0.6124, "step": 150060 }, { "epoch": 1.66, "learning_rate": 2.2306190391826853e-05, "loss": 0.5948, "step": 150065 }, { "epoch": 1.66, "learning_rate": 2.230526766468834e-05, "loss": 0.6421, "step": 150070 }, { "epoch": 1.66, "learning_rate": 2.230434493754983e-05, "loss": 0.6589, "step": 150075 }, { "epoch": 1.66, "learning_rate": 2.2303422210411316e-05, "loss": 0.6299, "step": 150080 }, { "epoch": 1.66, "learning_rate": 2.2302499483272804e-05, "loss": 0.6527, "step": 150085 }, { "epoch": 1.66, "learning_rate": 2.230157675613429e-05, "loss": 0.6542, "step": 150090 }, { "epoch": 1.66, "learning_rate": 2.230065402899578e-05, "loss": 0.611, "step": 150095 }, { "epoch": 1.66, "learning_rate": 2.2299731301857264e-05, "loss": 0.6855, "step": 150100 }, { "epoch": 1.66, "learning_rate": 2.2298808574718756e-05, "loss": 0.6602, "step": 150105 }, { "epoch": 1.66, "learning_rate": 2.229788584758024e-05, "loss": 0.6363, "step": 150110 }, { "epoch": 1.66, "learning_rate": 2.229696312044173e-05, "loss": 0.6034, "step": 150115 }, { "epoch": 1.66, "learning_rate": 2.2296040393303216e-05, "loss": 0.633, "step": 150120 }, { "epoch": 1.66, "learning_rate": 2.2295117666164707e-05, "loss": 0.6117, "step": 150125 }, { "epoch": 1.66, "learning_rate": 2.229419493902619e-05, "loss": 0.606, "step": 150130 }, { "epoch": 1.66, "learning_rate": 2.229327221188768e-05, "loss": 0.6814, "step": 150135 }, { "epoch": 1.66, "learning_rate": 2.2292349484749167e-05, "loss": 0.615, "step": 150140 }, { "epoch": 1.66, "learning_rate": 2.2291426757610655e-05, "loss": 0.6221, "step": 150145 }, { "epoch": 1.66, "learning_rate": 2.2290504030472143e-05, "loss": 0.6841, "step": 150150 }, { "epoch": 1.66, "learning_rate": 2.2289581303333627e-05, "loss": 0.6626, "step": 150155 }, { "epoch": 1.66, "learning_rate": 2.228865857619512e-05, "loss": 0.5957, "step": 150160 }, { "epoch": 1.66, "learning_rate": 2.2287735849056603e-05, "loss": 0.643, "step": 150165 }, { "epoch": 1.66, "learning_rate": 2.2286813121918094e-05, "loss": 0.6793, "step": 150170 }, { "epoch": 1.66, "learning_rate": 2.228589039477958e-05, "loss": 0.6488, "step": 150175 }, { "epoch": 1.66, "learning_rate": 2.228496766764107e-05, "loss": 0.6696, "step": 150180 }, { "epoch": 1.66, "learning_rate": 2.2284044940502554e-05, "loss": 0.6368, "step": 150185 }, { "epoch": 1.66, "learning_rate": 2.2283122213364042e-05, "loss": 0.6298, "step": 150190 }, { "epoch": 1.66, "learning_rate": 2.228219948622553e-05, "loss": 0.6359, "step": 150195 }, { "epoch": 1.66, "learning_rate": 2.2281276759087018e-05, "loss": 0.6369, "step": 150200 }, { "epoch": 1.66, "learning_rate": 2.2280354031948506e-05, "loss": 0.6133, "step": 150205 }, { "epoch": 1.66, "learning_rate": 2.227943130480999e-05, "loss": 0.6024, "step": 150210 }, { "epoch": 1.66, "learning_rate": 2.227850857767148e-05, "loss": 0.6491, "step": 150215 }, { "epoch": 1.66, "learning_rate": 2.2277585850532966e-05, "loss": 0.6497, "step": 150220 }, { "epoch": 1.66, "learning_rate": 2.2276663123394457e-05, "loss": 0.6656, "step": 150225 }, { "epoch": 1.66, "learning_rate": 2.227574039625594e-05, "loss": 0.6054, "step": 150230 }, { "epoch": 1.66, "learning_rate": 2.2274817669117433e-05, "loss": 0.6021, "step": 150235 }, { "epoch": 1.66, "learning_rate": 2.2273894941978917e-05, "loss": 0.6351, "step": 150240 }, { "epoch": 1.66, "learning_rate": 2.227297221484041e-05, "loss": 0.6329, "step": 150245 }, { "epoch": 1.66, "learning_rate": 2.2272049487701893e-05, "loss": 0.6648, "step": 150250 }, { "epoch": 1.66, "learning_rate": 2.227112676056338e-05, "loss": 0.6825, "step": 150255 }, { "epoch": 1.66, "learning_rate": 2.227020403342487e-05, "loss": 0.6653, "step": 150260 }, { "epoch": 1.66, "learning_rate": 2.2269281306286357e-05, "loss": 0.5964, "step": 150265 }, { "epoch": 1.66, "learning_rate": 2.2268358579147844e-05, "loss": 0.6167, "step": 150270 }, { "epoch": 1.66, "learning_rate": 2.2267435852009332e-05, "loss": 0.6329, "step": 150275 }, { "epoch": 1.66, "learning_rate": 2.226651312487082e-05, "loss": 0.6333, "step": 150280 }, { "epoch": 1.66, "learning_rate": 2.2265590397732305e-05, "loss": 0.5788, "step": 150285 }, { "epoch": 1.66, "learning_rate": 2.2264667670593796e-05, "loss": 0.6279, "step": 150290 }, { "epoch": 1.66, "learning_rate": 2.226374494345528e-05, "loss": 0.6158, "step": 150295 }, { "epoch": 1.66, "learning_rate": 2.226282221631677e-05, "loss": 0.6111, "step": 150300 }, { "epoch": 1.66, "learning_rate": 2.2261899489178256e-05, "loss": 0.6284, "step": 150305 }, { "epoch": 1.66, "learning_rate": 2.2260976762039744e-05, "loss": 0.6444, "step": 150310 }, { "epoch": 1.66, "learning_rate": 2.226005403490123e-05, "loss": 0.6535, "step": 150315 }, { "epoch": 1.66, "learning_rate": 2.225913130776272e-05, "loss": 0.6566, "step": 150320 }, { "epoch": 1.66, "learning_rate": 2.2258208580624207e-05, "loss": 0.6597, "step": 150325 }, { "epoch": 1.66, "learning_rate": 2.2257285853485695e-05, "loss": 0.6798, "step": 150330 }, { "epoch": 1.66, "learning_rate": 2.2256363126347183e-05, "loss": 0.6265, "step": 150335 }, { "epoch": 1.66, "learning_rate": 2.225544039920867e-05, "loss": 0.6685, "step": 150340 }, { "epoch": 1.66, "learning_rate": 2.225451767207016e-05, "loss": 0.6666, "step": 150345 }, { "epoch": 1.66, "learning_rate": 2.2253594944931647e-05, "loss": 0.6963, "step": 150350 }, { "epoch": 1.66, "learning_rate": 2.2252672217793134e-05, "loss": 0.676, "step": 150355 }, { "epoch": 1.66, "learning_rate": 2.225174949065462e-05, "loss": 0.6856, "step": 150360 }, { "epoch": 1.66, "learning_rate": 2.2250826763516107e-05, "loss": 0.6253, "step": 150365 }, { "epoch": 1.67, "learning_rate": 2.2249904036377595e-05, "loss": 0.6329, "step": 150370 }, { "epoch": 1.67, "learning_rate": 2.2248981309239082e-05, "loss": 0.6378, "step": 150375 }, { "epoch": 1.67, "learning_rate": 2.224805858210057e-05, "loss": 0.625, "step": 150380 }, { "epoch": 1.67, "learning_rate": 2.2247135854962058e-05, "loss": 0.6907, "step": 150385 }, { "epoch": 1.67, "learning_rate": 2.2246213127823546e-05, "loss": 0.6329, "step": 150390 }, { "epoch": 1.67, "learning_rate": 2.2245290400685034e-05, "loss": 0.6282, "step": 150395 }, { "epoch": 1.67, "learning_rate": 2.2244367673546522e-05, "loss": 0.6372, "step": 150400 }, { "epoch": 1.67, "learning_rate": 2.224344494640801e-05, "loss": 0.6166, "step": 150405 }, { "epoch": 1.67, "learning_rate": 2.2242522219269497e-05, "loss": 0.6474, "step": 150410 }, { "epoch": 1.67, "learning_rate": 2.2241599492130985e-05, "loss": 0.5952, "step": 150415 }, { "epoch": 1.67, "learning_rate": 2.224067676499247e-05, "loss": 0.6163, "step": 150420 }, { "epoch": 1.67, "learning_rate": 2.223975403785396e-05, "loss": 0.6058, "step": 150425 }, { "epoch": 1.67, "learning_rate": 2.2238831310715445e-05, "loss": 0.6431, "step": 150430 }, { "epoch": 1.67, "learning_rate": 2.2237908583576937e-05, "loss": 0.6001, "step": 150435 }, { "epoch": 1.67, "learning_rate": 2.223698585643842e-05, "loss": 0.6398, "step": 150440 }, { "epoch": 1.67, "learning_rate": 2.223606312929991e-05, "loss": 0.6151, "step": 150445 }, { "epoch": 1.67, "learning_rate": 2.2235140402161397e-05, "loss": 0.6594, "step": 150450 }, { "epoch": 1.67, "learning_rate": 2.2234217675022885e-05, "loss": 0.6666, "step": 150455 }, { "epoch": 1.67, "learning_rate": 2.2233294947884372e-05, "loss": 0.6093, "step": 150460 }, { "epoch": 1.67, "learning_rate": 2.223237222074586e-05, "loss": 0.5876, "step": 150465 }, { "epoch": 1.67, "learning_rate": 2.2231449493607348e-05, "loss": 0.6806, "step": 150470 }, { "epoch": 1.67, "learning_rate": 2.2230526766468836e-05, "loss": 0.6761, "step": 150475 }, { "epoch": 1.67, "learning_rate": 2.2229604039330324e-05, "loss": 0.7021, "step": 150480 }, { "epoch": 1.67, "learning_rate": 2.222868131219181e-05, "loss": 0.6166, "step": 150485 }, { "epoch": 1.67, "learning_rate": 2.22277585850533e-05, "loss": 0.6474, "step": 150490 }, { "epoch": 1.67, "learning_rate": 2.2226835857914784e-05, "loss": 0.6197, "step": 150495 }, { "epoch": 1.67, "learning_rate": 2.2225913130776275e-05, "loss": 0.6685, "step": 150500 }, { "epoch": 1.67, "learning_rate": 2.222499040363776e-05, "loss": 0.6784, "step": 150505 }, { "epoch": 1.67, "learning_rate": 2.222406767649925e-05, "loss": 0.6443, "step": 150510 }, { "epoch": 1.67, "learning_rate": 2.2223144949360735e-05, "loss": 0.6682, "step": 150515 }, { "epoch": 1.67, "learning_rate": 2.2222222222222223e-05, "loss": 0.6187, "step": 150520 }, { "epoch": 1.67, "learning_rate": 2.222129949508371e-05, "loss": 0.6398, "step": 150525 }, { "epoch": 1.67, "learning_rate": 2.22203767679452e-05, "loss": 0.6722, "step": 150530 }, { "epoch": 1.67, "learning_rate": 2.2219454040806687e-05, "loss": 0.5788, "step": 150535 }, { "epoch": 1.67, "learning_rate": 2.221853131366817e-05, "loss": 0.6432, "step": 150540 }, { "epoch": 1.67, "learning_rate": 2.2217608586529663e-05, "loss": 0.6619, "step": 150545 }, { "epoch": 1.67, "learning_rate": 2.2216685859391147e-05, "loss": 0.6034, "step": 150550 }, { "epoch": 1.67, "learning_rate": 2.2215763132252638e-05, "loss": 0.6445, "step": 150555 }, { "epoch": 1.67, "learning_rate": 2.2214840405114123e-05, "loss": 0.6541, "step": 150560 }, { "epoch": 1.67, "learning_rate": 2.2213917677975614e-05, "loss": 0.6321, "step": 150565 }, { "epoch": 1.67, "learning_rate": 2.22129949508371e-05, "loss": 0.5988, "step": 150570 }, { "epoch": 1.67, "learning_rate": 2.2212072223698586e-05, "loss": 0.585, "step": 150575 }, { "epoch": 1.67, "learning_rate": 2.2211149496560074e-05, "loss": 0.6377, "step": 150580 }, { "epoch": 1.67, "learning_rate": 2.2210226769421562e-05, "loss": 0.6514, "step": 150585 }, { "epoch": 1.67, "learning_rate": 2.220930404228305e-05, "loss": 0.6136, "step": 150590 }, { "epoch": 1.67, "learning_rate": 2.2208381315144534e-05, "loss": 0.6053, "step": 150595 }, { "epoch": 1.67, "learning_rate": 2.2207458588006025e-05, "loss": 0.6042, "step": 150600 }, { "epoch": 1.67, "learning_rate": 2.220653586086751e-05, "loss": 0.627, "step": 150605 }, { "epoch": 1.67, "learning_rate": 2.2205613133729e-05, "loss": 0.5685, "step": 150610 }, { "epoch": 1.67, "learning_rate": 2.2204690406590486e-05, "loss": 0.7108, "step": 150615 }, { "epoch": 1.67, "learning_rate": 2.2203767679451977e-05, "loss": 0.6518, "step": 150620 }, { "epoch": 1.67, "learning_rate": 2.220284495231346e-05, "loss": 0.6642, "step": 150625 }, { "epoch": 1.67, "learning_rate": 2.2201922225174953e-05, "loss": 0.645, "step": 150630 }, { "epoch": 1.67, "learning_rate": 2.2200999498036437e-05, "loss": 0.6071, "step": 150635 }, { "epoch": 1.67, "learning_rate": 2.2200076770897925e-05, "loss": 0.616, "step": 150640 }, { "epoch": 1.67, "learning_rate": 2.2199154043759413e-05, "loss": 0.5858, "step": 150645 }, { "epoch": 1.67, "learning_rate": 2.21982313166209e-05, "loss": 0.6196, "step": 150650 }, { "epoch": 1.67, "learning_rate": 2.219730858948239e-05, "loss": 0.6123, "step": 150655 }, { "epoch": 1.67, "learning_rate": 2.2196385862343876e-05, "loss": 0.6575, "step": 150660 }, { "epoch": 1.67, "learning_rate": 2.2195463135205364e-05, "loss": 0.5952, "step": 150665 }, { "epoch": 1.67, "learning_rate": 2.219454040806685e-05, "loss": 0.6511, "step": 150670 }, { "epoch": 1.67, "learning_rate": 2.219361768092834e-05, "loss": 0.6214, "step": 150675 }, { "epoch": 1.67, "learning_rate": 2.2192694953789824e-05, "loss": 0.6499, "step": 150680 }, { "epoch": 1.67, "learning_rate": 2.2191772226651316e-05, "loss": 0.5702, "step": 150685 }, { "epoch": 1.67, "learning_rate": 2.21908494995128e-05, "loss": 0.6049, "step": 150690 }, { "epoch": 1.67, "learning_rate": 2.2189926772374288e-05, "loss": 0.6165, "step": 150695 }, { "epoch": 1.67, "learning_rate": 2.2189004045235776e-05, "loss": 0.6196, "step": 150700 }, { "epoch": 1.67, "learning_rate": 2.2188081318097264e-05, "loss": 0.6008, "step": 150705 }, { "epoch": 1.67, "learning_rate": 2.218715859095875e-05, "loss": 0.6292, "step": 150710 }, { "epoch": 1.67, "learning_rate": 2.218623586382024e-05, "loss": 0.6272, "step": 150715 }, { "epoch": 1.67, "learning_rate": 2.2185313136681727e-05, "loss": 0.7109, "step": 150720 }, { "epoch": 1.67, "learning_rate": 2.2184390409543215e-05, "loss": 0.6186, "step": 150725 }, { "epoch": 1.67, "learning_rate": 2.2183467682404703e-05, "loss": 0.6198, "step": 150730 }, { "epoch": 1.67, "learning_rate": 2.218254495526619e-05, "loss": 0.6325, "step": 150735 }, { "epoch": 1.67, "learning_rate": 2.218162222812768e-05, "loss": 0.6222, "step": 150740 }, { "epoch": 1.67, "learning_rate": 2.2180699500989163e-05, "loss": 0.6427, "step": 150745 }, { "epoch": 1.67, "learning_rate": 2.217977677385065e-05, "loss": 0.5811, "step": 150750 }, { "epoch": 1.67, "learning_rate": 2.217885404671214e-05, "loss": 0.6214, "step": 150755 }, { "epoch": 1.67, "learning_rate": 2.2177931319573626e-05, "loss": 0.6158, "step": 150760 }, { "epoch": 1.67, "learning_rate": 2.2177008592435114e-05, "loss": 0.669, "step": 150765 }, { "epoch": 1.67, "learning_rate": 2.2176085865296602e-05, "loss": 0.6176, "step": 150770 }, { "epoch": 1.67, "learning_rate": 2.217516313815809e-05, "loss": 0.6708, "step": 150775 }, { "epoch": 1.67, "learning_rate": 2.2174240411019578e-05, "loss": 0.6515, "step": 150780 }, { "epoch": 1.67, "learning_rate": 2.2173317683881066e-05, "loss": 0.6052, "step": 150785 }, { "epoch": 1.67, "learning_rate": 2.2172394956742554e-05, "loss": 0.6099, "step": 150790 }, { "epoch": 1.67, "learning_rate": 2.217147222960404e-05, "loss": 0.6213, "step": 150795 }, { "epoch": 1.67, "learning_rate": 2.217054950246553e-05, "loss": 0.6288, "step": 150800 }, { "epoch": 1.67, "learning_rate": 2.2169626775327014e-05, "loss": 0.6449, "step": 150805 }, { "epoch": 1.67, "learning_rate": 2.2168704048188505e-05, "loss": 0.6417, "step": 150810 }, { "epoch": 1.67, "learning_rate": 2.216778132104999e-05, "loss": 0.6572, "step": 150815 }, { "epoch": 1.67, "learning_rate": 2.2166858593911477e-05, "loss": 0.6318, "step": 150820 }, { "epoch": 1.67, "learning_rate": 2.2165935866772965e-05, "loss": 0.6647, "step": 150825 }, { "epoch": 1.67, "learning_rate": 2.2165013139634453e-05, "loss": 0.6595, "step": 150830 }, { "epoch": 1.67, "learning_rate": 2.216409041249594e-05, "loss": 0.6343, "step": 150835 }, { "epoch": 1.67, "learning_rate": 2.216316768535743e-05, "loss": 0.6378, "step": 150840 }, { "epoch": 1.67, "learning_rate": 2.2162244958218917e-05, "loss": 0.6624, "step": 150845 }, { "epoch": 1.67, "learning_rate": 2.2161322231080404e-05, "loss": 0.6676, "step": 150850 }, { "epoch": 1.67, "learning_rate": 2.2160399503941892e-05, "loss": 0.6486, "step": 150855 }, { "epoch": 1.67, "learning_rate": 2.215947677680338e-05, "loss": 0.6518, "step": 150860 }, { "epoch": 1.67, "learning_rate": 2.2158554049664868e-05, "loss": 0.6439, "step": 150865 }, { "epoch": 1.67, "learning_rate": 2.2157631322526352e-05, "loss": 0.6187, "step": 150870 }, { "epoch": 1.67, "learning_rate": 2.2156708595387844e-05, "loss": 0.6248, "step": 150875 }, { "epoch": 1.67, "learning_rate": 2.2155785868249328e-05, "loss": 0.6321, "step": 150880 }, { "epoch": 1.67, "learning_rate": 2.215486314111082e-05, "loss": 0.6185, "step": 150885 }, { "epoch": 1.67, "learning_rate": 2.2153940413972304e-05, "loss": 0.6309, "step": 150890 }, { "epoch": 1.67, "learning_rate": 2.215301768683379e-05, "loss": 0.6512, "step": 150895 }, { "epoch": 1.67, "learning_rate": 2.215209495969528e-05, "loss": 0.6602, "step": 150900 }, { "epoch": 1.67, "learning_rate": 2.2151172232556767e-05, "loss": 0.6383, "step": 150905 }, { "epoch": 1.67, "learning_rate": 2.2150249505418255e-05, "loss": 0.6155, "step": 150910 }, { "epoch": 1.67, "learning_rate": 2.2149326778279743e-05, "loss": 0.6223, "step": 150915 }, { "epoch": 1.67, "learning_rate": 2.214840405114123e-05, "loss": 0.6814, "step": 150920 }, { "epoch": 1.67, "learning_rate": 2.2147481324002715e-05, "loss": 0.626, "step": 150925 }, { "epoch": 1.67, "learning_rate": 2.2146558596864207e-05, "loss": 0.6558, "step": 150930 }, { "epoch": 1.67, "learning_rate": 2.214563586972569e-05, "loss": 0.6005, "step": 150935 }, { "epoch": 1.67, "learning_rate": 2.2144713142587182e-05, "loss": 0.6225, "step": 150940 }, { "epoch": 1.67, "learning_rate": 2.2143790415448667e-05, "loss": 0.596, "step": 150945 }, { "epoch": 1.67, "learning_rate": 2.2142867688310158e-05, "loss": 0.6309, "step": 150950 }, { "epoch": 1.67, "learning_rate": 2.2141944961171642e-05, "loss": 0.653, "step": 150955 }, { "epoch": 1.67, "learning_rate": 2.214102223403313e-05, "loss": 0.6209, "step": 150960 }, { "epoch": 1.67, "learning_rate": 2.2140099506894618e-05, "loss": 0.6922, "step": 150965 }, { "epoch": 1.67, "learning_rate": 2.2139176779756106e-05, "loss": 0.5932, "step": 150970 }, { "epoch": 1.67, "learning_rate": 2.2138254052617594e-05, "loss": 0.6225, "step": 150975 }, { "epoch": 1.67, "learning_rate": 2.2137331325479078e-05, "loss": 0.6432, "step": 150980 }, { "epoch": 1.67, "learning_rate": 2.213640859834057e-05, "loss": 0.6235, "step": 150985 }, { "epoch": 1.67, "learning_rate": 2.2135485871202054e-05, "loss": 0.6636, "step": 150990 }, { "epoch": 1.67, "learning_rate": 2.2134563144063545e-05, "loss": 0.6198, "step": 150995 }, { "epoch": 1.67, "learning_rate": 2.213364041692503e-05, "loss": 0.6227, "step": 151000 }, { "epoch": 1.67, "eval_loss": 0.5948097109794617, "eval_runtime": 69.226, "eval_samples_per_second": 28.891, "eval_steps_per_second": 14.445, "step": 151000 }, { "epoch": 1.67, "learning_rate": 2.213271768978652e-05, "loss": 0.6683, "step": 151005 }, { "epoch": 1.67, "learning_rate": 2.2131794962648005e-05, "loss": 0.6241, "step": 151010 }, { "epoch": 1.67, "learning_rate": 2.2130872235509497e-05, "loss": 0.5895, "step": 151015 }, { "epoch": 1.67, "learning_rate": 2.212994950837098e-05, "loss": 0.6607, "step": 151020 }, { "epoch": 1.67, "learning_rate": 2.212902678123247e-05, "loss": 0.641, "step": 151025 }, { "epoch": 1.67, "learning_rate": 2.2128104054093957e-05, "loss": 0.6358, "step": 151030 }, { "epoch": 1.67, "learning_rate": 2.2127181326955445e-05, "loss": 0.6727, "step": 151035 }, { "epoch": 1.67, "learning_rate": 2.2126258599816932e-05, "loss": 0.5959, "step": 151040 }, { "epoch": 1.67, "learning_rate": 2.2125335872678417e-05, "loss": 0.6059, "step": 151045 }, { "epoch": 1.67, "learning_rate": 2.2124413145539908e-05, "loss": 0.6117, "step": 151050 }, { "epoch": 1.67, "learning_rate": 2.2123490418401393e-05, "loss": 0.6683, "step": 151055 }, { "epoch": 1.67, "learning_rate": 2.2122567691262884e-05, "loss": 0.6283, "step": 151060 }, { "epoch": 1.67, "learning_rate": 2.2121644964124368e-05, "loss": 0.6602, "step": 151065 }, { "epoch": 1.67, "learning_rate": 2.212072223698586e-05, "loss": 0.6246, "step": 151070 }, { "epoch": 1.67, "learning_rate": 2.2119799509847344e-05, "loss": 0.5606, "step": 151075 }, { "epoch": 1.67, "learning_rate": 2.2118876782708832e-05, "loss": 0.6336, "step": 151080 }, { "epoch": 1.67, "learning_rate": 2.211795405557032e-05, "loss": 0.5894, "step": 151085 }, { "epoch": 1.67, "learning_rate": 2.2117031328431808e-05, "loss": 0.6172, "step": 151090 }, { "epoch": 1.67, "learning_rate": 2.2116108601293295e-05, "loss": 0.6383, "step": 151095 }, { "epoch": 1.67, "learning_rate": 2.2115185874154783e-05, "loss": 0.6647, "step": 151100 }, { "epoch": 1.67, "learning_rate": 2.211426314701627e-05, "loss": 0.606, "step": 151105 }, { "epoch": 1.67, "learning_rate": 2.211334041987776e-05, "loss": 0.649, "step": 151110 }, { "epoch": 1.67, "learning_rate": 2.2112417692739247e-05, "loss": 0.6076, "step": 151115 }, { "epoch": 1.67, "learning_rate": 2.211149496560073e-05, "loss": 0.6427, "step": 151120 }, { "epoch": 1.67, "learning_rate": 2.2110572238462222e-05, "loss": 0.6579, "step": 151125 }, { "epoch": 1.67, "learning_rate": 2.2109649511323707e-05, "loss": 0.5708, "step": 151130 }, { "epoch": 1.67, "learning_rate": 2.2108726784185195e-05, "loss": 0.5593, "step": 151135 }, { "epoch": 1.67, "learning_rate": 2.2107804057046683e-05, "loss": 0.6241, "step": 151140 }, { "epoch": 1.67, "learning_rate": 2.210688132990817e-05, "loss": 0.5932, "step": 151145 }, { "epoch": 1.67, "learning_rate": 2.210595860276966e-05, "loss": 0.5565, "step": 151150 }, { "epoch": 1.67, "learning_rate": 2.2105035875631146e-05, "loss": 0.6981, "step": 151155 }, { "epoch": 1.67, "learning_rate": 2.2104113148492634e-05, "loss": 0.6054, "step": 151160 }, { "epoch": 1.67, "learning_rate": 2.2103190421354122e-05, "loss": 0.6745, "step": 151165 }, { "epoch": 1.67, "learning_rate": 2.210226769421561e-05, "loss": 0.6277, "step": 151170 }, { "epoch": 1.67, "learning_rate": 2.2101344967077098e-05, "loss": 0.6183, "step": 151175 }, { "epoch": 1.67, "learning_rate": 2.2100422239938585e-05, "loss": 0.622, "step": 151180 }, { "epoch": 1.67, "learning_rate": 2.2099499512800073e-05, "loss": 0.6152, "step": 151185 }, { "epoch": 1.67, "learning_rate": 2.2098576785661558e-05, "loss": 0.6485, "step": 151190 }, { "epoch": 1.67, "learning_rate": 2.2097654058523046e-05, "loss": 0.6665, "step": 151195 }, { "epoch": 1.67, "learning_rate": 2.2096731331384533e-05, "loss": 0.6127, "step": 151200 }, { "epoch": 1.67, "learning_rate": 2.209580860424602e-05, "loss": 0.6155, "step": 151205 }, { "epoch": 1.67, "learning_rate": 2.209488587710751e-05, "loss": 0.6333, "step": 151210 }, { "epoch": 1.67, "learning_rate": 2.2093963149968997e-05, "loss": 0.5914, "step": 151215 }, { "epoch": 1.67, "learning_rate": 2.2093040422830485e-05, "loss": 0.6331, "step": 151220 }, { "epoch": 1.67, "learning_rate": 2.2092117695691973e-05, "loss": 0.6222, "step": 151225 }, { "epoch": 1.67, "learning_rate": 2.209119496855346e-05, "loss": 0.6624, "step": 151230 }, { "epoch": 1.67, "learning_rate": 2.209027224141495e-05, "loss": 0.6937, "step": 151235 }, { "epoch": 1.67, "learning_rate": 2.2089349514276436e-05, "loss": 0.6213, "step": 151240 }, { "epoch": 1.67, "learning_rate": 2.2088426787137924e-05, "loss": 0.6334, "step": 151245 }, { "epoch": 1.67, "learning_rate": 2.2087504059999412e-05, "loss": 0.6242, "step": 151250 }, { "epoch": 1.67, "learning_rate": 2.2086581332860896e-05, "loss": 0.603, "step": 151255 }, { "epoch": 1.67, "learning_rate": 2.2085658605722388e-05, "loss": 0.6625, "step": 151260 }, { "epoch": 1.67, "learning_rate": 2.2084735878583872e-05, "loss": 0.6467, "step": 151265 }, { "epoch": 1.67, "learning_rate": 2.2083813151445363e-05, "loss": 0.6428, "step": 151270 }, { "epoch": 1.68, "learning_rate": 2.2082890424306848e-05, "loss": 0.6148, "step": 151275 }, { "epoch": 1.68, "learning_rate": 2.2081967697168336e-05, "loss": 0.651, "step": 151280 }, { "epoch": 1.68, "learning_rate": 2.2081044970029823e-05, "loss": 0.6633, "step": 151285 }, { "epoch": 1.68, "learning_rate": 2.208012224289131e-05, "loss": 0.6411, "step": 151290 }, { "epoch": 1.68, "learning_rate": 2.20791995157528e-05, "loss": 0.6054, "step": 151295 }, { "epoch": 1.68, "learning_rate": 2.2078276788614287e-05, "loss": 0.6036, "step": 151300 }, { "epoch": 1.68, "learning_rate": 2.2077354061475775e-05, "loss": 0.6126, "step": 151305 }, { "epoch": 1.68, "learning_rate": 2.207643133433726e-05, "loss": 0.6154, "step": 151310 }, { "epoch": 1.68, "learning_rate": 2.207550860719875e-05, "loss": 0.6296, "step": 151315 }, { "epoch": 1.68, "learning_rate": 2.2074585880060235e-05, "loss": 0.6032, "step": 151320 }, { "epoch": 1.68, "learning_rate": 2.2073663152921726e-05, "loss": 0.5958, "step": 151325 }, { "epoch": 1.68, "learning_rate": 2.207274042578321e-05, "loss": 0.6674, "step": 151330 }, { "epoch": 1.68, "learning_rate": 2.2071817698644702e-05, "loss": 0.6786, "step": 151335 }, { "epoch": 1.68, "learning_rate": 2.2070894971506186e-05, "loss": 0.656, "step": 151340 }, { "epoch": 1.68, "learning_rate": 2.2069972244367674e-05, "loss": 0.629, "step": 151345 }, { "epoch": 1.68, "learning_rate": 2.2069049517229162e-05, "loss": 0.5889, "step": 151350 }, { "epoch": 1.68, "learning_rate": 2.206812679009065e-05, "loss": 0.6454, "step": 151355 }, { "epoch": 1.68, "learning_rate": 2.2067204062952138e-05, "loss": 0.6283, "step": 151360 }, { "epoch": 1.68, "learning_rate": 2.2066281335813622e-05, "loss": 0.6666, "step": 151365 }, { "epoch": 1.68, "learning_rate": 2.2065358608675114e-05, "loss": 0.6435, "step": 151370 }, { "epoch": 1.68, "learning_rate": 2.2064435881536598e-05, "loss": 0.6334, "step": 151375 }, { "epoch": 1.68, "learning_rate": 2.206351315439809e-05, "loss": 0.5812, "step": 151380 }, { "epoch": 1.68, "learning_rate": 2.2062590427259574e-05, "loss": 0.5889, "step": 151385 }, { "epoch": 1.68, "learning_rate": 2.2061667700121065e-05, "loss": 0.621, "step": 151390 }, { "epoch": 1.68, "learning_rate": 2.206074497298255e-05, "loss": 0.6491, "step": 151395 }, { "epoch": 1.68, "learning_rate": 2.205982224584404e-05, "loss": 0.6266, "step": 151400 }, { "epoch": 1.68, "learning_rate": 2.2058899518705525e-05, "loss": 0.6443, "step": 151405 }, { "epoch": 1.68, "learning_rate": 2.2057976791567013e-05, "loss": 0.6036, "step": 151410 }, { "epoch": 1.68, "learning_rate": 2.20570540644285e-05, "loss": 0.6663, "step": 151415 }, { "epoch": 1.68, "learning_rate": 2.205613133728999e-05, "loss": 0.6652, "step": 151420 }, { "epoch": 1.68, "learning_rate": 2.2055208610151476e-05, "loss": 0.6265, "step": 151425 }, { "epoch": 1.68, "learning_rate": 2.205428588301296e-05, "loss": 0.6363, "step": 151430 }, { "epoch": 1.68, "learning_rate": 2.2053363155874452e-05, "loss": 0.6592, "step": 151435 }, { "epoch": 1.68, "learning_rate": 2.2052440428735937e-05, "loss": 0.6327, "step": 151440 }, { "epoch": 1.68, "learning_rate": 2.2051517701597428e-05, "loss": 0.5793, "step": 151445 }, { "epoch": 1.68, "learning_rate": 2.2050594974458912e-05, "loss": 0.6672, "step": 151450 }, { "epoch": 1.68, "learning_rate": 2.2049672247320404e-05, "loss": 0.6219, "step": 151455 }, { "epoch": 1.68, "learning_rate": 2.2048749520181888e-05, "loss": 0.6328, "step": 151460 }, { "epoch": 1.68, "learning_rate": 2.2047826793043376e-05, "loss": 0.657, "step": 151465 }, { "epoch": 1.68, "learning_rate": 2.2046904065904864e-05, "loss": 0.7136, "step": 151470 }, { "epoch": 1.68, "learning_rate": 2.204598133876635e-05, "loss": 0.6242, "step": 151475 }, { "epoch": 1.68, "learning_rate": 2.204505861162784e-05, "loss": 0.656, "step": 151480 }, { "epoch": 1.68, "learning_rate": 2.2044135884489327e-05, "loss": 0.5703, "step": 151485 }, { "epoch": 1.68, "learning_rate": 2.2043213157350815e-05, "loss": 0.6501, "step": 151490 }, { "epoch": 1.68, "learning_rate": 2.2042290430212303e-05, "loss": 0.6114, "step": 151495 }, { "epoch": 1.68, "learning_rate": 2.204136770307379e-05, "loss": 0.6113, "step": 151500 }, { "epoch": 1.68, "learning_rate": 2.2040444975935275e-05, "loss": 0.6487, "step": 151505 }, { "epoch": 1.68, "learning_rate": 2.2039522248796767e-05, "loss": 0.5897, "step": 151510 }, { "epoch": 1.68, "learning_rate": 2.203859952165825e-05, "loss": 0.6032, "step": 151515 }, { "epoch": 1.68, "learning_rate": 2.203767679451974e-05, "loss": 0.6591, "step": 151520 }, { "epoch": 1.68, "learning_rate": 2.2036754067381227e-05, "loss": 0.6605, "step": 151525 }, { "epoch": 1.68, "learning_rate": 2.2035831340242715e-05, "loss": 0.6316, "step": 151530 }, { "epoch": 1.68, "learning_rate": 2.2034908613104202e-05, "loss": 0.6508, "step": 151535 }, { "epoch": 1.68, "learning_rate": 2.203398588596569e-05, "loss": 0.6464, "step": 151540 }, { "epoch": 1.68, "learning_rate": 2.2033063158827178e-05, "loss": 0.6087, "step": 151545 }, { "epoch": 1.68, "learning_rate": 2.2032140431688666e-05, "loss": 0.625, "step": 151550 }, { "epoch": 1.68, "learning_rate": 2.2031217704550154e-05, "loss": 0.6723, "step": 151555 }, { "epoch": 1.68, "learning_rate": 2.203029497741164e-05, "loss": 0.6171, "step": 151560 }, { "epoch": 1.68, "learning_rate": 2.202937225027313e-05, "loss": 0.6272, "step": 151565 }, { "epoch": 1.68, "learning_rate": 2.2028449523134617e-05, "loss": 0.5836, "step": 151570 }, { "epoch": 1.68, "learning_rate": 2.2027526795996105e-05, "loss": 0.6226, "step": 151575 }, { "epoch": 1.68, "learning_rate": 2.202660406885759e-05, "loss": 0.6436, "step": 151580 }, { "epoch": 1.68, "learning_rate": 2.2025681341719077e-05, "loss": 0.6694, "step": 151585 }, { "epoch": 1.68, "learning_rate": 2.2024758614580565e-05, "loss": 0.6418, "step": 151590 }, { "epoch": 1.68, "learning_rate": 2.2023835887442053e-05, "loss": 0.6388, "step": 151595 }, { "epoch": 1.68, "learning_rate": 2.202291316030354e-05, "loss": 0.6196, "step": 151600 }, { "epoch": 1.68, "learning_rate": 2.202199043316503e-05, "loss": 0.6247, "step": 151605 }, { "epoch": 1.68, "learning_rate": 2.2021067706026517e-05, "loss": 0.6516, "step": 151610 }, { "epoch": 1.68, "learning_rate": 2.2020144978888005e-05, "loss": 0.6327, "step": 151615 }, { "epoch": 1.68, "learning_rate": 2.2019222251749492e-05, "loss": 0.6238, "step": 151620 }, { "epoch": 1.68, "learning_rate": 2.201829952461098e-05, "loss": 0.6443, "step": 151625 }, { "epoch": 1.68, "learning_rate": 2.2017376797472468e-05, "loss": 0.634, "step": 151630 }, { "epoch": 1.68, "learning_rate": 2.2016454070333956e-05, "loss": 0.668, "step": 151635 }, { "epoch": 1.68, "learning_rate": 2.201553134319544e-05, "loss": 0.5988, "step": 151640 }, { "epoch": 1.68, "learning_rate": 2.201460861605693e-05, "loss": 0.624, "step": 151645 }, { "epoch": 1.68, "learning_rate": 2.2013685888918416e-05, "loss": 0.6241, "step": 151650 }, { "epoch": 1.68, "learning_rate": 2.2012763161779904e-05, "loss": 0.6286, "step": 151655 }, { "epoch": 1.68, "learning_rate": 2.2011840434641392e-05, "loss": 0.7089, "step": 151660 }, { "epoch": 1.68, "learning_rate": 2.201091770750288e-05, "loss": 0.6375, "step": 151665 }, { "epoch": 1.68, "learning_rate": 2.2009994980364367e-05, "loss": 0.5853, "step": 151670 }, { "epoch": 1.68, "learning_rate": 2.2009072253225855e-05, "loss": 0.5877, "step": 151675 }, { "epoch": 1.68, "learning_rate": 2.2008149526087343e-05, "loss": 0.6288, "step": 151680 }, { "epoch": 1.68, "learning_rate": 2.200722679894883e-05, "loss": 0.6231, "step": 151685 }, { "epoch": 1.68, "learning_rate": 2.200630407181032e-05, "loss": 0.6255, "step": 151690 }, { "epoch": 1.68, "learning_rate": 2.2005381344671803e-05, "loss": 0.6194, "step": 151695 }, { "epoch": 1.68, "learning_rate": 2.2004458617533295e-05, "loss": 0.6724, "step": 151700 }, { "epoch": 1.68, "learning_rate": 2.200353589039478e-05, "loss": 0.6229, "step": 151705 }, { "epoch": 1.68, "learning_rate": 2.200261316325627e-05, "loss": 0.5857, "step": 151710 }, { "epoch": 1.68, "learning_rate": 2.2001690436117755e-05, "loss": 0.6743, "step": 151715 }, { "epoch": 1.68, "learning_rate": 2.2000767708979246e-05, "loss": 0.6172, "step": 151720 }, { "epoch": 1.68, "learning_rate": 2.199984498184073e-05, "loss": 0.6192, "step": 151725 }, { "epoch": 1.68, "learning_rate": 2.1998922254702218e-05, "loss": 0.6066, "step": 151730 }, { "epoch": 1.68, "learning_rate": 2.1997999527563706e-05, "loss": 0.6834, "step": 151735 }, { "epoch": 1.68, "learning_rate": 2.1997076800425194e-05, "loss": 0.6135, "step": 151740 }, { "epoch": 1.68, "learning_rate": 2.1996154073286682e-05, "loss": 0.6282, "step": 151745 }, { "epoch": 1.68, "learning_rate": 2.1995231346148166e-05, "loss": 0.6101, "step": 151750 }, { "epoch": 1.68, "learning_rate": 2.1994308619009658e-05, "loss": 0.6965, "step": 151755 }, { "epoch": 1.68, "learning_rate": 2.1993385891871142e-05, "loss": 0.5891, "step": 151760 }, { "epoch": 1.68, "learning_rate": 2.1992463164732633e-05, "loss": 0.6523, "step": 151765 }, { "epoch": 1.68, "learning_rate": 2.1991540437594118e-05, "loss": 0.5665, "step": 151770 }, { "epoch": 1.68, "learning_rate": 2.199061771045561e-05, "loss": 0.5984, "step": 151775 }, { "epoch": 1.68, "learning_rate": 2.1989694983317093e-05, "loss": 0.6634, "step": 151780 }, { "epoch": 1.68, "learning_rate": 2.1988772256178585e-05, "loss": 0.6049, "step": 151785 }, { "epoch": 1.68, "learning_rate": 2.198784952904007e-05, "loss": 0.5943, "step": 151790 }, { "epoch": 1.68, "learning_rate": 2.1986926801901557e-05, "loss": 0.6243, "step": 151795 }, { "epoch": 1.68, "learning_rate": 2.1986004074763045e-05, "loss": 0.5975, "step": 151800 }, { "epoch": 1.68, "learning_rate": 2.1985081347624533e-05, "loss": 0.657, "step": 151805 }, { "epoch": 1.68, "learning_rate": 2.198415862048602e-05, "loss": 0.6446, "step": 151810 }, { "epoch": 1.68, "learning_rate": 2.1983235893347505e-05, "loss": 0.6315, "step": 151815 }, { "epoch": 1.68, "learning_rate": 2.1982313166208996e-05, "loss": 0.6224, "step": 151820 }, { "epoch": 1.68, "learning_rate": 2.198139043907048e-05, "loss": 0.6555, "step": 151825 }, { "epoch": 1.68, "learning_rate": 2.1980467711931972e-05, "loss": 0.5745, "step": 151830 }, { "epoch": 1.68, "learning_rate": 2.1979544984793456e-05, "loss": 0.62, "step": 151835 }, { "epoch": 1.68, "learning_rate": 2.1978622257654948e-05, "loss": 0.6135, "step": 151840 }, { "epoch": 1.68, "learning_rate": 2.1977699530516432e-05, "loss": 0.6334, "step": 151845 }, { "epoch": 1.68, "learning_rate": 2.197677680337792e-05, "loss": 0.7023, "step": 151850 }, { "epoch": 1.68, "learning_rate": 2.1975854076239408e-05, "loss": 0.646, "step": 151855 }, { "epoch": 1.68, "learning_rate": 2.1974931349100896e-05, "loss": 0.6252, "step": 151860 }, { "epoch": 1.68, "learning_rate": 2.1974008621962383e-05, "loss": 0.6974, "step": 151865 }, { "epoch": 1.68, "learning_rate": 2.197308589482387e-05, "loss": 0.6115, "step": 151870 }, { "epoch": 1.68, "learning_rate": 2.197216316768536e-05, "loss": 0.6561, "step": 151875 }, { "epoch": 1.68, "learning_rate": 2.1971240440546844e-05, "loss": 0.6856, "step": 151880 }, { "epoch": 1.68, "learning_rate": 2.1970317713408335e-05, "loss": 0.6545, "step": 151885 }, { "epoch": 1.68, "learning_rate": 2.196939498626982e-05, "loss": 0.6466, "step": 151890 }, { "epoch": 1.68, "learning_rate": 2.196847225913131e-05, "loss": 0.6989, "step": 151895 }, { "epoch": 1.68, "learning_rate": 2.1967549531992795e-05, "loss": 0.6382, "step": 151900 }, { "epoch": 1.68, "learning_rate": 2.1966626804854283e-05, "loss": 0.6134, "step": 151905 }, { "epoch": 1.68, "learning_rate": 2.196570407771577e-05, "loss": 0.6275, "step": 151910 }, { "epoch": 1.68, "learning_rate": 2.196478135057726e-05, "loss": 0.6194, "step": 151915 }, { "epoch": 1.68, "learning_rate": 2.1963858623438746e-05, "loss": 0.5464, "step": 151920 }, { "epoch": 1.68, "learning_rate": 2.1962935896300234e-05, "loss": 0.6175, "step": 151925 }, { "epoch": 1.68, "learning_rate": 2.1962013169161722e-05, "loss": 0.5881, "step": 151930 }, { "epoch": 1.68, "learning_rate": 2.196109044202321e-05, "loss": 0.6367, "step": 151935 }, { "epoch": 1.68, "learning_rate": 2.1960167714884698e-05, "loss": 0.6908, "step": 151940 }, { "epoch": 1.68, "learning_rate": 2.1959244987746186e-05, "loss": 0.6361, "step": 151945 }, { "epoch": 1.68, "learning_rate": 2.1958322260607673e-05, "loss": 0.6449, "step": 151950 }, { "epoch": 1.68, "learning_rate": 2.1957399533469158e-05, "loss": 0.6376, "step": 151955 }, { "epoch": 1.68, "learning_rate": 2.195647680633065e-05, "loss": 0.6282, "step": 151960 }, { "epoch": 1.68, "learning_rate": 2.1955554079192134e-05, "loss": 0.656, "step": 151965 }, { "epoch": 1.68, "learning_rate": 2.195463135205362e-05, "loss": 0.6486, "step": 151970 }, { "epoch": 1.68, "learning_rate": 2.195370862491511e-05, "loss": 0.6061, "step": 151975 }, { "epoch": 1.68, "learning_rate": 2.1952785897776597e-05, "loss": 0.6078, "step": 151980 }, { "epoch": 1.68, "learning_rate": 2.1951863170638085e-05, "loss": 0.6308, "step": 151985 }, { "epoch": 1.68, "learning_rate": 2.1950940443499573e-05, "loss": 0.6042, "step": 151990 }, { "epoch": 1.68, "learning_rate": 2.195001771636106e-05, "loss": 0.7043, "step": 151995 }, { "epoch": 1.68, "learning_rate": 2.194909498922255e-05, "loss": 0.5858, "step": 152000 }, { "epoch": 1.68, "eval_loss": 0.582176685333252, "eval_runtime": 69.2091, "eval_samples_per_second": 28.898, "eval_steps_per_second": 14.449, "step": 152000 }, { "epoch": 1.68, "learning_rate": 2.1948172262084036e-05, "loss": 0.5825, "step": 152005 }, { "epoch": 1.68, "learning_rate": 2.1947249534945524e-05, "loss": 0.628, "step": 152010 }, { "epoch": 1.68, "learning_rate": 2.1946326807807012e-05, "loss": 0.6244, "step": 152015 }, { "epoch": 1.68, "learning_rate": 2.19454040806685e-05, "loss": 0.6007, "step": 152020 }, { "epoch": 1.68, "learning_rate": 2.1944481353529984e-05, "loss": 0.6315, "step": 152025 }, { "epoch": 1.68, "learning_rate": 2.1943558626391472e-05, "loss": 0.5889, "step": 152030 }, { "epoch": 1.68, "learning_rate": 2.194263589925296e-05, "loss": 0.6218, "step": 152035 }, { "epoch": 1.68, "learning_rate": 2.1941713172114448e-05, "loss": 0.5854, "step": 152040 }, { "epoch": 1.68, "learning_rate": 2.1940790444975936e-05, "loss": 0.6565, "step": 152045 }, { "epoch": 1.68, "learning_rate": 2.1939867717837424e-05, "loss": 0.5759, "step": 152050 }, { "epoch": 1.68, "learning_rate": 2.193894499069891e-05, "loss": 0.6323, "step": 152055 }, { "epoch": 1.68, "learning_rate": 2.19380222635604e-05, "loss": 0.6176, "step": 152060 }, { "epoch": 1.68, "learning_rate": 2.1937099536421887e-05, "loss": 0.5946, "step": 152065 }, { "epoch": 1.68, "learning_rate": 2.1936176809283375e-05, "loss": 0.5867, "step": 152070 }, { "epoch": 1.68, "learning_rate": 2.1935254082144863e-05, "loss": 0.6647, "step": 152075 }, { "epoch": 1.68, "learning_rate": 2.1934331355006347e-05, "loss": 0.6758, "step": 152080 }, { "epoch": 1.68, "learning_rate": 2.193340862786784e-05, "loss": 0.6606, "step": 152085 }, { "epoch": 1.68, "learning_rate": 2.1932485900729323e-05, "loss": 0.6447, "step": 152090 }, { "epoch": 1.68, "learning_rate": 2.1931563173590814e-05, "loss": 0.6561, "step": 152095 }, { "epoch": 1.68, "learning_rate": 2.19306404464523e-05, "loss": 0.6027, "step": 152100 }, { "epoch": 1.68, "learning_rate": 2.192971771931379e-05, "loss": 0.6008, "step": 152105 }, { "epoch": 1.68, "learning_rate": 2.1928794992175274e-05, "loss": 0.6127, "step": 152110 }, { "epoch": 1.68, "learning_rate": 2.1927872265036762e-05, "loss": 0.6392, "step": 152115 }, { "epoch": 1.68, "learning_rate": 2.192694953789825e-05, "loss": 0.6126, "step": 152120 }, { "epoch": 1.68, "learning_rate": 2.1926026810759738e-05, "loss": 0.6625, "step": 152125 }, { "epoch": 1.68, "learning_rate": 2.1925104083621226e-05, "loss": 0.6704, "step": 152130 }, { "epoch": 1.68, "learning_rate": 2.192418135648271e-05, "loss": 0.6536, "step": 152135 }, { "epoch": 1.68, "learning_rate": 2.19232586293442e-05, "loss": 0.6095, "step": 152140 }, { "epoch": 1.68, "learning_rate": 2.1922335902205686e-05, "loss": 0.6464, "step": 152145 }, { "epoch": 1.68, "learning_rate": 2.1921413175067177e-05, "loss": 0.666, "step": 152150 }, { "epoch": 1.68, "learning_rate": 2.1920490447928662e-05, "loss": 0.583, "step": 152155 }, { "epoch": 1.68, "learning_rate": 2.1919567720790153e-05, "loss": 0.6275, "step": 152160 }, { "epoch": 1.68, "learning_rate": 2.1918644993651637e-05, "loss": 0.6186, "step": 152165 }, { "epoch": 1.68, "learning_rate": 2.191772226651313e-05, "loss": 0.6235, "step": 152170 }, { "epoch": 1.68, "learning_rate": 2.1916799539374613e-05, "loss": 0.6395, "step": 152175 }, { "epoch": 1.69, "learning_rate": 2.19158768122361e-05, "loss": 0.6266, "step": 152180 }, { "epoch": 1.69, "learning_rate": 2.191495408509759e-05, "loss": 0.6035, "step": 152185 }, { "epoch": 1.69, "learning_rate": 2.1914031357959077e-05, "loss": 0.6452, "step": 152190 }, { "epoch": 1.69, "learning_rate": 2.1913108630820565e-05, "loss": 0.6157, "step": 152195 }, { "epoch": 1.69, "learning_rate": 2.191218590368205e-05, "loss": 0.6302, "step": 152200 }, { "epoch": 1.69, "learning_rate": 2.191126317654354e-05, "loss": 0.6398, "step": 152205 }, { "epoch": 1.69, "learning_rate": 2.1910340449405025e-05, "loss": 0.622, "step": 152210 }, { "epoch": 1.69, "learning_rate": 2.1909417722266516e-05, "loss": 0.6515, "step": 152215 }, { "epoch": 1.69, "learning_rate": 2.1908494995128e-05, "loss": 0.6422, "step": 152220 }, { "epoch": 1.69, "learning_rate": 2.190757226798949e-05, "loss": 0.6263, "step": 152225 }, { "epoch": 1.69, "learning_rate": 2.1906649540850976e-05, "loss": 0.6023, "step": 152230 }, { "epoch": 1.69, "learning_rate": 2.1905726813712464e-05, "loss": 0.567, "step": 152235 }, { "epoch": 1.69, "learning_rate": 2.1904804086573952e-05, "loss": 0.6392, "step": 152240 }, { "epoch": 1.69, "learning_rate": 2.190388135943544e-05, "loss": 0.619, "step": 152245 }, { "epoch": 1.69, "learning_rate": 2.1902958632296927e-05, "loss": 0.5446, "step": 152250 }, { "epoch": 1.69, "learning_rate": 2.1902035905158415e-05, "loss": 0.6009, "step": 152255 }, { "epoch": 1.69, "learning_rate": 2.1901113178019903e-05, "loss": 0.6108, "step": 152260 }, { "epoch": 1.69, "learning_rate": 2.1900190450881388e-05, "loss": 0.6195, "step": 152265 }, { "epoch": 1.69, "learning_rate": 2.189926772374288e-05, "loss": 0.6646, "step": 152270 }, { "epoch": 1.69, "learning_rate": 2.1898344996604363e-05, "loss": 0.6438, "step": 152275 }, { "epoch": 1.69, "learning_rate": 2.1897422269465855e-05, "loss": 0.5813, "step": 152280 }, { "epoch": 1.69, "learning_rate": 2.189649954232734e-05, "loss": 0.6337, "step": 152285 }, { "epoch": 1.69, "learning_rate": 2.1895576815188827e-05, "loss": 0.6478, "step": 152290 }, { "epoch": 1.69, "learning_rate": 2.1894654088050315e-05, "loss": 0.6136, "step": 152295 }, { "epoch": 1.69, "learning_rate": 2.1893731360911803e-05, "loss": 0.6367, "step": 152300 }, { "epoch": 1.69, "learning_rate": 2.189280863377329e-05, "loss": 0.6153, "step": 152305 }, { "epoch": 1.69, "learning_rate": 2.1891885906634778e-05, "loss": 0.6463, "step": 152310 }, { "epoch": 1.69, "learning_rate": 2.1890963179496266e-05, "loss": 0.5866, "step": 152315 }, { "epoch": 1.69, "learning_rate": 2.1890040452357754e-05, "loss": 0.5978, "step": 152320 }, { "epoch": 1.69, "learning_rate": 2.1889117725219242e-05, "loss": 0.6339, "step": 152325 }, { "epoch": 1.69, "learning_rate": 2.188819499808073e-05, "loss": 0.631, "step": 152330 }, { "epoch": 1.69, "learning_rate": 2.1887272270942217e-05, "loss": 0.6038, "step": 152335 }, { "epoch": 1.69, "learning_rate": 2.1886349543803702e-05, "loss": 0.6616, "step": 152340 }, { "epoch": 1.69, "learning_rate": 2.1885426816665193e-05, "loss": 0.6056, "step": 152345 }, { "epoch": 1.69, "learning_rate": 2.1884504089526678e-05, "loss": 0.6521, "step": 152350 }, { "epoch": 1.69, "learning_rate": 2.1883581362388165e-05, "loss": 0.6311, "step": 152355 }, { "epoch": 1.69, "learning_rate": 2.1882658635249653e-05, "loss": 0.6131, "step": 152360 }, { "epoch": 1.69, "learning_rate": 2.188173590811114e-05, "loss": 0.6004, "step": 152365 }, { "epoch": 1.69, "learning_rate": 2.188081318097263e-05, "loss": 0.6717, "step": 152370 }, { "epoch": 1.69, "learning_rate": 2.1879890453834117e-05, "loss": 0.6056, "step": 152375 }, { "epoch": 1.69, "learning_rate": 2.1878967726695605e-05, "loss": 0.6016, "step": 152380 }, { "epoch": 1.69, "learning_rate": 2.1878044999557093e-05, "loss": 0.6209, "step": 152385 }, { "epoch": 1.69, "learning_rate": 2.187712227241858e-05, "loss": 0.6313, "step": 152390 }, { "epoch": 1.69, "learning_rate": 2.1876199545280068e-05, "loss": 0.6367, "step": 152395 }, { "epoch": 1.69, "learning_rate": 2.1875276818141556e-05, "loss": 0.6117, "step": 152400 }, { "epoch": 1.69, "learning_rate": 2.1874354091003044e-05, "loss": 0.5964, "step": 152405 }, { "epoch": 1.69, "learning_rate": 2.187343136386453e-05, "loss": 0.5988, "step": 152410 }, { "epoch": 1.69, "learning_rate": 2.1872508636726016e-05, "loss": 0.6084, "step": 152415 }, { "epoch": 1.69, "learning_rate": 2.1871585909587504e-05, "loss": 0.6041, "step": 152420 }, { "epoch": 1.69, "learning_rate": 2.1870663182448992e-05, "loss": 0.6219, "step": 152425 }, { "epoch": 1.69, "learning_rate": 2.186974045531048e-05, "loss": 0.6071, "step": 152430 }, { "epoch": 1.69, "learning_rate": 2.1868817728171968e-05, "loss": 0.6379, "step": 152435 }, { "epoch": 1.69, "learning_rate": 2.1867895001033456e-05, "loss": 0.6292, "step": 152440 }, { "epoch": 1.69, "learning_rate": 2.1866972273894943e-05, "loss": 0.6672, "step": 152445 }, { "epoch": 1.69, "learning_rate": 2.186604954675643e-05, "loss": 0.6187, "step": 152450 }, { "epoch": 1.69, "learning_rate": 2.186512681961792e-05, "loss": 0.5909, "step": 152455 }, { "epoch": 1.69, "learning_rate": 2.1864204092479407e-05, "loss": 0.6355, "step": 152460 }, { "epoch": 1.69, "learning_rate": 2.186328136534089e-05, "loss": 0.6311, "step": 152465 }, { "epoch": 1.69, "learning_rate": 2.1862358638202383e-05, "loss": 0.6102, "step": 152470 }, { "epoch": 1.69, "learning_rate": 2.1861435911063867e-05, "loss": 0.6215, "step": 152475 }, { "epoch": 1.69, "learning_rate": 2.186051318392536e-05, "loss": 0.6456, "step": 152480 }, { "epoch": 1.69, "learning_rate": 2.1859590456786843e-05, "loss": 0.6132, "step": 152485 }, { "epoch": 1.69, "learning_rate": 2.185866772964833e-05, "loss": 0.6169, "step": 152490 }, { "epoch": 1.69, "learning_rate": 2.185774500250982e-05, "loss": 0.5761, "step": 152495 }, { "epoch": 1.69, "learning_rate": 2.1856822275371306e-05, "loss": 0.604, "step": 152500 }, { "epoch": 1.69, "learning_rate": 2.1855899548232794e-05, "loss": 0.6103, "step": 152505 }, { "epoch": 1.69, "learning_rate": 2.1854976821094282e-05, "loss": 0.6425, "step": 152510 }, { "epoch": 1.69, "learning_rate": 2.185405409395577e-05, "loss": 0.6596, "step": 152515 }, { "epoch": 1.69, "learning_rate": 2.1853131366817258e-05, "loss": 0.6123, "step": 152520 }, { "epoch": 1.69, "learning_rate": 2.1852208639678746e-05, "loss": 0.6128, "step": 152525 }, { "epoch": 1.69, "learning_rate": 2.185128591254023e-05, "loss": 0.5964, "step": 152530 }, { "epoch": 1.69, "learning_rate": 2.185036318540172e-05, "loss": 0.5961, "step": 152535 }, { "epoch": 1.69, "learning_rate": 2.1849440458263206e-05, "loss": 0.6632, "step": 152540 }, { "epoch": 1.69, "learning_rate": 2.1848517731124697e-05, "loss": 0.6066, "step": 152545 }, { "epoch": 1.69, "learning_rate": 2.184759500398618e-05, "loss": 0.5859, "step": 152550 }, { "epoch": 1.69, "learning_rate": 2.1846672276847673e-05, "loss": 0.6126, "step": 152555 }, { "epoch": 1.69, "learning_rate": 2.1845749549709157e-05, "loss": 0.6262, "step": 152560 }, { "epoch": 1.69, "learning_rate": 2.1844826822570645e-05, "loss": 0.5739, "step": 152565 }, { "epoch": 1.69, "learning_rate": 2.1843904095432133e-05, "loss": 0.6086, "step": 152570 }, { "epoch": 1.69, "learning_rate": 2.184298136829362e-05, "loss": 0.5987, "step": 152575 }, { "epoch": 1.69, "learning_rate": 2.184205864115511e-05, "loss": 0.6442, "step": 152580 }, { "epoch": 1.69, "learning_rate": 2.1841135914016593e-05, "loss": 0.6609, "step": 152585 }, { "epoch": 1.69, "learning_rate": 2.1840213186878084e-05, "loss": 0.6278, "step": 152590 }, { "epoch": 1.69, "learning_rate": 2.183929045973957e-05, "loss": 0.6464, "step": 152595 }, { "epoch": 1.69, "learning_rate": 2.183836773260106e-05, "loss": 0.6005, "step": 152600 }, { "epoch": 1.69, "learning_rate": 2.1837445005462544e-05, "loss": 0.6212, "step": 152605 }, { "epoch": 1.69, "learning_rate": 2.1836522278324036e-05, "loss": 0.6456, "step": 152610 }, { "epoch": 1.69, "learning_rate": 2.183559955118552e-05, "loss": 0.6696, "step": 152615 }, { "epoch": 1.69, "learning_rate": 2.1834676824047008e-05, "loss": 0.6847, "step": 152620 }, { "epoch": 1.69, "learning_rate": 2.1833754096908496e-05, "loss": 0.6011, "step": 152625 }, { "epoch": 1.69, "learning_rate": 2.1832831369769984e-05, "loss": 0.6356, "step": 152630 }, { "epoch": 1.69, "learning_rate": 2.183190864263147e-05, "loss": 0.5947, "step": 152635 }, { "epoch": 1.69, "learning_rate": 2.1830985915492956e-05, "loss": 0.6343, "step": 152640 }, { "epoch": 1.69, "learning_rate": 2.1830063188354447e-05, "loss": 0.6335, "step": 152645 }, { "epoch": 1.69, "learning_rate": 2.182914046121593e-05, "loss": 0.6782, "step": 152650 }, { "epoch": 1.69, "learning_rate": 2.1828217734077423e-05, "loss": 0.6183, "step": 152655 }, { "epoch": 1.69, "learning_rate": 2.1827295006938907e-05, "loss": 0.6504, "step": 152660 }, { "epoch": 1.69, "learning_rate": 2.18263722798004e-05, "loss": 0.6045, "step": 152665 }, { "epoch": 1.69, "learning_rate": 2.1825449552661883e-05, "loss": 0.6408, "step": 152670 }, { "epoch": 1.69, "learning_rate": 2.1824526825523374e-05, "loss": 0.5937, "step": 152675 }, { "epoch": 1.69, "learning_rate": 2.182360409838486e-05, "loss": 0.6642, "step": 152680 }, { "epoch": 1.69, "learning_rate": 2.1822681371246347e-05, "loss": 0.6274, "step": 152685 }, { "epoch": 1.69, "learning_rate": 2.1821758644107834e-05, "loss": 0.6239, "step": 152690 }, { "epoch": 1.69, "learning_rate": 2.1820835916969322e-05, "loss": 0.6436, "step": 152695 }, { "epoch": 1.69, "learning_rate": 2.181991318983081e-05, "loss": 0.6294, "step": 152700 }, { "epoch": 1.69, "learning_rate": 2.1818990462692298e-05, "loss": 0.6232, "step": 152705 }, { "epoch": 1.69, "learning_rate": 2.1818067735553786e-05, "loss": 0.6196, "step": 152710 }, { "epoch": 1.69, "learning_rate": 2.181714500841527e-05, "loss": 0.616, "step": 152715 }, { "epoch": 1.69, "learning_rate": 2.181622228127676e-05, "loss": 0.6436, "step": 152720 }, { "epoch": 1.69, "learning_rate": 2.1815299554138246e-05, "loss": 0.5907, "step": 152725 }, { "epoch": 1.69, "learning_rate": 2.1814376826999737e-05, "loss": 0.5727, "step": 152730 }, { "epoch": 1.69, "learning_rate": 2.181345409986122e-05, "loss": 0.6236, "step": 152735 }, { "epoch": 1.69, "learning_rate": 2.181253137272271e-05, "loss": 0.6073, "step": 152740 }, { "epoch": 1.69, "learning_rate": 2.1811608645584197e-05, "loss": 0.6222, "step": 152745 }, { "epoch": 1.69, "learning_rate": 2.1810685918445685e-05, "loss": 0.6286, "step": 152750 }, { "epoch": 1.69, "learning_rate": 2.1809763191307173e-05, "loss": 0.6138, "step": 152755 }, { "epoch": 1.69, "learning_rate": 2.180884046416866e-05, "loss": 0.6183, "step": 152760 }, { "epoch": 1.69, "learning_rate": 2.180791773703015e-05, "loss": 0.6151, "step": 152765 }, { "epoch": 1.69, "learning_rate": 2.1806995009891637e-05, "loss": 0.6379, "step": 152770 }, { "epoch": 1.69, "learning_rate": 2.1806072282753124e-05, "loss": 0.5631, "step": 152775 }, { "epoch": 1.69, "learning_rate": 2.1805149555614612e-05, "loss": 0.6808, "step": 152780 }, { "epoch": 1.69, "learning_rate": 2.18042268284761e-05, "loss": 0.6155, "step": 152785 }, { "epoch": 1.69, "learning_rate": 2.1803304101337585e-05, "loss": 0.6157, "step": 152790 }, { "epoch": 1.69, "learning_rate": 2.1802381374199072e-05, "loss": 0.6338, "step": 152795 }, { "epoch": 1.69, "learning_rate": 2.180145864706056e-05, "loss": 0.592, "step": 152800 }, { "epoch": 1.69, "learning_rate": 2.1800535919922048e-05, "loss": 0.657, "step": 152805 }, { "epoch": 1.69, "learning_rate": 2.1799613192783536e-05, "loss": 0.6743, "step": 152810 }, { "epoch": 1.69, "learning_rate": 2.1798690465645024e-05, "loss": 0.6317, "step": 152815 }, { "epoch": 1.69, "learning_rate": 2.1797767738506512e-05, "loss": 0.6384, "step": 152820 }, { "epoch": 1.69, "learning_rate": 2.1796845011368e-05, "loss": 0.5894, "step": 152825 }, { "epoch": 1.69, "learning_rate": 2.1795922284229487e-05, "loss": 0.6931, "step": 152830 }, { "epoch": 1.69, "learning_rate": 2.1794999557090975e-05, "loss": 0.6992, "step": 152835 }, { "epoch": 1.69, "learning_rate": 2.1794076829952463e-05, "loss": 0.681, "step": 152840 }, { "epoch": 1.69, "learning_rate": 2.179315410281395e-05, "loss": 0.6396, "step": 152845 }, { "epoch": 1.69, "learning_rate": 2.1792231375675435e-05, "loss": 0.6505, "step": 152850 }, { "epoch": 1.69, "learning_rate": 2.1791308648536927e-05, "loss": 0.6415, "step": 152855 }, { "epoch": 1.69, "learning_rate": 2.179038592139841e-05, "loss": 0.6767, "step": 152860 }, { "epoch": 1.69, "learning_rate": 2.17894631942599e-05, "loss": 0.6274, "step": 152865 }, { "epoch": 1.69, "learning_rate": 2.1788540467121387e-05, "loss": 0.6415, "step": 152870 }, { "epoch": 1.69, "learning_rate": 2.1787617739982875e-05, "loss": 0.6362, "step": 152875 }, { "epoch": 1.69, "learning_rate": 2.1786695012844363e-05, "loss": 0.5799, "step": 152880 }, { "epoch": 1.69, "learning_rate": 2.178577228570585e-05, "loss": 0.6598, "step": 152885 }, { "epoch": 1.69, "learning_rate": 2.1784849558567338e-05, "loss": 0.6577, "step": 152890 }, { "epoch": 1.69, "learning_rate": 2.1783926831428826e-05, "loss": 0.6047, "step": 152895 }, { "epoch": 1.69, "learning_rate": 2.1783004104290314e-05, "loss": 0.6998, "step": 152900 }, { "epoch": 1.69, "learning_rate": 2.1782081377151802e-05, "loss": 0.6765, "step": 152905 }, { "epoch": 1.69, "learning_rate": 2.178115865001329e-05, "loss": 0.6529, "step": 152910 }, { "epoch": 1.69, "learning_rate": 2.1780235922874774e-05, "loss": 0.5842, "step": 152915 }, { "epoch": 1.69, "learning_rate": 2.1779313195736265e-05, "loss": 0.5976, "step": 152920 }, { "epoch": 1.69, "learning_rate": 2.177839046859775e-05, "loss": 0.6526, "step": 152925 }, { "epoch": 1.69, "learning_rate": 2.177746774145924e-05, "loss": 0.6054, "step": 152930 }, { "epoch": 1.69, "learning_rate": 2.1776545014320725e-05, "loss": 0.5915, "step": 152935 }, { "epoch": 1.69, "learning_rate": 2.1775622287182217e-05, "loss": 0.6012, "step": 152940 }, { "epoch": 1.69, "learning_rate": 2.17746995600437e-05, "loss": 0.6293, "step": 152945 }, { "epoch": 1.69, "learning_rate": 2.177377683290519e-05, "loss": 0.5891, "step": 152950 }, { "epoch": 1.69, "learning_rate": 2.1772854105766677e-05, "loss": 0.6733, "step": 152955 }, { "epoch": 1.69, "learning_rate": 2.1771931378628165e-05, "loss": 0.5962, "step": 152960 }, { "epoch": 1.69, "learning_rate": 2.1771008651489653e-05, "loss": 0.6157, "step": 152965 }, { "epoch": 1.69, "learning_rate": 2.1770085924351137e-05, "loss": 0.6146, "step": 152970 }, { "epoch": 1.69, "learning_rate": 2.1769163197212628e-05, "loss": 0.6591, "step": 152975 }, { "epoch": 1.69, "learning_rate": 2.1768240470074113e-05, "loss": 0.6471, "step": 152980 }, { "epoch": 1.69, "learning_rate": 2.1767317742935604e-05, "loss": 0.5825, "step": 152985 }, { "epoch": 1.69, "learning_rate": 2.176639501579709e-05, "loss": 0.6184, "step": 152990 }, { "epoch": 1.69, "learning_rate": 2.176547228865858e-05, "loss": 0.6362, "step": 152995 }, { "epoch": 1.69, "learning_rate": 2.1764549561520064e-05, "loss": 0.6227, "step": 153000 }, { "epoch": 1.69, "eval_loss": 0.5802493691444397, "eval_runtime": 69.3179, "eval_samples_per_second": 28.853, "eval_steps_per_second": 14.426, "step": 153000 }, { "epoch": 1.69, "learning_rate": 2.1763626834381552e-05, "loss": 0.5721, "step": 153005 }, { "epoch": 1.69, "learning_rate": 2.176270410724304e-05, "loss": 0.6216, "step": 153010 }, { "epoch": 1.69, "learning_rate": 2.1761781380104528e-05, "loss": 0.6038, "step": 153015 }, { "epoch": 1.69, "learning_rate": 2.1760858652966015e-05, "loss": 0.6322, "step": 153020 }, { "epoch": 1.69, "learning_rate": 2.17599359258275e-05, "loss": 0.6539, "step": 153025 }, { "epoch": 1.69, "learning_rate": 2.175901319868899e-05, "loss": 0.6262, "step": 153030 }, { "epoch": 1.69, "learning_rate": 2.1758090471550476e-05, "loss": 0.6499, "step": 153035 }, { "epoch": 1.69, "learning_rate": 2.1757167744411967e-05, "loss": 0.6492, "step": 153040 }, { "epoch": 1.69, "learning_rate": 2.175624501727345e-05, "loss": 0.5942, "step": 153045 }, { "epoch": 1.69, "learning_rate": 2.1755322290134943e-05, "loss": 0.6525, "step": 153050 }, { "epoch": 1.69, "learning_rate": 2.1754399562996427e-05, "loss": 0.6141, "step": 153055 }, { "epoch": 1.69, "learning_rate": 2.1753476835857918e-05, "loss": 0.6344, "step": 153060 }, { "epoch": 1.69, "learning_rate": 2.1752554108719403e-05, "loss": 0.6247, "step": 153065 }, { "epoch": 1.69, "learning_rate": 2.175163138158089e-05, "loss": 0.6366, "step": 153070 }, { "epoch": 1.69, "learning_rate": 2.175070865444238e-05, "loss": 0.6425, "step": 153075 }, { "epoch": 1.7, "learning_rate": 2.1749785927303866e-05, "loss": 0.649, "step": 153080 }, { "epoch": 1.7, "learning_rate": 2.1748863200165354e-05, "loss": 0.6275, "step": 153085 }, { "epoch": 1.7, "learning_rate": 2.1747940473026842e-05, "loss": 0.623, "step": 153090 }, { "epoch": 1.7, "learning_rate": 2.174701774588833e-05, "loss": 0.6425, "step": 153095 }, { "epoch": 1.7, "learning_rate": 2.1746095018749814e-05, "loss": 0.6215, "step": 153100 }, { "epoch": 1.7, "learning_rate": 2.1745172291611306e-05, "loss": 0.6227, "step": 153105 }, { "epoch": 1.7, "learning_rate": 2.174424956447279e-05, "loss": 0.5435, "step": 153110 }, { "epoch": 1.7, "learning_rate": 2.174332683733428e-05, "loss": 0.6561, "step": 153115 }, { "epoch": 1.7, "learning_rate": 2.1742404110195766e-05, "loss": 0.6726, "step": 153120 }, { "epoch": 1.7, "learning_rate": 2.1741481383057254e-05, "loss": 0.6479, "step": 153125 }, { "epoch": 1.7, "learning_rate": 2.174055865591874e-05, "loss": 0.6166, "step": 153130 }, { "epoch": 1.7, "learning_rate": 2.173963592878023e-05, "loss": 0.6155, "step": 153135 }, { "epoch": 1.7, "learning_rate": 2.1738713201641717e-05, "loss": 0.6277, "step": 153140 }, { "epoch": 1.7, "learning_rate": 2.1737790474503205e-05, "loss": 0.6162, "step": 153145 }, { "epoch": 1.7, "learning_rate": 2.1736867747364693e-05, "loss": 0.5945, "step": 153150 }, { "epoch": 1.7, "learning_rate": 2.173594502022618e-05, "loss": 0.634, "step": 153155 }, { "epoch": 1.7, "learning_rate": 2.173502229308767e-05, "loss": 0.6559, "step": 153160 }, { "epoch": 1.7, "learning_rate": 2.1734099565949156e-05, "loss": 0.5748, "step": 153165 }, { "epoch": 1.7, "learning_rate": 2.1733176838810644e-05, "loss": 0.6628, "step": 153170 }, { "epoch": 1.7, "learning_rate": 2.173225411167213e-05, "loss": 0.6108, "step": 153175 }, { "epoch": 1.7, "learning_rate": 2.1731331384533616e-05, "loss": 0.5968, "step": 153180 }, { "epoch": 1.7, "learning_rate": 2.1730408657395104e-05, "loss": 0.6497, "step": 153185 }, { "epoch": 1.7, "learning_rate": 2.1729485930256592e-05, "loss": 0.6824, "step": 153190 }, { "epoch": 1.7, "learning_rate": 2.172856320311808e-05, "loss": 0.6181, "step": 153195 }, { "epoch": 1.7, "learning_rate": 2.1727640475979568e-05, "loss": 0.5418, "step": 153200 }, { "epoch": 1.7, "learning_rate": 2.1726717748841056e-05, "loss": 0.662, "step": 153205 }, { "epoch": 1.7, "learning_rate": 2.1725795021702544e-05, "loss": 0.6504, "step": 153210 }, { "epoch": 1.7, "learning_rate": 2.172487229456403e-05, "loss": 0.6466, "step": 153215 }, { "epoch": 1.7, "learning_rate": 2.172394956742552e-05, "loss": 0.6251, "step": 153220 }, { "epoch": 1.7, "learning_rate": 2.1723026840287007e-05, "loss": 0.6347, "step": 153225 }, { "epoch": 1.7, "learning_rate": 2.1722104113148495e-05, "loss": 0.6297, "step": 153230 }, { "epoch": 1.7, "learning_rate": 2.172118138600998e-05, "loss": 0.6583, "step": 153235 }, { "epoch": 1.7, "learning_rate": 2.172025865887147e-05, "loss": 0.6515, "step": 153240 }, { "epoch": 1.7, "learning_rate": 2.1719335931732955e-05, "loss": 0.6243, "step": 153245 }, { "epoch": 1.7, "learning_rate": 2.1718413204594443e-05, "loss": 0.6037, "step": 153250 }, { "epoch": 1.7, "learning_rate": 2.171749047745593e-05, "loss": 0.6189, "step": 153255 }, { "epoch": 1.7, "learning_rate": 2.171656775031742e-05, "loss": 0.572, "step": 153260 }, { "epoch": 1.7, "learning_rate": 2.1715645023178907e-05, "loss": 0.5965, "step": 153265 }, { "epoch": 1.7, "learning_rate": 2.1714722296040394e-05, "loss": 0.5995, "step": 153270 }, { "epoch": 1.7, "learning_rate": 2.1713799568901882e-05, "loss": 0.6716, "step": 153275 }, { "epoch": 1.7, "learning_rate": 2.171287684176337e-05, "loss": 0.6338, "step": 153280 }, { "epoch": 1.7, "learning_rate": 2.1711954114624858e-05, "loss": 0.6019, "step": 153285 }, { "epoch": 1.7, "learning_rate": 2.1711031387486346e-05, "loss": 0.6247, "step": 153290 }, { "epoch": 1.7, "learning_rate": 2.1710108660347834e-05, "loss": 0.6409, "step": 153295 }, { "epoch": 1.7, "learning_rate": 2.1709185933209318e-05, "loss": 0.6129, "step": 153300 }, { "epoch": 1.7, "learning_rate": 2.170826320607081e-05, "loss": 0.6441, "step": 153305 }, { "epoch": 1.7, "learning_rate": 2.1707340478932294e-05, "loss": 0.6034, "step": 153310 }, { "epoch": 1.7, "learning_rate": 2.1706417751793785e-05, "loss": 0.5845, "step": 153315 }, { "epoch": 1.7, "learning_rate": 2.170549502465527e-05, "loss": 0.6473, "step": 153320 }, { "epoch": 1.7, "learning_rate": 2.1704572297516757e-05, "loss": 0.6414, "step": 153325 }, { "epoch": 1.7, "learning_rate": 2.1703649570378245e-05, "loss": 0.6501, "step": 153330 }, { "epoch": 1.7, "learning_rate": 2.1702726843239733e-05, "loss": 0.6606, "step": 153335 }, { "epoch": 1.7, "learning_rate": 2.170180411610122e-05, "loss": 0.6122, "step": 153340 }, { "epoch": 1.7, "learning_rate": 2.170088138896271e-05, "loss": 0.6475, "step": 153345 }, { "epoch": 1.7, "learning_rate": 2.1699958661824197e-05, "loss": 0.6403, "step": 153350 }, { "epoch": 1.7, "learning_rate": 2.169903593468568e-05, "loss": 0.6331, "step": 153355 }, { "epoch": 1.7, "learning_rate": 2.1698113207547172e-05, "loss": 0.6089, "step": 153360 }, { "epoch": 1.7, "learning_rate": 2.1697190480408657e-05, "loss": 0.6329, "step": 153365 }, { "epoch": 1.7, "learning_rate": 2.1696267753270148e-05, "loss": 0.6564, "step": 153370 }, { "epoch": 1.7, "learning_rate": 2.1695345026131632e-05, "loss": 0.6054, "step": 153375 }, { "epoch": 1.7, "learning_rate": 2.1694422298993124e-05, "loss": 0.6526, "step": 153380 }, { "epoch": 1.7, "learning_rate": 2.1693499571854608e-05, "loss": 0.6517, "step": 153385 }, { "epoch": 1.7, "learning_rate": 2.1692576844716096e-05, "loss": 0.631, "step": 153390 }, { "epoch": 1.7, "learning_rate": 2.1691654117577584e-05, "loss": 0.6713, "step": 153395 }, { "epoch": 1.7, "learning_rate": 2.169073139043907e-05, "loss": 0.6595, "step": 153400 }, { "epoch": 1.7, "learning_rate": 2.168980866330056e-05, "loss": 0.6264, "step": 153405 }, { "epoch": 1.7, "learning_rate": 2.1688885936162044e-05, "loss": 0.6391, "step": 153410 }, { "epoch": 1.7, "learning_rate": 2.1687963209023535e-05, "loss": 0.6316, "step": 153415 }, { "epoch": 1.7, "learning_rate": 2.168704048188502e-05, "loss": 0.5988, "step": 153420 }, { "epoch": 1.7, "learning_rate": 2.168611775474651e-05, "loss": 0.6301, "step": 153425 }, { "epoch": 1.7, "learning_rate": 2.1685195027607995e-05, "loss": 0.6475, "step": 153430 }, { "epoch": 1.7, "learning_rate": 2.1684272300469487e-05, "loss": 0.6326, "step": 153435 }, { "epoch": 1.7, "learning_rate": 2.168334957333097e-05, "loss": 0.6153, "step": 153440 }, { "epoch": 1.7, "learning_rate": 2.1682426846192462e-05, "loss": 0.6348, "step": 153445 }, { "epoch": 1.7, "learning_rate": 2.1681504119053947e-05, "loss": 0.6306, "step": 153450 }, { "epoch": 1.7, "learning_rate": 2.1680581391915435e-05, "loss": 0.6187, "step": 153455 }, { "epoch": 1.7, "learning_rate": 2.1679658664776922e-05, "loss": 0.6662, "step": 153460 }, { "epoch": 1.7, "learning_rate": 2.167873593763841e-05, "loss": 0.5742, "step": 153465 }, { "epoch": 1.7, "learning_rate": 2.1677813210499898e-05, "loss": 0.6997, "step": 153470 }, { "epoch": 1.7, "learning_rate": 2.1676890483361383e-05, "loss": 0.6313, "step": 153475 }, { "epoch": 1.7, "learning_rate": 2.1675967756222874e-05, "loss": 0.6167, "step": 153480 }, { "epoch": 1.7, "learning_rate": 2.167504502908436e-05, "loss": 0.6283, "step": 153485 }, { "epoch": 1.7, "learning_rate": 2.167412230194585e-05, "loss": 0.6716, "step": 153490 }, { "epoch": 1.7, "learning_rate": 2.1673199574807334e-05, "loss": 0.6285, "step": 153495 }, { "epoch": 1.7, "learning_rate": 2.1672276847668825e-05, "loss": 0.6189, "step": 153500 }, { "epoch": 1.7, "learning_rate": 2.167135412053031e-05, "loss": 0.6321, "step": 153505 }, { "epoch": 1.7, "learning_rate": 2.1670431393391798e-05, "loss": 0.6231, "step": 153510 }, { "epoch": 1.7, "learning_rate": 2.1669508666253285e-05, "loss": 0.5707, "step": 153515 }, { "epoch": 1.7, "learning_rate": 2.1668585939114773e-05, "loss": 0.6285, "step": 153520 }, { "epoch": 1.7, "learning_rate": 2.166766321197626e-05, "loss": 0.6969, "step": 153525 }, { "epoch": 1.7, "learning_rate": 2.166674048483775e-05, "loss": 0.6715, "step": 153530 }, { "epoch": 1.7, "learning_rate": 2.1665817757699237e-05, "loss": 0.595, "step": 153535 }, { "epoch": 1.7, "learning_rate": 2.1664895030560725e-05, "loss": 0.618, "step": 153540 }, { "epoch": 1.7, "learning_rate": 2.1663972303422212e-05, "loss": 0.6157, "step": 153545 }, { "epoch": 1.7, "learning_rate": 2.1663049576283697e-05, "loss": 0.6007, "step": 153550 }, { "epoch": 1.7, "learning_rate": 2.1662126849145188e-05, "loss": 0.633, "step": 153555 }, { "epoch": 1.7, "learning_rate": 2.1661204122006673e-05, "loss": 0.6344, "step": 153560 }, { "epoch": 1.7, "learning_rate": 2.166028139486816e-05, "loss": 0.6359, "step": 153565 }, { "epoch": 1.7, "learning_rate": 2.165935866772965e-05, "loss": 0.5861, "step": 153570 }, { "epoch": 1.7, "learning_rate": 2.1658435940591136e-05, "loss": 0.6291, "step": 153575 }, { "epoch": 1.7, "learning_rate": 2.1657513213452624e-05, "loss": 0.5962, "step": 153580 }, { "epoch": 1.7, "learning_rate": 2.1656590486314112e-05, "loss": 0.6463, "step": 153585 }, { "epoch": 1.7, "learning_rate": 2.16556677591756e-05, "loss": 0.5959, "step": 153590 }, { "epoch": 1.7, "learning_rate": 2.1654745032037088e-05, "loss": 0.6819, "step": 153595 }, { "epoch": 1.7, "learning_rate": 2.1653822304898575e-05, "loss": 0.5887, "step": 153600 }, { "epoch": 1.7, "learning_rate": 2.1652899577760063e-05, "loss": 0.6441, "step": 153605 }, { "epoch": 1.7, "learning_rate": 2.165197685062155e-05, "loss": 0.6048, "step": 153610 }, { "epoch": 1.7, "learning_rate": 2.165105412348304e-05, "loss": 0.5877, "step": 153615 }, { "epoch": 1.7, "learning_rate": 2.1650131396344523e-05, "loss": 0.6509, "step": 153620 }, { "epoch": 1.7, "learning_rate": 2.164920866920601e-05, "loss": 0.6445, "step": 153625 }, { "epoch": 1.7, "learning_rate": 2.16482859420675e-05, "loss": 0.5989, "step": 153630 }, { "epoch": 1.7, "learning_rate": 2.1647363214928987e-05, "loss": 0.6645, "step": 153635 }, { "epoch": 1.7, "learning_rate": 2.1646440487790475e-05, "loss": 0.6582, "step": 153640 }, { "epoch": 1.7, "learning_rate": 2.1645517760651963e-05, "loss": 0.5773, "step": 153645 }, { "epoch": 1.7, "learning_rate": 2.164459503351345e-05, "loss": 0.673, "step": 153650 }, { "epoch": 1.7, "learning_rate": 2.164367230637494e-05, "loss": 0.6557, "step": 153655 }, { "epoch": 1.7, "learning_rate": 2.1642749579236426e-05, "loss": 0.5956, "step": 153660 }, { "epoch": 1.7, "learning_rate": 2.1641826852097914e-05, "loss": 0.6689, "step": 153665 }, { "epoch": 1.7, "learning_rate": 2.1640904124959402e-05, "loss": 0.6097, "step": 153670 }, { "epoch": 1.7, "learning_rate": 2.163998139782089e-05, "loss": 0.6107, "step": 153675 }, { "epoch": 1.7, "learning_rate": 2.1639058670682378e-05, "loss": 0.6301, "step": 153680 }, { "epoch": 1.7, "learning_rate": 2.1638135943543862e-05, "loss": 0.6194, "step": 153685 }, { "epoch": 1.7, "learning_rate": 2.1637213216405353e-05, "loss": 0.6093, "step": 153690 }, { "epoch": 1.7, "learning_rate": 2.1636290489266838e-05, "loss": 0.6675, "step": 153695 }, { "epoch": 1.7, "learning_rate": 2.1635367762128326e-05, "loss": 0.6402, "step": 153700 }, { "epoch": 1.7, "learning_rate": 2.1634445034989813e-05, "loss": 0.6956, "step": 153705 }, { "epoch": 1.7, "learning_rate": 2.16335223078513e-05, "loss": 0.6056, "step": 153710 }, { "epoch": 1.7, "learning_rate": 2.163259958071279e-05, "loss": 0.6202, "step": 153715 }, { "epoch": 1.7, "learning_rate": 2.1631676853574277e-05, "loss": 0.6374, "step": 153720 }, { "epoch": 1.7, "learning_rate": 2.1630754126435765e-05, "loss": 0.6549, "step": 153725 }, { "epoch": 1.7, "learning_rate": 2.1629831399297253e-05, "loss": 0.6049, "step": 153730 }, { "epoch": 1.7, "learning_rate": 2.162890867215874e-05, "loss": 0.6005, "step": 153735 }, { "epoch": 1.7, "learning_rate": 2.1627985945020225e-05, "loss": 0.6188, "step": 153740 }, { "epoch": 1.7, "learning_rate": 2.1627063217881716e-05, "loss": 0.6502, "step": 153745 }, { "epoch": 1.7, "learning_rate": 2.16261404907432e-05, "loss": 0.6709, "step": 153750 }, { "epoch": 1.7, "learning_rate": 2.1625217763604692e-05, "loss": 0.6343, "step": 153755 }, { "epoch": 1.7, "learning_rate": 2.1624295036466176e-05, "loss": 0.5771, "step": 153760 }, { "epoch": 1.7, "learning_rate": 2.1623372309327668e-05, "loss": 0.6155, "step": 153765 }, { "epoch": 1.7, "learning_rate": 2.1622449582189152e-05, "loss": 0.6757, "step": 153770 }, { "epoch": 1.7, "learning_rate": 2.1621526855050643e-05, "loss": 0.6146, "step": 153775 }, { "epoch": 1.7, "learning_rate": 2.1620604127912128e-05, "loss": 0.6148, "step": 153780 }, { "epoch": 1.7, "learning_rate": 2.1619681400773616e-05, "loss": 0.6276, "step": 153785 }, { "epoch": 1.7, "learning_rate": 2.1618758673635104e-05, "loss": 0.6194, "step": 153790 }, { "epoch": 1.7, "learning_rate": 2.1617835946496588e-05, "loss": 0.5882, "step": 153795 }, { "epoch": 1.7, "learning_rate": 2.161691321935808e-05, "loss": 0.5891, "step": 153800 }, { "epoch": 1.7, "learning_rate": 2.1615990492219564e-05, "loss": 0.5623, "step": 153805 }, { "epoch": 1.7, "learning_rate": 2.1615067765081055e-05, "loss": 0.6876, "step": 153810 }, { "epoch": 1.7, "learning_rate": 2.161414503794254e-05, "loss": 0.6203, "step": 153815 }, { "epoch": 1.7, "learning_rate": 2.161322231080403e-05, "loss": 0.6528, "step": 153820 }, { "epoch": 1.7, "learning_rate": 2.1612299583665515e-05, "loss": 0.5849, "step": 153825 }, { "epoch": 1.7, "learning_rate": 2.1611376856527006e-05, "loss": 0.6797, "step": 153830 }, { "epoch": 1.7, "learning_rate": 2.161045412938849e-05, "loss": 0.6778, "step": 153835 }, { "epoch": 1.7, "learning_rate": 2.160953140224998e-05, "loss": 0.6362, "step": 153840 }, { "epoch": 1.7, "learning_rate": 2.1608608675111466e-05, "loss": 0.6215, "step": 153845 }, { "epoch": 1.7, "learning_rate": 2.1607685947972954e-05, "loss": 0.6551, "step": 153850 }, { "epoch": 1.7, "learning_rate": 2.1606763220834442e-05, "loss": 0.6268, "step": 153855 }, { "epoch": 1.7, "learning_rate": 2.1605840493695927e-05, "loss": 0.5961, "step": 153860 }, { "epoch": 1.7, "learning_rate": 2.1604917766557418e-05, "loss": 0.5925, "step": 153865 }, { "epoch": 1.7, "learning_rate": 2.1603995039418902e-05, "loss": 0.6368, "step": 153870 }, { "epoch": 1.7, "learning_rate": 2.1603072312280394e-05, "loss": 0.5958, "step": 153875 }, { "epoch": 1.7, "learning_rate": 2.1602149585141878e-05, "loss": 0.6387, "step": 153880 }, { "epoch": 1.7, "learning_rate": 2.160122685800337e-05, "loss": 0.7141, "step": 153885 }, { "epoch": 1.7, "learning_rate": 2.1600304130864854e-05, "loss": 0.6083, "step": 153890 }, { "epoch": 1.7, "learning_rate": 2.159938140372634e-05, "loss": 0.6521, "step": 153895 }, { "epoch": 1.7, "learning_rate": 2.159845867658783e-05, "loss": 0.6469, "step": 153900 }, { "epoch": 1.7, "learning_rate": 2.1597535949449317e-05, "loss": 0.6276, "step": 153905 }, { "epoch": 1.7, "learning_rate": 2.1596613222310805e-05, "loss": 0.6061, "step": 153910 }, { "epoch": 1.7, "learning_rate": 2.1595690495172293e-05, "loss": 0.6627, "step": 153915 }, { "epoch": 1.7, "learning_rate": 2.159476776803378e-05, "loss": 0.5788, "step": 153920 }, { "epoch": 1.7, "learning_rate": 2.159384504089527e-05, "loss": 0.6469, "step": 153925 }, { "epoch": 1.7, "learning_rate": 2.1592922313756757e-05, "loss": 0.6174, "step": 153930 }, { "epoch": 1.7, "learning_rate": 2.159199958661824e-05, "loss": 0.6457, "step": 153935 }, { "epoch": 1.7, "learning_rate": 2.1591076859479732e-05, "loss": 0.6274, "step": 153940 }, { "epoch": 1.7, "learning_rate": 2.1590154132341217e-05, "loss": 0.6305, "step": 153945 }, { "epoch": 1.7, "learning_rate": 2.1589231405202705e-05, "loss": 0.6852, "step": 153950 }, { "epoch": 1.7, "learning_rate": 2.1588308678064192e-05, "loss": 0.6083, "step": 153955 }, { "epoch": 1.7, "learning_rate": 2.158738595092568e-05, "loss": 0.6173, "step": 153960 }, { "epoch": 1.7, "learning_rate": 2.1586463223787168e-05, "loss": 0.6496, "step": 153965 }, { "epoch": 1.7, "learning_rate": 2.1585540496648656e-05, "loss": 0.5969, "step": 153970 }, { "epoch": 1.7, "learning_rate": 2.1584617769510144e-05, "loss": 0.6212, "step": 153975 }, { "epoch": 1.7, "learning_rate": 2.158369504237163e-05, "loss": 0.6584, "step": 153980 }, { "epoch": 1.71, "learning_rate": 2.158277231523312e-05, "loss": 0.6289, "step": 153985 }, { "epoch": 1.71, "learning_rate": 2.1581849588094607e-05, "loss": 0.6364, "step": 153990 }, { "epoch": 1.71, "learning_rate": 2.1580926860956095e-05, "loss": 0.6026, "step": 153995 }, { "epoch": 1.71, "learning_rate": 2.1580004133817583e-05, "loss": 0.6699, "step": 154000 }, { "epoch": 1.71, "eval_loss": 0.606746256351471, "eval_runtime": 69.2143, "eval_samples_per_second": 28.896, "eval_steps_per_second": 14.448, "step": 154000 }, { "epoch": 1.71, "learning_rate": 2.157908140667907e-05, "loss": 0.6508, "step": 154005 }, { "epoch": 1.71, "learning_rate": 2.1578158679540555e-05, "loss": 0.642, "step": 154010 }, { "epoch": 1.71, "learning_rate": 2.1577235952402043e-05, "loss": 0.615, "step": 154015 }, { "epoch": 1.71, "learning_rate": 2.157631322526353e-05, "loss": 0.6101, "step": 154020 }, { "epoch": 1.71, "learning_rate": 2.157539049812502e-05, "loss": 0.6293, "step": 154025 }, { "epoch": 1.71, "learning_rate": 2.1574467770986507e-05, "loss": 0.6622, "step": 154030 }, { "epoch": 1.71, "learning_rate": 2.1573545043847995e-05, "loss": 0.5686, "step": 154035 }, { "epoch": 1.71, "learning_rate": 2.1572622316709482e-05, "loss": 0.6199, "step": 154040 }, { "epoch": 1.71, "learning_rate": 2.157169958957097e-05, "loss": 0.6716, "step": 154045 }, { "epoch": 1.71, "learning_rate": 2.1570776862432458e-05, "loss": 0.6348, "step": 154050 }, { "epoch": 1.71, "learning_rate": 2.1569854135293946e-05, "loss": 0.6757, "step": 154055 }, { "epoch": 1.71, "learning_rate": 2.1568931408155434e-05, "loss": 0.6483, "step": 154060 }, { "epoch": 1.71, "learning_rate": 2.156800868101692e-05, "loss": 0.6141, "step": 154065 }, { "epoch": 1.71, "learning_rate": 2.1567085953878406e-05, "loss": 0.6117, "step": 154070 }, { "epoch": 1.71, "learning_rate": 2.1566163226739897e-05, "loss": 0.536, "step": 154075 }, { "epoch": 1.71, "learning_rate": 2.1565240499601382e-05, "loss": 0.6174, "step": 154080 }, { "epoch": 1.71, "learning_rate": 2.156431777246287e-05, "loss": 0.637, "step": 154085 }, { "epoch": 1.71, "learning_rate": 2.1563395045324358e-05, "loss": 0.6838, "step": 154090 }, { "epoch": 1.71, "learning_rate": 2.1562472318185845e-05, "loss": 0.6176, "step": 154095 }, { "epoch": 1.71, "learning_rate": 2.1561549591047333e-05, "loss": 0.6861, "step": 154100 }, { "epoch": 1.71, "learning_rate": 2.156062686390882e-05, "loss": 0.6074, "step": 154105 }, { "epoch": 1.71, "learning_rate": 2.155970413677031e-05, "loss": 0.6737, "step": 154110 }, { "epoch": 1.71, "learning_rate": 2.1558781409631797e-05, "loss": 0.5605, "step": 154115 }, { "epoch": 1.71, "learning_rate": 2.1557858682493285e-05, "loss": 0.6926, "step": 154120 }, { "epoch": 1.71, "learning_rate": 2.155693595535477e-05, "loss": 0.6863, "step": 154125 }, { "epoch": 1.71, "learning_rate": 2.155601322821626e-05, "loss": 0.6136, "step": 154130 }, { "epoch": 1.71, "learning_rate": 2.1555090501077745e-05, "loss": 0.6441, "step": 154135 }, { "epoch": 1.71, "learning_rate": 2.1554167773939236e-05, "loss": 0.601, "step": 154140 }, { "epoch": 1.71, "learning_rate": 2.155324504680072e-05, "loss": 0.5928, "step": 154145 }, { "epoch": 1.71, "learning_rate": 2.155232231966221e-05, "loss": 0.5652, "step": 154150 }, { "epoch": 1.71, "learning_rate": 2.1551399592523696e-05, "loss": 0.6626, "step": 154155 }, { "epoch": 1.71, "learning_rate": 2.1550476865385184e-05, "loss": 0.6341, "step": 154160 }, { "epoch": 1.71, "learning_rate": 2.1549554138246672e-05, "loss": 0.5929, "step": 154165 }, { "epoch": 1.71, "learning_rate": 2.154863141110816e-05, "loss": 0.6832, "step": 154170 }, { "epoch": 1.71, "learning_rate": 2.1547708683969648e-05, "loss": 0.6703, "step": 154175 }, { "epoch": 1.71, "learning_rate": 2.1546785956831132e-05, "loss": 0.6653, "step": 154180 }, { "epoch": 1.71, "learning_rate": 2.1545863229692623e-05, "loss": 0.6619, "step": 154185 }, { "epoch": 1.71, "learning_rate": 2.1544940502554108e-05, "loss": 0.5989, "step": 154190 }, { "epoch": 1.71, "learning_rate": 2.15440177754156e-05, "loss": 0.5837, "step": 154195 }, { "epoch": 1.71, "learning_rate": 2.1543095048277083e-05, "loss": 0.6102, "step": 154200 }, { "epoch": 1.71, "learning_rate": 2.1542172321138575e-05, "loss": 0.6026, "step": 154205 }, { "epoch": 1.71, "learning_rate": 2.154124959400006e-05, "loss": 0.6155, "step": 154210 }, { "epoch": 1.71, "learning_rate": 2.154032686686155e-05, "loss": 0.6466, "step": 154215 }, { "epoch": 1.71, "learning_rate": 2.1539404139723035e-05, "loss": 0.6431, "step": 154220 }, { "epoch": 1.71, "learning_rate": 2.1538481412584523e-05, "loss": 0.6527, "step": 154225 }, { "epoch": 1.71, "learning_rate": 2.153755868544601e-05, "loss": 0.6445, "step": 154230 }, { "epoch": 1.71, "learning_rate": 2.15366359583075e-05, "loss": 0.616, "step": 154235 }, { "epoch": 1.71, "learning_rate": 2.1535713231168986e-05, "loss": 0.62, "step": 154240 }, { "epoch": 1.71, "learning_rate": 2.153479050403047e-05, "loss": 0.6747, "step": 154245 }, { "epoch": 1.71, "learning_rate": 2.1533867776891962e-05, "loss": 0.6365, "step": 154250 }, { "epoch": 1.71, "learning_rate": 2.1532945049753446e-05, "loss": 0.6263, "step": 154255 }, { "epoch": 1.71, "learning_rate": 2.1532022322614938e-05, "loss": 0.6686, "step": 154260 }, { "epoch": 1.71, "learning_rate": 2.1531099595476422e-05, "loss": 0.5872, "step": 154265 }, { "epoch": 1.71, "learning_rate": 2.1530176868337913e-05, "loss": 0.6336, "step": 154270 }, { "epoch": 1.71, "learning_rate": 2.1529254141199398e-05, "loss": 0.6121, "step": 154275 }, { "epoch": 1.71, "learning_rate": 2.1528331414060886e-05, "loss": 0.6378, "step": 154280 }, { "epoch": 1.71, "learning_rate": 2.1527408686922373e-05, "loss": 0.6387, "step": 154285 }, { "epoch": 1.71, "learning_rate": 2.152648595978386e-05, "loss": 0.615, "step": 154290 }, { "epoch": 1.71, "learning_rate": 2.152556323264535e-05, "loss": 0.6859, "step": 154295 }, { "epoch": 1.71, "learning_rate": 2.1524640505506837e-05, "loss": 0.6604, "step": 154300 }, { "epoch": 1.71, "learning_rate": 2.1523717778368325e-05, "loss": 0.593, "step": 154305 }, { "epoch": 1.71, "learning_rate": 2.152279505122981e-05, "loss": 0.6427, "step": 154310 }, { "epoch": 1.71, "learning_rate": 2.15218723240913e-05, "loss": 0.585, "step": 154315 }, { "epoch": 1.71, "learning_rate": 2.1520949596952785e-05, "loss": 0.6509, "step": 154320 }, { "epoch": 1.71, "learning_rate": 2.1520026869814276e-05, "loss": 0.6549, "step": 154325 }, { "epoch": 1.71, "learning_rate": 2.151910414267576e-05, "loss": 0.6761, "step": 154330 }, { "epoch": 1.71, "learning_rate": 2.151818141553725e-05, "loss": 0.6091, "step": 154335 }, { "epoch": 1.71, "learning_rate": 2.1517258688398736e-05, "loss": 0.6404, "step": 154340 }, { "epoch": 1.71, "learning_rate": 2.1516335961260224e-05, "loss": 0.6895, "step": 154345 }, { "epoch": 1.71, "learning_rate": 2.1515413234121712e-05, "loss": 0.6701, "step": 154350 }, { "epoch": 1.71, "learning_rate": 2.15144905069832e-05, "loss": 0.5846, "step": 154355 }, { "epoch": 1.71, "learning_rate": 2.1513567779844688e-05, "loss": 0.681, "step": 154360 }, { "epoch": 1.71, "learning_rate": 2.1512645052706176e-05, "loss": 0.6555, "step": 154365 }, { "epoch": 1.71, "learning_rate": 2.1511722325567663e-05, "loss": 0.6311, "step": 154370 }, { "epoch": 1.71, "learning_rate": 2.151079959842915e-05, "loss": 0.613, "step": 154375 }, { "epoch": 1.71, "learning_rate": 2.150987687129064e-05, "loss": 0.6281, "step": 154380 }, { "epoch": 1.71, "learning_rate": 2.1508954144152124e-05, "loss": 0.6227, "step": 154385 }, { "epoch": 1.71, "learning_rate": 2.1508031417013615e-05, "loss": 0.6156, "step": 154390 }, { "epoch": 1.71, "learning_rate": 2.15071086898751e-05, "loss": 0.6429, "step": 154395 }, { "epoch": 1.71, "learning_rate": 2.1506185962736587e-05, "loss": 0.603, "step": 154400 }, { "epoch": 1.71, "learning_rate": 2.1505263235598075e-05, "loss": 0.6325, "step": 154405 }, { "epoch": 1.71, "learning_rate": 2.1504340508459563e-05, "loss": 0.5644, "step": 154410 }, { "epoch": 1.71, "learning_rate": 2.150341778132105e-05, "loss": 0.5942, "step": 154415 }, { "epoch": 1.71, "learning_rate": 2.150249505418254e-05, "loss": 0.6576, "step": 154420 }, { "epoch": 1.71, "learning_rate": 2.1501572327044026e-05, "loss": 0.6173, "step": 154425 }, { "epoch": 1.71, "learning_rate": 2.1500649599905514e-05, "loss": 0.5899, "step": 154430 }, { "epoch": 1.71, "learning_rate": 2.1499726872767002e-05, "loss": 0.6055, "step": 154435 }, { "epoch": 1.71, "learning_rate": 2.149880414562849e-05, "loss": 0.6562, "step": 154440 }, { "epoch": 1.71, "learning_rate": 2.1497881418489978e-05, "loss": 0.5781, "step": 154445 }, { "epoch": 1.71, "learning_rate": 2.1496958691351466e-05, "loss": 0.6342, "step": 154450 }, { "epoch": 1.71, "learning_rate": 2.149603596421295e-05, "loss": 0.6831, "step": 154455 }, { "epoch": 1.71, "learning_rate": 2.1495113237074438e-05, "loss": 0.6283, "step": 154460 }, { "epoch": 1.71, "learning_rate": 2.1494190509935926e-05, "loss": 0.6171, "step": 154465 }, { "epoch": 1.71, "learning_rate": 2.1493267782797414e-05, "loss": 0.5963, "step": 154470 }, { "epoch": 1.71, "learning_rate": 2.14923450556589e-05, "loss": 0.5658, "step": 154475 }, { "epoch": 1.71, "learning_rate": 2.149142232852039e-05, "loss": 0.6557, "step": 154480 }, { "epoch": 1.71, "learning_rate": 2.1490499601381877e-05, "loss": 0.6708, "step": 154485 }, { "epoch": 1.71, "learning_rate": 2.1489576874243365e-05, "loss": 0.6092, "step": 154490 }, { "epoch": 1.71, "learning_rate": 2.1488654147104853e-05, "loss": 0.6239, "step": 154495 }, { "epoch": 1.71, "learning_rate": 2.148773141996634e-05, "loss": 0.6402, "step": 154500 }, { "epoch": 1.71, "learning_rate": 2.148680869282783e-05, "loss": 0.6027, "step": 154505 }, { "epoch": 1.71, "learning_rate": 2.1485885965689313e-05, "loss": 0.6304, "step": 154510 }, { "epoch": 1.71, "learning_rate": 2.1484963238550804e-05, "loss": 0.6294, "step": 154515 }, { "epoch": 1.71, "learning_rate": 2.148404051141229e-05, "loss": 0.628, "step": 154520 }, { "epoch": 1.71, "learning_rate": 2.148311778427378e-05, "loss": 0.6301, "step": 154525 }, { "epoch": 1.71, "learning_rate": 2.1482195057135264e-05, "loss": 0.6518, "step": 154530 }, { "epoch": 1.71, "learning_rate": 2.1481272329996752e-05, "loss": 0.5585, "step": 154535 }, { "epoch": 1.71, "learning_rate": 2.148034960285824e-05, "loss": 0.6006, "step": 154540 }, { "epoch": 1.71, "learning_rate": 2.1479426875719728e-05, "loss": 0.6057, "step": 154545 }, { "epoch": 1.71, "learning_rate": 2.1478504148581216e-05, "loss": 0.624, "step": 154550 }, { "epoch": 1.71, "learning_rate": 2.1477581421442704e-05, "loss": 0.611, "step": 154555 }, { "epoch": 1.71, "learning_rate": 2.147665869430419e-05, "loss": 0.5837, "step": 154560 }, { "epoch": 1.71, "learning_rate": 2.1475735967165676e-05, "loss": 0.7067, "step": 154565 }, { "epoch": 1.71, "learning_rate": 2.1474813240027167e-05, "loss": 0.6095, "step": 154570 }, { "epoch": 1.71, "learning_rate": 2.1473890512888652e-05, "loss": 0.6386, "step": 154575 }, { "epoch": 1.71, "learning_rate": 2.1472967785750143e-05, "loss": 0.5554, "step": 154580 }, { "epoch": 1.71, "learning_rate": 2.1472045058611627e-05, "loss": 0.6868, "step": 154585 }, { "epoch": 1.71, "learning_rate": 2.147112233147312e-05, "loss": 0.6065, "step": 154590 }, { "epoch": 1.71, "learning_rate": 2.1470199604334603e-05, "loss": 0.6026, "step": 154595 }, { "epoch": 1.71, "learning_rate": 2.1469276877196094e-05, "loss": 0.6823, "step": 154600 }, { "epoch": 1.71, "learning_rate": 2.146835415005758e-05, "loss": 0.5749, "step": 154605 }, { "epoch": 1.71, "learning_rate": 2.1467431422919067e-05, "loss": 0.6027, "step": 154610 }, { "epoch": 1.71, "learning_rate": 2.1466508695780555e-05, "loss": 0.6579, "step": 154615 }, { "epoch": 1.71, "learning_rate": 2.1465585968642042e-05, "loss": 0.5814, "step": 154620 }, { "epoch": 1.71, "learning_rate": 2.146466324150353e-05, "loss": 0.6362, "step": 154625 }, { "epoch": 1.71, "learning_rate": 2.1463740514365015e-05, "loss": 0.6174, "step": 154630 }, { "epoch": 1.71, "learning_rate": 2.1462817787226506e-05, "loss": 0.641, "step": 154635 }, { "epoch": 1.71, "learning_rate": 2.146189506008799e-05, "loss": 0.6489, "step": 154640 }, { "epoch": 1.71, "learning_rate": 2.146097233294948e-05, "loss": 0.6788, "step": 154645 }, { "epoch": 1.71, "learning_rate": 2.1460049605810966e-05, "loss": 0.5901, "step": 154650 }, { "epoch": 1.71, "learning_rate": 2.1459126878672457e-05, "loss": 0.652, "step": 154655 }, { "epoch": 1.71, "learning_rate": 2.1458204151533942e-05, "loss": 0.6167, "step": 154660 }, { "epoch": 1.71, "learning_rate": 2.145728142439543e-05, "loss": 0.5483, "step": 154665 }, { "epoch": 1.71, "learning_rate": 2.1456358697256917e-05, "loss": 0.6431, "step": 154670 }, { "epoch": 1.71, "learning_rate": 2.1455435970118405e-05, "loss": 0.64, "step": 154675 }, { "epoch": 1.71, "learning_rate": 2.1454513242979893e-05, "loss": 0.6102, "step": 154680 }, { "epoch": 1.71, "learning_rate": 2.145359051584138e-05, "loss": 0.6419, "step": 154685 }, { "epoch": 1.71, "learning_rate": 2.145266778870287e-05, "loss": 0.6431, "step": 154690 }, { "epoch": 1.71, "learning_rate": 2.1451745061564353e-05, "loss": 0.6166, "step": 154695 }, { "epoch": 1.71, "learning_rate": 2.1450822334425845e-05, "loss": 0.6173, "step": 154700 }, { "epoch": 1.71, "learning_rate": 2.144989960728733e-05, "loss": 0.6223, "step": 154705 }, { "epoch": 1.71, "learning_rate": 2.144897688014882e-05, "loss": 0.5894, "step": 154710 }, { "epoch": 1.71, "learning_rate": 2.1448054153010305e-05, "loss": 0.5781, "step": 154715 }, { "epoch": 1.71, "learning_rate": 2.1447131425871793e-05, "loss": 0.6105, "step": 154720 }, { "epoch": 1.71, "learning_rate": 2.144620869873328e-05, "loss": 0.6107, "step": 154725 }, { "epoch": 1.71, "learning_rate": 2.1445285971594768e-05, "loss": 0.6739, "step": 154730 }, { "epoch": 1.71, "learning_rate": 2.1444363244456256e-05, "loss": 0.6406, "step": 154735 }, { "epoch": 1.71, "learning_rate": 2.1443440517317744e-05, "loss": 0.6583, "step": 154740 }, { "epoch": 1.71, "learning_rate": 2.1442517790179232e-05, "loss": 0.672, "step": 154745 }, { "epoch": 1.71, "learning_rate": 2.144159506304072e-05, "loss": 0.6376, "step": 154750 }, { "epoch": 1.71, "learning_rate": 2.1440672335902207e-05, "loss": 0.6724, "step": 154755 }, { "epoch": 1.71, "learning_rate": 2.1439749608763695e-05, "loss": 0.6288, "step": 154760 }, { "epoch": 1.71, "learning_rate": 2.1438826881625183e-05, "loss": 0.6118, "step": 154765 }, { "epoch": 1.71, "learning_rate": 2.1437904154486668e-05, "loss": 0.6573, "step": 154770 }, { "epoch": 1.71, "learning_rate": 2.143698142734816e-05, "loss": 0.642, "step": 154775 }, { "epoch": 1.71, "learning_rate": 2.1436058700209643e-05, "loss": 0.6047, "step": 154780 }, { "epoch": 1.71, "learning_rate": 2.143513597307113e-05, "loss": 0.6261, "step": 154785 }, { "epoch": 1.71, "learning_rate": 2.143421324593262e-05, "loss": 0.6171, "step": 154790 }, { "epoch": 1.71, "learning_rate": 2.1433290518794107e-05, "loss": 0.6527, "step": 154795 }, { "epoch": 1.71, "learning_rate": 2.1432367791655595e-05, "loss": 0.6376, "step": 154800 }, { "epoch": 1.71, "learning_rate": 2.1431445064517083e-05, "loss": 0.6344, "step": 154805 }, { "epoch": 1.71, "learning_rate": 2.143052233737857e-05, "loss": 0.6102, "step": 154810 }, { "epoch": 1.71, "learning_rate": 2.1429599610240058e-05, "loss": 0.581, "step": 154815 }, { "epoch": 1.71, "learning_rate": 2.1428676883101546e-05, "loss": 0.6582, "step": 154820 }, { "epoch": 1.71, "learning_rate": 2.1427754155963034e-05, "loss": 0.6668, "step": 154825 }, { "epoch": 1.71, "learning_rate": 2.1426831428824522e-05, "loss": 0.5971, "step": 154830 }, { "epoch": 1.71, "learning_rate": 2.142590870168601e-05, "loss": 0.6284, "step": 154835 }, { "epoch": 1.71, "learning_rate": 2.1424985974547494e-05, "loss": 0.6129, "step": 154840 }, { "epoch": 1.71, "learning_rate": 2.1424063247408982e-05, "loss": 0.6292, "step": 154845 }, { "epoch": 1.71, "learning_rate": 2.142314052027047e-05, "loss": 0.6131, "step": 154850 }, { "epoch": 1.71, "learning_rate": 2.1422217793131958e-05, "loss": 0.66, "step": 154855 }, { "epoch": 1.71, "learning_rate": 2.1421295065993446e-05, "loss": 0.6779, "step": 154860 }, { "epoch": 1.71, "learning_rate": 2.1420372338854933e-05, "loss": 0.6257, "step": 154865 }, { "epoch": 1.71, "learning_rate": 2.141944961171642e-05, "loss": 0.6458, "step": 154870 }, { "epoch": 1.71, "learning_rate": 2.141852688457791e-05, "loss": 0.6011, "step": 154875 }, { "epoch": 1.71, "learning_rate": 2.1417604157439397e-05, "loss": 0.7068, "step": 154880 }, { "epoch": 1.71, "learning_rate": 2.1416681430300885e-05, "loss": 0.636, "step": 154885 }, { "epoch": 1.72, "learning_rate": 2.1415758703162373e-05, "loss": 0.6873, "step": 154890 }, { "epoch": 1.72, "learning_rate": 2.1414835976023857e-05, "loss": 0.5971, "step": 154895 }, { "epoch": 1.72, "learning_rate": 2.141391324888535e-05, "loss": 0.6445, "step": 154900 }, { "epoch": 1.72, "learning_rate": 2.1412990521746833e-05, "loss": 0.6482, "step": 154905 }, { "epoch": 1.72, "learning_rate": 2.1412067794608324e-05, "loss": 0.6232, "step": 154910 }, { "epoch": 1.72, "learning_rate": 2.141114506746981e-05, "loss": 0.5942, "step": 154915 }, { "epoch": 1.72, "learning_rate": 2.1410222340331296e-05, "loss": 0.6068, "step": 154920 }, { "epoch": 1.72, "learning_rate": 2.1409299613192784e-05, "loss": 0.6034, "step": 154925 }, { "epoch": 1.72, "learning_rate": 2.1408376886054272e-05, "loss": 0.6613, "step": 154930 }, { "epoch": 1.72, "learning_rate": 2.140745415891576e-05, "loss": 0.5844, "step": 154935 }, { "epoch": 1.72, "learning_rate": 2.1406531431777248e-05, "loss": 0.6082, "step": 154940 }, { "epoch": 1.72, "learning_rate": 2.1405608704638736e-05, "loss": 0.6092, "step": 154945 }, { "epoch": 1.72, "learning_rate": 2.1404685977500223e-05, "loss": 0.6176, "step": 154950 }, { "epoch": 1.72, "learning_rate": 2.140376325036171e-05, "loss": 0.6561, "step": 154955 }, { "epoch": 1.72, "learning_rate": 2.1402840523223196e-05, "loss": 0.6081, "step": 154960 }, { "epoch": 1.72, "learning_rate": 2.1401917796084687e-05, "loss": 0.6167, "step": 154965 }, { "epoch": 1.72, "learning_rate": 2.140099506894617e-05, "loss": 0.6511, "step": 154970 }, { "epoch": 1.72, "learning_rate": 2.1400072341807663e-05, "loss": 0.661, "step": 154975 }, { "epoch": 1.72, "learning_rate": 2.1399149614669147e-05, "loss": 0.6406, "step": 154980 }, { "epoch": 1.72, "learning_rate": 2.139822688753064e-05, "loss": 0.6239, "step": 154985 }, { "epoch": 1.72, "learning_rate": 2.1397304160392123e-05, "loss": 0.6337, "step": 154990 }, { "epoch": 1.72, "learning_rate": 2.139638143325361e-05, "loss": 0.67, "step": 154995 }, { "epoch": 1.72, "learning_rate": 2.13954587061151e-05, "loss": 0.5989, "step": 155000 }, { "epoch": 1.72, "eval_loss": 0.6073497533798218, "eval_runtime": 69.1596, "eval_samples_per_second": 28.919, "eval_steps_per_second": 14.459, "step": 155000 }, { "epoch": 1.72, "learning_rate": 2.1394535978976586e-05, "loss": 0.5538, "step": 155005 }, { "epoch": 1.72, "learning_rate": 2.1393613251838074e-05, "loss": 0.6393, "step": 155010 }, { "epoch": 1.72, "learning_rate": 2.139269052469956e-05, "loss": 0.626, "step": 155015 }, { "epoch": 1.72, "learning_rate": 2.139176779756105e-05, "loss": 0.7236, "step": 155020 }, { "epoch": 1.72, "learning_rate": 2.1390845070422534e-05, "loss": 0.6336, "step": 155025 }, { "epoch": 1.72, "learning_rate": 2.1389922343284026e-05, "loss": 0.6258, "step": 155030 }, { "epoch": 1.72, "learning_rate": 2.138899961614551e-05, "loss": 0.6278, "step": 155035 }, { "epoch": 1.72, "learning_rate": 2.1388076889007e-05, "loss": 0.6588, "step": 155040 }, { "epoch": 1.72, "learning_rate": 2.1387154161868486e-05, "loss": 0.5826, "step": 155045 }, { "epoch": 1.72, "learning_rate": 2.1386231434729974e-05, "loss": 0.6044, "step": 155050 }, { "epoch": 1.72, "learning_rate": 2.138530870759146e-05, "loss": 0.6241, "step": 155055 }, { "epoch": 1.72, "learning_rate": 2.138438598045295e-05, "loss": 0.6398, "step": 155060 }, { "epoch": 1.72, "learning_rate": 2.1383463253314437e-05, "loss": 0.6692, "step": 155065 }, { "epoch": 1.72, "learning_rate": 2.138254052617592e-05, "loss": 0.5905, "step": 155070 }, { "epoch": 1.72, "learning_rate": 2.1381617799037413e-05, "loss": 0.6262, "step": 155075 }, { "epoch": 1.72, "learning_rate": 2.1380695071898897e-05, "loss": 0.61, "step": 155080 }, { "epoch": 1.72, "learning_rate": 2.137977234476039e-05, "loss": 0.6275, "step": 155085 }, { "epoch": 1.72, "learning_rate": 2.1378849617621873e-05, "loss": 0.6531, "step": 155090 }, { "epoch": 1.72, "learning_rate": 2.1377926890483364e-05, "loss": 0.641, "step": 155095 }, { "epoch": 1.72, "learning_rate": 2.137700416334485e-05, "loss": 0.6215, "step": 155100 }, { "epoch": 1.72, "learning_rate": 2.137608143620634e-05, "loss": 0.6293, "step": 155105 }, { "epoch": 1.72, "learning_rate": 2.1375158709067824e-05, "loss": 0.5998, "step": 155110 }, { "epoch": 1.72, "learning_rate": 2.1374235981929312e-05, "loss": 0.5927, "step": 155115 }, { "epoch": 1.72, "learning_rate": 2.13733132547908e-05, "loss": 0.6421, "step": 155120 }, { "epoch": 1.72, "learning_rate": 2.1372390527652288e-05, "loss": 0.6713, "step": 155125 }, { "epoch": 1.72, "learning_rate": 2.1371467800513776e-05, "loss": 0.5903, "step": 155130 }, { "epoch": 1.72, "learning_rate": 2.1370545073375264e-05, "loss": 0.594, "step": 155135 }, { "epoch": 1.72, "learning_rate": 2.136962234623675e-05, "loss": 0.595, "step": 155140 }, { "epoch": 1.72, "learning_rate": 2.1368699619098236e-05, "loss": 0.6057, "step": 155145 }, { "epoch": 1.72, "learning_rate": 2.1367776891959727e-05, "loss": 0.69, "step": 155150 }, { "epoch": 1.72, "learning_rate": 2.136685416482121e-05, "loss": 0.6198, "step": 155155 }, { "epoch": 1.72, "learning_rate": 2.1365931437682703e-05, "loss": 0.6469, "step": 155160 }, { "epoch": 1.72, "learning_rate": 2.1365008710544187e-05, "loss": 0.6202, "step": 155165 }, { "epoch": 1.72, "learning_rate": 2.1364085983405675e-05, "loss": 0.6217, "step": 155170 }, { "epoch": 1.72, "learning_rate": 2.1363163256267163e-05, "loss": 0.7037, "step": 155175 }, { "epoch": 1.72, "learning_rate": 2.136224052912865e-05, "loss": 0.6147, "step": 155180 }, { "epoch": 1.72, "learning_rate": 2.136131780199014e-05, "loss": 0.643, "step": 155185 }, { "epoch": 1.72, "learning_rate": 2.1360395074851627e-05, "loss": 0.6359, "step": 155190 }, { "epoch": 1.72, "learning_rate": 2.1359472347713114e-05, "loss": 0.634, "step": 155195 }, { "epoch": 1.72, "learning_rate": 2.1358549620574602e-05, "loss": 0.571, "step": 155200 }, { "epoch": 1.72, "learning_rate": 2.135762689343609e-05, "loss": 0.5991, "step": 155205 }, { "epoch": 1.72, "learning_rate": 2.1356704166297578e-05, "loss": 0.6148, "step": 155210 }, { "epoch": 1.72, "learning_rate": 2.1355781439159066e-05, "loss": 0.7061, "step": 155215 }, { "epoch": 1.72, "learning_rate": 2.135485871202055e-05, "loss": 0.6119, "step": 155220 }, { "epoch": 1.72, "learning_rate": 2.1353935984882038e-05, "loss": 0.6865, "step": 155225 }, { "epoch": 1.72, "learning_rate": 2.1353013257743526e-05, "loss": 0.6522, "step": 155230 }, { "epoch": 1.72, "learning_rate": 2.1352090530605014e-05, "loss": 0.677, "step": 155235 }, { "epoch": 1.72, "learning_rate": 2.1351167803466502e-05, "loss": 0.7007, "step": 155240 }, { "epoch": 1.72, "learning_rate": 2.135024507632799e-05, "loss": 0.6022, "step": 155245 }, { "epoch": 1.72, "learning_rate": 2.1349322349189477e-05, "loss": 0.657, "step": 155250 }, { "epoch": 1.72, "learning_rate": 2.1348399622050965e-05, "loss": 0.6206, "step": 155255 }, { "epoch": 1.72, "learning_rate": 2.1347476894912453e-05, "loss": 0.59, "step": 155260 }, { "epoch": 1.72, "learning_rate": 2.134655416777394e-05, "loss": 0.6047, "step": 155265 }, { "epoch": 1.72, "learning_rate": 2.134563144063543e-05, "loss": 0.5933, "step": 155270 }, { "epoch": 1.72, "learning_rate": 2.1344708713496917e-05, "loss": 0.6554, "step": 155275 }, { "epoch": 1.72, "learning_rate": 2.13437859863584e-05, "loss": 0.6296, "step": 155280 }, { "epoch": 1.72, "learning_rate": 2.1342863259219892e-05, "loss": 0.6049, "step": 155285 }, { "epoch": 1.72, "learning_rate": 2.1341940532081377e-05, "loss": 0.6259, "step": 155290 }, { "epoch": 1.72, "learning_rate": 2.1341017804942865e-05, "loss": 0.7124, "step": 155295 }, { "epoch": 1.72, "learning_rate": 2.1340095077804353e-05, "loss": 0.6531, "step": 155300 }, { "epoch": 1.72, "learning_rate": 2.133917235066584e-05, "loss": 0.6544, "step": 155305 }, { "epoch": 1.72, "learning_rate": 2.1338249623527328e-05, "loss": 0.6237, "step": 155310 }, { "epoch": 1.72, "learning_rate": 2.1337326896388816e-05, "loss": 0.637, "step": 155315 }, { "epoch": 1.72, "learning_rate": 2.1336404169250304e-05, "loss": 0.5945, "step": 155320 }, { "epoch": 1.72, "learning_rate": 2.1335481442111792e-05, "loss": 0.6454, "step": 155325 }, { "epoch": 1.72, "learning_rate": 2.133455871497328e-05, "loss": 0.5845, "step": 155330 }, { "epoch": 1.72, "learning_rate": 2.1333635987834767e-05, "loss": 0.578, "step": 155335 }, { "epoch": 1.72, "learning_rate": 2.1332713260696255e-05, "loss": 0.6164, "step": 155340 }, { "epoch": 1.72, "learning_rate": 2.133179053355774e-05, "loss": 0.6284, "step": 155345 }, { "epoch": 1.72, "learning_rate": 2.133086780641923e-05, "loss": 0.6395, "step": 155350 }, { "epoch": 1.72, "learning_rate": 2.1329945079280715e-05, "loss": 0.5533, "step": 155355 }, { "epoch": 1.72, "learning_rate": 2.1329022352142207e-05, "loss": 0.6036, "step": 155360 }, { "epoch": 1.72, "learning_rate": 2.132809962500369e-05, "loss": 0.6595, "step": 155365 }, { "epoch": 1.72, "learning_rate": 2.132717689786518e-05, "loss": 0.5566, "step": 155370 }, { "epoch": 1.72, "learning_rate": 2.1326254170726667e-05, "loss": 0.6453, "step": 155375 }, { "epoch": 1.72, "learning_rate": 2.1325331443588155e-05, "loss": 0.6079, "step": 155380 }, { "epoch": 1.72, "learning_rate": 2.1324408716449643e-05, "loss": 0.6334, "step": 155385 }, { "epoch": 1.72, "learning_rate": 2.132348598931113e-05, "loss": 0.661, "step": 155390 }, { "epoch": 1.72, "learning_rate": 2.1322563262172618e-05, "loss": 0.6224, "step": 155395 }, { "epoch": 1.72, "learning_rate": 2.1321640535034103e-05, "loss": 0.6125, "step": 155400 }, { "epoch": 1.72, "learning_rate": 2.1320717807895594e-05, "loss": 0.5996, "step": 155405 }, { "epoch": 1.72, "learning_rate": 2.131979508075708e-05, "loss": 0.6258, "step": 155410 }, { "epoch": 1.72, "learning_rate": 2.131887235361857e-05, "loss": 0.6218, "step": 155415 }, { "epoch": 1.72, "learning_rate": 2.1317949626480054e-05, "loss": 0.6392, "step": 155420 }, { "epoch": 1.72, "learning_rate": 2.1317026899341545e-05, "loss": 0.6649, "step": 155425 }, { "epoch": 1.72, "learning_rate": 2.131610417220303e-05, "loss": 0.6267, "step": 155430 }, { "epoch": 1.72, "learning_rate": 2.1315181445064518e-05, "loss": 0.6598, "step": 155435 }, { "epoch": 1.72, "learning_rate": 2.1314258717926005e-05, "loss": 0.6134, "step": 155440 }, { "epoch": 1.72, "learning_rate": 2.1313335990787493e-05, "loss": 0.5588, "step": 155445 }, { "epoch": 1.72, "learning_rate": 2.131241326364898e-05, "loss": 0.6291, "step": 155450 }, { "epoch": 1.72, "learning_rate": 2.1311490536510466e-05, "loss": 0.6477, "step": 155455 }, { "epoch": 1.72, "learning_rate": 2.1310567809371957e-05, "loss": 0.6459, "step": 155460 }, { "epoch": 1.72, "learning_rate": 2.130964508223344e-05, "loss": 0.5912, "step": 155465 }, { "epoch": 1.72, "learning_rate": 2.1308722355094933e-05, "loss": 0.6153, "step": 155470 }, { "epoch": 1.72, "learning_rate": 2.1307799627956417e-05, "loss": 0.5914, "step": 155475 }, { "epoch": 1.72, "learning_rate": 2.1306876900817908e-05, "loss": 0.6805, "step": 155480 }, { "epoch": 1.72, "learning_rate": 2.1305954173679393e-05, "loss": 0.6385, "step": 155485 }, { "epoch": 1.72, "learning_rate": 2.1305031446540884e-05, "loss": 0.6324, "step": 155490 }, { "epoch": 1.72, "learning_rate": 2.130410871940237e-05, "loss": 0.6105, "step": 155495 }, { "epoch": 1.72, "learning_rate": 2.1303185992263856e-05, "loss": 0.6444, "step": 155500 }, { "epoch": 1.72, "learning_rate": 2.1302263265125344e-05, "loss": 0.6241, "step": 155505 }, { "epoch": 1.72, "learning_rate": 2.1301340537986832e-05, "loss": 0.6056, "step": 155510 }, { "epoch": 1.72, "learning_rate": 2.130041781084832e-05, "loss": 0.6493, "step": 155515 }, { "epoch": 1.72, "learning_rate": 2.1299495083709808e-05, "loss": 0.6261, "step": 155520 }, { "epoch": 1.72, "learning_rate": 2.1298572356571296e-05, "loss": 0.6622, "step": 155525 }, { "epoch": 1.72, "learning_rate": 2.129764962943278e-05, "loss": 0.6269, "step": 155530 }, { "epoch": 1.72, "learning_rate": 2.129672690229427e-05, "loss": 0.6246, "step": 155535 }, { "epoch": 1.72, "learning_rate": 2.1295804175155756e-05, "loss": 0.6399, "step": 155540 }, { "epoch": 1.72, "learning_rate": 2.1294881448017247e-05, "loss": 0.5995, "step": 155545 }, { "epoch": 1.72, "learning_rate": 2.129395872087873e-05, "loss": 0.6432, "step": 155550 }, { "epoch": 1.72, "learning_rate": 2.129303599374022e-05, "loss": 0.5671, "step": 155555 }, { "epoch": 1.72, "learning_rate": 2.1292113266601707e-05, "loss": 0.6184, "step": 155560 }, { "epoch": 1.72, "learning_rate": 2.1291190539463195e-05, "loss": 0.6514, "step": 155565 }, { "epoch": 1.72, "learning_rate": 2.1290267812324683e-05, "loss": 0.6628, "step": 155570 }, { "epoch": 1.72, "learning_rate": 2.128934508518617e-05, "loss": 0.6518, "step": 155575 }, { "epoch": 1.72, "learning_rate": 2.128842235804766e-05, "loss": 0.6502, "step": 155580 }, { "epoch": 1.72, "learning_rate": 2.1287499630909146e-05, "loss": 0.5923, "step": 155585 }, { "epoch": 1.72, "learning_rate": 2.1286576903770634e-05, "loss": 0.6211, "step": 155590 }, { "epoch": 1.72, "learning_rate": 2.1285654176632122e-05, "loss": 0.6749, "step": 155595 }, { "epoch": 1.72, "learning_rate": 2.128473144949361e-05, "loss": 0.5879, "step": 155600 }, { "epoch": 1.72, "learning_rate": 2.1283808722355094e-05, "loss": 0.6169, "step": 155605 }, { "epoch": 1.72, "learning_rate": 2.1282885995216582e-05, "loss": 0.6363, "step": 155610 }, { "epoch": 1.72, "learning_rate": 2.128196326807807e-05, "loss": 0.6466, "step": 155615 }, { "epoch": 1.72, "learning_rate": 2.1281040540939558e-05, "loss": 0.6092, "step": 155620 }, { "epoch": 1.72, "learning_rate": 2.1280117813801046e-05, "loss": 0.6475, "step": 155625 }, { "epoch": 1.72, "learning_rate": 2.1279195086662534e-05, "loss": 0.6414, "step": 155630 }, { "epoch": 1.72, "learning_rate": 2.127827235952402e-05, "loss": 0.6067, "step": 155635 }, { "epoch": 1.72, "learning_rate": 2.127734963238551e-05, "loss": 0.643, "step": 155640 }, { "epoch": 1.72, "learning_rate": 2.1276426905246997e-05, "loss": 0.6335, "step": 155645 }, { "epoch": 1.72, "learning_rate": 2.1275504178108485e-05, "loss": 0.6638, "step": 155650 }, { "epoch": 1.72, "learning_rate": 2.1274581450969973e-05, "loss": 0.5738, "step": 155655 }, { "epoch": 1.72, "learning_rate": 2.127365872383146e-05, "loss": 0.6278, "step": 155660 }, { "epoch": 1.72, "learning_rate": 2.1272735996692945e-05, "loss": 0.6789, "step": 155665 }, { "epoch": 1.72, "learning_rate": 2.1271813269554436e-05, "loss": 0.593, "step": 155670 }, { "epoch": 1.72, "learning_rate": 2.127089054241592e-05, "loss": 0.619, "step": 155675 }, { "epoch": 1.72, "learning_rate": 2.126996781527741e-05, "loss": 0.6534, "step": 155680 }, { "epoch": 1.72, "learning_rate": 2.1269045088138897e-05, "loss": 0.6851, "step": 155685 }, { "epoch": 1.72, "learning_rate": 2.1268122361000384e-05, "loss": 0.665, "step": 155690 }, { "epoch": 1.72, "learning_rate": 2.1267199633861872e-05, "loss": 0.6111, "step": 155695 }, { "epoch": 1.72, "learning_rate": 2.126627690672336e-05, "loss": 0.6704, "step": 155700 }, { "epoch": 1.72, "learning_rate": 2.1265354179584848e-05, "loss": 0.5937, "step": 155705 }, { "epoch": 1.72, "learning_rate": 2.1264431452446336e-05, "loss": 0.6228, "step": 155710 }, { "epoch": 1.72, "learning_rate": 2.1263508725307824e-05, "loss": 0.6155, "step": 155715 }, { "epoch": 1.72, "learning_rate": 2.126258599816931e-05, "loss": 0.6544, "step": 155720 }, { "epoch": 1.72, "learning_rate": 2.12616632710308e-05, "loss": 0.5583, "step": 155725 }, { "epoch": 1.72, "learning_rate": 2.1260740543892284e-05, "loss": 0.6224, "step": 155730 }, { "epoch": 1.72, "learning_rate": 2.1259817816753775e-05, "loss": 0.6027, "step": 155735 }, { "epoch": 1.72, "learning_rate": 2.125889508961526e-05, "loss": 0.6022, "step": 155740 }, { "epoch": 1.72, "learning_rate": 2.125797236247675e-05, "loss": 0.6463, "step": 155745 }, { "epoch": 1.72, "learning_rate": 2.1257049635338235e-05, "loss": 0.6308, "step": 155750 }, { "epoch": 1.72, "learning_rate": 2.1256126908199723e-05, "loss": 0.6609, "step": 155755 }, { "epoch": 1.72, "learning_rate": 2.125520418106121e-05, "loss": 0.6545, "step": 155760 }, { "epoch": 1.72, "learning_rate": 2.12542814539227e-05, "loss": 0.6782, "step": 155765 }, { "epoch": 1.72, "learning_rate": 2.1253358726784187e-05, "loss": 0.5945, "step": 155770 }, { "epoch": 1.72, "learning_rate": 2.1252435999645674e-05, "loss": 0.6407, "step": 155775 }, { "epoch": 1.72, "learning_rate": 2.1251513272507162e-05, "loss": 0.6753, "step": 155780 }, { "epoch": 1.72, "learning_rate": 2.1250590545368647e-05, "loss": 0.6668, "step": 155785 }, { "epoch": 1.73, "learning_rate": 2.1249667818230138e-05, "loss": 0.6269, "step": 155790 }, { "epoch": 1.73, "learning_rate": 2.1248745091091622e-05, "loss": 0.6173, "step": 155795 }, { "epoch": 1.73, "learning_rate": 2.1247822363953114e-05, "loss": 0.6206, "step": 155800 }, { "epoch": 1.73, "learning_rate": 2.1246899636814598e-05, "loss": 0.6382, "step": 155805 }, { "epoch": 1.73, "learning_rate": 2.124597690967609e-05, "loss": 0.6702, "step": 155810 }, { "epoch": 1.73, "learning_rate": 2.1245054182537574e-05, "loss": 0.6438, "step": 155815 }, { "epoch": 1.73, "learning_rate": 2.124413145539906e-05, "loss": 0.6579, "step": 155820 }, { "epoch": 1.73, "learning_rate": 2.124320872826055e-05, "loss": 0.6662, "step": 155825 }, { "epoch": 1.73, "learning_rate": 2.1242286001122037e-05, "loss": 0.6292, "step": 155830 }, { "epoch": 1.73, "learning_rate": 2.1241363273983525e-05, "loss": 0.6456, "step": 155835 }, { "epoch": 1.73, "learning_rate": 2.124044054684501e-05, "loss": 0.6141, "step": 155840 }, { "epoch": 1.73, "learning_rate": 2.12395178197065e-05, "loss": 0.655, "step": 155845 }, { "epoch": 1.73, "learning_rate": 2.1238595092567985e-05, "loss": 0.6307, "step": 155850 }, { "epoch": 1.73, "learning_rate": 2.1237672365429477e-05, "loss": 0.6087, "step": 155855 }, { "epoch": 1.73, "learning_rate": 2.123674963829096e-05, "loss": 0.5774, "step": 155860 }, { "epoch": 1.73, "learning_rate": 2.1235826911152452e-05, "loss": 0.5892, "step": 155865 }, { "epoch": 1.73, "learning_rate": 2.1234904184013937e-05, "loss": 0.6027, "step": 155870 }, { "epoch": 1.73, "learning_rate": 2.1233981456875428e-05, "loss": 0.6737, "step": 155875 }, { "epoch": 1.73, "learning_rate": 2.1233058729736912e-05, "loss": 0.641, "step": 155880 }, { "epoch": 1.73, "learning_rate": 2.12321360025984e-05, "loss": 0.6458, "step": 155885 }, { "epoch": 1.73, "learning_rate": 2.1231213275459888e-05, "loss": 0.6806, "step": 155890 }, { "epoch": 1.73, "learning_rate": 2.1230290548321376e-05, "loss": 0.6736, "step": 155895 }, { "epoch": 1.73, "learning_rate": 2.1229367821182864e-05, "loss": 0.6767, "step": 155900 }, { "epoch": 1.73, "learning_rate": 2.122844509404435e-05, "loss": 0.6003, "step": 155905 }, { "epoch": 1.73, "learning_rate": 2.122752236690584e-05, "loss": 0.6178, "step": 155910 }, { "epoch": 1.73, "learning_rate": 2.1226599639767324e-05, "loss": 0.6273, "step": 155915 }, { "epoch": 1.73, "learning_rate": 2.1225676912628815e-05, "loss": 0.6812, "step": 155920 }, { "epoch": 1.73, "learning_rate": 2.12247541854903e-05, "loss": 0.6169, "step": 155925 }, { "epoch": 1.73, "learning_rate": 2.122383145835179e-05, "loss": 0.5739, "step": 155930 }, { "epoch": 1.73, "learning_rate": 2.1222908731213275e-05, "loss": 0.6444, "step": 155935 }, { "epoch": 1.73, "learning_rate": 2.1221986004074763e-05, "loss": 0.6169, "step": 155940 }, { "epoch": 1.73, "learning_rate": 2.122106327693625e-05, "loss": 0.6183, "step": 155945 }, { "epoch": 1.73, "learning_rate": 2.122014054979774e-05, "loss": 0.589, "step": 155950 }, { "epoch": 1.73, "learning_rate": 2.1219217822659227e-05, "loss": 0.5608, "step": 155955 }, { "epoch": 1.73, "learning_rate": 2.1218295095520715e-05, "loss": 0.6455, "step": 155960 }, { "epoch": 1.73, "learning_rate": 2.1217372368382203e-05, "loss": 0.573, "step": 155965 }, { "epoch": 1.73, "learning_rate": 2.121644964124369e-05, "loss": 0.6311, "step": 155970 }, { "epoch": 1.73, "learning_rate": 2.1215526914105178e-05, "loss": 0.5994, "step": 155975 }, { "epoch": 1.73, "learning_rate": 2.1214604186966663e-05, "loss": 0.6563, "step": 155980 }, { "epoch": 1.73, "learning_rate": 2.1213681459828154e-05, "loss": 0.639, "step": 155985 }, { "epoch": 1.73, "learning_rate": 2.121275873268964e-05, "loss": 0.6695, "step": 155990 }, { "epoch": 1.73, "learning_rate": 2.1211836005551126e-05, "loss": 0.6962, "step": 155995 }, { "epoch": 1.73, "learning_rate": 2.1210913278412614e-05, "loss": 0.6184, "step": 156000 }, { "epoch": 1.73, "eval_loss": 0.6124412417411804, "eval_runtime": 69.1559, "eval_samples_per_second": 28.92, "eval_steps_per_second": 14.46, "step": 156000 }, { "epoch": 1.73, "learning_rate": 2.1209990551274102e-05, "loss": 0.6208, "step": 156005 }, { "epoch": 1.73, "learning_rate": 2.120906782413559e-05, "loss": 0.6634, "step": 156010 }, { "epoch": 1.73, "learning_rate": 2.1208145096997078e-05, "loss": 0.6053, "step": 156015 }, { "epoch": 1.73, "learning_rate": 2.1207222369858565e-05, "loss": 0.618, "step": 156020 }, { "epoch": 1.73, "learning_rate": 2.1206299642720053e-05, "loss": 0.6697, "step": 156025 }, { "epoch": 1.73, "learning_rate": 2.120537691558154e-05, "loss": 0.6094, "step": 156030 }, { "epoch": 1.73, "learning_rate": 2.120445418844303e-05, "loss": 0.6418, "step": 156035 }, { "epoch": 1.73, "learning_rate": 2.1203531461304517e-05, "loss": 0.5678, "step": 156040 }, { "epoch": 1.73, "learning_rate": 2.1202608734166005e-05, "loss": 0.6449, "step": 156045 }, { "epoch": 1.73, "learning_rate": 2.1201686007027493e-05, "loss": 0.5903, "step": 156050 }, { "epoch": 1.73, "learning_rate": 2.1200763279888977e-05, "loss": 0.6502, "step": 156055 }, { "epoch": 1.73, "learning_rate": 2.1199840552750465e-05, "loss": 0.597, "step": 156060 }, { "epoch": 1.73, "learning_rate": 2.1198917825611953e-05, "loss": 0.6099, "step": 156065 }, { "epoch": 1.73, "learning_rate": 2.119799509847344e-05, "loss": 0.5983, "step": 156070 }, { "epoch": 1.73, "learning_rate": 2.119707237133493e-05, "loss": 0.6237, "step": 156075 }, { "epoch": 1.73, "learning_rate": 2.1196149644196416e-05, "loss": 0.6403, "step": 156080 }, { "epoch": 1.73, "learning_rate": 2.1195226917057904e-05, "loss": 0.5762, "step": 156085 }, { "epoch": 1.73, "learning_rate": 2.1194304189919392e-05, "loss": 0.6137, "step": 156090 }, { "epoch": 1.73, "learning_rate": 2.119338146278088e-05, "loss": 0.5807, "step": 156095 }, { "epoch": 1.73, "learning_rate": 2.1192458735642368e-05, "loss": 0.7145, "step": 156100 }, { "epoch": 1.73, "learning_rate": 2.1191536008503855e-05, "loss": 0.6448, "step": 156105 }, { "epoch": 1.73, "learning_rate": 2.1190613281365343e-05, "loss": 0.6429, "step": 156110 }, { "epoch": 1.73, "learning_rate": 2.1189690554226828e-05, "loss": 0.6726, "step": 156115 }, { "epoch": 1.73, "learning_rate": 2.118876782708832e-05, "loss": 0.6652, "step": 156120 }, { "epoch": 1.73, "learning_rate": 2.1187845099949803e-05, "loss": 0.6357, "step": 156125 }, { "epoch": 1.73, "learning_rate": 2.118692237281129e-05, "loss": 0.6054, "step": 156130 }, { "epoch": 1.73, "learning_rate": 2.118599964567278e-05, "loss": 0.628, "step": 156135 }, { "epoch": 1.73, "learning_rate": 2.1185076918534267e-05, "loss": 0.595, "step": 156140 }, { "epoch": 1.73, "learning_rate": 2.1184154191395755e-05, "loss": 0.6641, "step": 156145 }, { "epoch": 1.73, "learning_rate": 2.1183231464257243e-05, "loss": 0.5876, "step": 156150 }, { "epoch": 1.73, "learning_rate": 2.118230873711873e-05, "loss": 0.6462, "step": 156155 }, { "epoch": 1.73, "learning_rate": 2.118138600998022e-05, "loss": 0.6494, "step": 156160 }, { "epoch": 1.73, "learning_rate": 2.1180463282841706e-05, "loss": 0.6641, "step": 156165 }, { "epoch": 1.73, "learning_rate": 2.117954055570319e-05, "loss": 0.623, "step": 156170 }, { "epoch": 1.73, "learning_rate": 2.1178617828564682e-05, "loss": 0.6333, "step": 156175 }, { "epoch": 1.73, "learning_rate": 2.1177695101426166e-05, "loss": 0.564, "step": 156180 }, { "epoch": 1.73, "learning_rate": 2.1176772374287658e-05, "loss": 0.5899, "step": 156185 }, { "epoch": 1.73, "learning_rate": 2.1175849647149142e-05, "loss": 0.5938, "step": 156190 }, { "epoch": 1.73, "learning_rate": 2.1174926920010633e-05, "loss": 0.6911, "step": 156195 }, { "epoch": 1.73, "learning_rate": 2.1174004192872118e-05, "loss": 0.6502, "step": 156200 }, { "epoch": 1.73, "learning_rate": 2.1173081465733606e-05, "loss": 0.6311, "step": 156205 }, { "epoch": 1.73, "learning_rate": 2.1172158738595094e-05, "loss": 0.6072, "step": 156210 }, { "epoch": 1.73, "learning_rate": 2.117123601145658e-05, "loss": 0.6797, "step": 156215 }, { "epoch": 1.73, "learning_rate": 2.117031328431807e-05, "loss": 0.6503, "step": 156220 }, { "epoch": 1.73, "learning_rate": 2.1169390557179554e-05, "loss": 0.6269, "step": 156225 }, { "epoch": 1.73, "learning_rate": 2.1168467830041045e-05, "loss": 0.6169, "step": 156230 }, { "epoch": 1.73, "learning_rate": 2.116754510290253e-05, "loss": 0.6533, "step": 156235 }, { "epoch": 1.73, "learning_rate": 2.116662237576402e-05, "loss": 0.6537, "step": 156240 }, { "epoch": 1.73, "learning_rate": 2.1165699648625505e-05, "loss": 0.662, "step": 156245 }, { "epoch": 1.73, "learning_rate": 2.1164776921486996e-05, "loss": 0.6561, "step": 156250 }, { "epoch": 1.73, "learning_rate": 2.116385419434848e-05, "loss": 0.6239, "step": 156255 }, { "epoch": 1.73, "learning_rate": 2.1162931467209972e-05, "loss": 0.6748, "step": 156260 }, { "epoch": 1.73, "learning_rate": 2.1162008740071456e-05, "loss": 0.6081, "step": 156265 }, { "epoch": 1.73, "learning_rate": 2.1161086012932944e-05, "loss": 0.6264, "step": 156270 }, { "epoch": 1.73, "learning_rate": 2.1160163285794432e-05, "loss": 0.5883, "step": 156275 }, { "epoch": 1.73, "learning_rate": 2.115924055865592e-05, "loss": 0.6159, "step": 156280 }, { "epoch": 1.73, "learning_rate": 2.1158317831517408e-05, "loss": 0.6313, "step": 156285 }, { "epoch": 1.73, "learning_rate": 2.1157395104378892e-05, "loss": 0.6309, "step": 156290 }, { "epoch": 1.73, "learning_rate": 2.1156472377240384e-05, "loss": 0.6257, "step": 156295 }, { "epoch": 1.73, "learning_rate": 2.1155549650101868e-05, "loss": 0.626, "step": 156300 }, { "epoch": 1.73, "learning_rate": 2.115462692296336e-05, "loss": 0.6139, "step": 156305 }, { "epoch": 1.73, "learning_rate": 2.1153704195824844e-05, "loss": 0.61, "step": 156310 }, { "epoch": 1.73, "learning_rate": 2.1152781468686335e-05, "loss": 0.6136, "step": 156315 }, { "epoch": 1.73, "learning_rate": 2.115185874154782e-05, "loss": 0.642, "step": 156320 }, { "epoch": 1.73, "learning_rate": 2.1150936014409307e-05, "loss": 0.654, "step": 156325 }, { "epoch": 1.73, "learning_rate": 2.1150013287270795e-05, "loss": 0.5824, "step": 156330 }, { "epoch": 1.73, "learning_rate": 2.1149090560132283e-05, "loss": 0.5882, "step": 156335 }, { "epoch": 1.73, "learning_rate": 2.114816783299377e-05, "loss": 0.6291, "step": 156340 }, { "epoch": 1.73, "learning_rate": 2.114724510585526e-05, "loss": 0.6327, "step": 156345 }, { "epoch": 1.73, "learning_rate": 2.1146322378716747e-05, "loss": 0.6237, "step": 156350 }, { "epoch": 1.73, "learning_rate": 2.1145399651578234e-05, "loss": 0.6231, "step": 156355 }, { "epoch": 1.73, "learning_rate": 2.1144476924439722e-05, "loss": 0.6393, "step": 156360 }, { "epoch": 1.73, "learning_rate": 2.1143554197301207e-05, "loss": 0.6705, "step": 156365 }, { "epoch": 1.73, "learning_rate": 2.1142631470162698e-05, "loss": 0.6379, "step": 156370 }, { "epoch": 1.73, "learning_rate": 2.1141708743024182e-05, "loss": 0.6254, "step": 156375 }, { "epoch": 1.73, "learning_rate": 2.114078601588567e-05, "loss": 0.6254, "step": 156380 }, { "epoch": 1.73, "learning_rate": 2.1139863288747158e-05, "loss": 0.6288, "step": 156385 }, { "epoch": 1.73, "learning_rate": 2.1138940561608646e-05, "loss": 0.5887, "step": 156390 }, { "epoch": 1.73, "learning_rate": 2.1138017834470134e-05, "loss": 0.5839, "step": 156395 }, { "epoch": 1.73, "learning_rate": 2.113709510733162e-05, "loss": 0.6197, "step": 156400 }, { "epoch": 1.73, "learning_rate": 2.113617238019311e-05, "loss": 0.6246, "step": 156405 }, { "epoch": 1.73, "learning_rate": 2.1135249653054597e-05, "loss": 0.6183, "step": 156410 }, { "epoch": 1.73, "learning_rate": 2.1134326925916085e-05, "loss": 0.6005, "step": 156415 }, { "epoch": 1.73, "learning_rate": 2.1133404198777573e-05, "loss": 0.6391, "step": 156420 }, { "epoch": 1.73, "learning_rate": 2.113248147163906e-05, "loss": 0.6205, "step": 156425 }, { "epoch": 1.73, "learning_rate": 2.113155874450055e-05, "loss": 0.6259, "step": 156430 }, { "epoch": 1.73, "learning_rate": 2.1130636017362037e-05, "loss": 0.654, "step": 156435 }, { "epoch": 1.73, "learning_rate": 2.112971329022352e-05, "loss": 0.6178, "step": 156440 }, { "epoch": 1.73, "learning_rate": 2.112879056308501e-05, "loss": 0.6391, "step": 156445 }, { "epoch": 1.73, "learning_rate": 2.1127867835946497e-05, "loss": 0.6024, "step": 156450 }, { "epoch": 1.73, "learning_rate": 2.1126945108807985e-05, "loss": 0.6399, "step": 156455 }, { "epoch": 1.73, "learning_rate": 2.1126022381669472e-05, "loss": 0.5948, "step": 156460 }, { "epoch": 1.73, "learning_rate": 2.112509965453096e-05, "loss": 0.6122, "step": 156465 }, { "epoch": 1.73, "learning_rate": 2.1124176927392448e-05, "loss": 0.577, "step": 156470 }, { "epoch": 1.73, "learning_rate": 2.1123254200253936e-05, "loss": 0.6325, "step": 156475 }, { "epoch": 1.73, "learning_rate": 2.1122331473115424e-05, "loss": 0.5882, "step": 156480 }, { "epoch": 1.73, "learning_rate": 2.112140874597691e-05, "loss": 0.5685, "step": 156485 }, { "epoch": 1.73, "learning_rate": 2.11204860188384e-05, "loss": 0.6157, "step": 156490 }, { "epoch": 1.73, "learning_rate": 2.1119563291699887e-05, "loss": 0.6112, "step": 156495 }, { "epoch": 1.73, "learning_rate": 2.1118640564561372e-05, "loss": 0.5973, "step": 156500 }, { "epoch": 1.73, "learning_rate": 2.1117717837422863e-05, "loss": 0.5952, "step": 156505 }, { "epoch": 1.73, "learning_rate": 2.1116795110284348e-05, "loss": 0.5862, "step": 156510 }, { "epoch": 1.73, "learning_rate": 2.1115872383145835e-05, "loss": 0.5928, "step": 156515 }, { "epoch": 1.73, "learning_rate": 2.1114949656007323e-05, "loss": 0.6281, "step": 156520 }, { "epoch": 1.73, "learning_rate": 2.111402692886881e-05, "loss": 0.6137, "step": 156525 }, { "epoch": 1.73, "learning_rate": 2.11131042017303e-05, "loss": 0.5892, "step": 156530 }, { "epoch": 1.73, "learning_rate": 2.1112181474591787e-05, "loss": 0.6346, "step": 156535 }, { "epoch": 1.73, "learning_rate": 2.1111258747453275e-05, "loss": 0.5774, "step": 156540 }, { "epoch": 1.73, "learning_rate": 2.1110336020314762e-05, "loss": 0.5802, "step": 156545 }, { "epoch": 1.73, "learning_rate": 2.110941329317625e-05, "loss": 0.5989, "step": 156550 }, { "epoch": 1.73, "learning_rate": 2.1108490566037735e-05, "loss": 0.5949, "step": 156555 }, { "epoch": 1.73, "learning_rate": 2.1107567838899226e-05, "loss": 0.6866, "step": 156560 }, { "epoch": 1.73, "learning_rate": 2.110664511176071e-05, "loss": 0.6205, "step": 156565 }, { "epoch": 1.73, "learning_rate": 2.1105722384622202e-05, "loss": 0.6047, "step": 156570 }, { "epoch": 1.73, "learning_rate": 2.1104799657483686e-05, "loss": 0.511, "step": 156575 }, { "epoch": 1.73, "learning_rate": 2.1103876930345177e-05, "loss": 0.627, "step": 156580 }, { "epoch": 1.73, "learning_rate": 2.1102954203206662e-05, "loss": 0.606, "step": 156585 }, { "epoch": 1.73, "learning_rate": 2.110203147606815e-05, "loss": 0.686, "step": 156590 }, { "epoch": 1.73, "learning_rate": 2.1101108748929638e-05, "loss": 0.6175, "step": 156595 }, { "epoch": 1.73, "learning_rate": 2.1100186021791125e-05, "loss": 0.6243, "step": 156600 }, { "epoch": 1.73, "learning_rate": 2.1099263294652613e-05, "loss": 0.6566, "step": 156605 }, { "epoch": 1.73, "learning_rate": 2.1098340567514098e-05, "loss": 0.5814, "step": 156610 }, { "epoch": 1.73, "learning_rate": 2.109741784037559e-05, "loss": 0.6513, "step": 156615 }, { "epoch": 1.73, "learning_rate": 2.1096495113237073e-05, "loss": 0.6271, "step": 156620 }, { "epoch": 1.73, "learning_rate": 2.1095572386098565e-05, "loss": 0.5976, "step": 156625 }, { "epoch": 1.73, "learning_rate": 2.109464965896005e-05, "loss": 0.6166, "step": 156630 }, { "epoch": 1.73, "learning_rate": 2.109372693182154e-05, "loss": 0.597, "step": 156635 }, { "epoch": 1.73, "learning_rate": 2.1092804204683025e-05, "loss": 0.6197, "step": 156640 }, { "epoch": 1.73, "learning_rate": 2.1091881477544516e-05, "loss": 0.6734, "step": 156645 }, { "epoch": 1.73, "learning_rate": 2.1090958750406e-05, "loss": 0.6184, "step": 156650 }, { "epoch": 1.73, "learning_rate": 2.109003602326749e-05, "loss": 0.6734, "step": 156655 }, { "epoch": 1.73, "learning_rate": 2.1089113296128976e-05, "loss": 0.6331, "step": 156660 }, { "epoch": 1.73, "learning_rate": 2.1088190568990464e-05, "loss": 0.5865, "step": 156665 }, { "epoch": 1.73, "learning_rate": 2.1087267841851952e-05, "loss": 0.6314, "step": 156670 }, { "epoch": 1.73, "learning_rate": 2.1086345114713436e-05, "loss": 0.6528, "step": 156675 }, { "epoch": 1.73, "learning_rate": 2.1085422387574928e-05, "loss": 0.63, "step": 156680 }, { "epoch": 1.73, "learning_rate": 2.1084499660436412e-05, "loss": 0.6337, "step": 156685 }, { "epoch": 1.73, "learning_rate": 2.1083576933297903e-05, "loss": 0.5822, "step": 156690 }, { "epoch": 1.74, "learning_rate": 2.1082654206159388e-05, "loss": 0.6345, "step": 156695 }, { "epoch": 1.74, "learning_rate": 2.108173147902088e-05, "loss": 0.6441, "step": 156700 }, { "epoch": 1.74, "learning_rate": 2.1080808751882363e-05, "loss": 0.6449, "step": 156705 }, { "epoch": 1.74, "learning_rate": 2.107988602474385e-05, "loss": 0.5976, "step": 156710 }, { "epoch": 1.74, "learning_rate": 2.107896329760534e-05, "loss": 0.5784, "step": 156715 }, { "epoch": 1.74, "learning_rate": 2.1078040570466827e-05, "loss": 0.5992, "step": 156720 }, { "epoch": 1.74, "learning_rate": 2.1077117843328315e-05, "loss": 0.5934, "step": 156725 }, { "epoch": 1.74, "learning_rate": 2.1076195116189803e-05, "loss": 0.6109, "step": 156730 }, { "epoch": 1.74, "learning_rate": 2.107527238905129e-05, "loss": 0.6261, "step": 156735 }, { "epoch": 1.74, "learning_rate": 2.1074349661912775e-05, "loss": 0.614, "step": 156740 }, { "epoch": 1.74, "learning_rate": 2.1073426934774266e-05, "loss": 0.6015, "step": 156745 }, { "epoch": 1.74, "learning_rate": 2.107250420763575e-05, "loss": 0.6538, "step": 156750 }, { "epoch": 1.74, "learning_rate": 2.1071581480497242e-05, "loss": 0.6513, "step": 156755 }, { "epoch": 1.74, "learning_rate": 2.1070658753358726e-05, "loss": 0.6706, "step": 156760 }, { "epoch": 1.74, "learning_rate": 2.1069736026220214e-05, "loss": 0.5901, "step": 156765 }, { "epoch": 1.74, "learning_rate": 2.1068813299081702e-05, "loss": 0.6019, "step": 156770 }, { "epoch": 1.74, "learning_rate": 2.106789057194319e-05, "loss": 0.6249, "step": 156775 }, { "epoch": 1.74, "learning_rate": 2.1066967844804678e-05, "loss": 0.6155, "step": 156780 }, { "epoch": 1.74, "learning_rate": 2.1066045117666166e-05, "loss": 0.6153, "step": 156785 }, { "epoch": 1.74, "learning_rate": 2.1065122390527653e-05, "loss": 0.5966, "step": 156790 }, { "epoch": 1.74, "learning_rate": 2.106419966338914e-05, "loss": 0.6039, "step": 156795 }, { "epoch": 1.74, "learning_rate": 2.106327693625063e-05, "loss": 0.6926, "step": 156800 }, { "epoch": 1.74, "learning_rate": 2.1062354209112117e-05, "loss": 0.6416, "step": 156805 }, { "epoch": 1.74, "learning_rate": 2.1061431481973605e-05, "loss": 0.6044, "step": 156810 }, { "epoch": 1.74, "learning_rate": 2.106050875483509e-05, "loss": 0.6289, "step": 156815 }, { "epoch": 1.74, "learning_rate": 2.105958602769658e-05, "loss": 0.6221, "step": 156820 }, { "epoch": 1.74, "learning_rate": 2.1058663300558065e-05, "loss": 0.5747, "step": 156825 }, { "epoch": 1.74, "learning_rate": 2.1057740573419553e-05, "loss": 0.6519, "step": 156830 }, { "epoch": 1.74, "learning_rate": 2.105681784628104e-05, "loss": 0.5918, "step": 156835 }, { "epoch": 1.74, "learning_rate": 2.105589511914253e-05, "loss": 0.6189, "step": 156840 }, { "epoch": 1.74, "learning_rate": 2.1054972392004016e-05, "loss": 0.6195, "step": 156845 }, { "epoch": 1.74, "learning_rate": 2.1054049664865504e-05, "loss": 0.6241, "step": 156850 }, { "epoch": 1.74, "learning_rate": 2.1053126937726992e-05, "loss": 0.6777, "step": 156855 }, { "epoch": 1.74, "learning_rate": 2.105220421058848e-05, "loss": 0.629, "step": 156860 }, { "epoch": 1.74, "learning_rate": 2.1051281483449968e-05, "loss": 0.556, "step": 156865 }, { "epoch": 1.74, "learning_rate": 2.1050358756311456e-05, "loss": 0.632, "step": 156870 }, { "epoch": 1.74, "learning_rate": 2.1049436029172944e-05, "loss": 0.6305, "step": 156875 }, { "epoch": 1.74, "learning_rate": 2.104851330203443e-05, "loss": 0.6509, "step": 156880 }, { "epoch": 1.74, "learning_rate": 2.1047590574895916e-05, "loss": 0.6322, "step": 156885 }, { "epoch": 1.74, "learning_rate": 2.1046667847757404e-05, "loss": 0.5717, "step": 156890 }, { "epoch": 1.74, "learning_rate": 2.104574512061889e-05, "loss": 0.5881, "step": 156895 }, { "epoch": 1.74, "learning_rate": 2.104482239348038e-05, "loss": 0.6262, "step": 156900 }, { "epoch": 1.74, "learning_rate": 2.1043899666341867e-05, "loss": 0.6528, "step": 156905 }, { "epoch": 1.74, "learning_rate": 2.1042976939203355e-05, "loss": 0.7212, "step": 156910 }, { "epoch": 1.74, "learning_rate": 2.1042054212064843e-05, "loss": 0.6556, "step": 156915 }, { "epoch": 1.74, "learning_rate": 2.104113148492633e-05, "loss": 0.6086, "step": 156920 }, { "epoch": 1.74, "learning_rate": 2.104020875778782e-05, "loss": 0.6225, "step": 156925 }, { "epoch": 1.74, "learning_rate": 2.1039286030649306e-05, "loss": 0.5841, "step": 156930 }, { "epoch": 1.74, "learning_rate": 2.1038363303510794e-05, "loss": 0.6652, "step": 156935 }, { "epoch": 1.74, "learning_rate": 2.103744057637228e-05, "loss": 0.6058, "step": 156940 }, { "epoch": 1.74, "learning_rate": 2.103651784923377e-05, "loss": 0.6318, "step": 156945 }, { "epoch": 1.74, "learning_rate": 2.1035595122095254e-05, "loss": 0.5551, "step": 156950 }, { "epoch": 1.74, "learning_rate": 2.1034672394956746e-05, "loss": 0.6658, "step": 156955 }, { "epoch": 1.74, "learning_rate": 2.103374966781823e-05, "loss": 0.632, "step": 156960 }, { "epoch": 1.74, "learning_rate": 2.1032826940679718e-05, "loss": 0.642, "step": 156965 }, { "epoch": 1.74, "learning_rate": 2.1031904213541206e-05, "loss": 0.5862, "step": 156970 }, { "epoch": 1.74, "learning_rate": 2.1030981486402694e-05, "loss": 0.6137, "step": 156975 }, { "epoch": 1.74, "learning_rate": 2.103005875926418e-05, "loss": 0.6203, "step": 156980 }, { "epoch": 1.74, "learning_rate": 2.102913603212567e-05, "loss": 0.616, "step": 156985 }, { "epoch": 1.74, "learning_rate": 2.1028213304987157e-05, "loss": 0.6136, "step": 156990 }, { "epoch": 1.74, "learning_rate": 2.1027290577848642e-05, "loss": 0.6565, "step": 156995 }, { "epoch": 1.74, "learning_rate": 2.1026367850710133e-05, "loss": 0.6404, "step": 157000 }, { "epoch": 1.74, "eval_loss": 0.6168511509895325, "eval_runtime": 69.1557, "eval_samples_per_second": 28.92, "eval_steps_per_second": 14.46, "step": 157000 }, { "epoch": 1.74, "learning_rate": 2.1025445123571617e-05, "loss": 0.5975, "step": 157005 }, { "epoch": 1.74, "learning_rate": 2.102452239643311e-05, "loss": 0.6209, "step": 157010 }, { "epoch": 1.74, "learning_rate": 2.1023599669294593e-05, "loss": 0.6774, "step": 157015 }, { "epoch": 1.74, "learning_rate": 2.1022676942156084e-05, "loss": 0.6192, "step": 157020 }, { "epoch": 1.74, "learning_rate": 2.102175421501757e-05, "loss": 0.6152, "step": 157025 }, { "epoch": 1.74, "learning_rate": 2.102083148787906e-05, "loss": 0.5993, "step": 157030 }, { "epoch": 1.74, "learning_rate": 2.1019908760740545e-05, "loss": 0.6497, "step": 157035 }, { "epoch": 1.74, "learning_rate": 2.1018986033602032e-05, "loss": 0.6491, "step": 157040 }, { "epoch": 1.74, "learning_rate": 2.101806330646352e-05, "loss": 0.6859, "step": 157045 }, { "epoch": 1.74, "learning_rate": 2.1017140579325008e-05, "loss": 0.6664, "step": 157050 }, { "epoch": 1.74, "learning_rate": 2.1016217852186496e-05, "loss": 0.6531, "step": 157055 }, { "epoch": 1.74, "learning_rate": 2.101529512504798e-05, "loss": 0.6832, "step": 157060 }, { "epoch": 1.74, "learning_rate": 2.101437239790947e-05, "loss": 0.6883, "step": 157065 }, { "epoch": 1.74, "learning_rate": 2.1013449670770956e-05, "loss": 0.639, "step": 157070 }, { "epoch": 1.74, "learning_rate": 2.1012526943632447e-05, "loss": 0.6159, "step": 157075 }, { "epoch": 1.74, "learning_rate": 2.1011604216493932e-05, "loss": 0.6584, "step": 157080 }, { "epoch": 1.74, "learning_rate": 2.1010681489355423e-05, "loss": 0.6519, "step": 157085 }, { "epoch": 1.74, "learning_rate": 2.1009758762216907e-05, "loss": 0.6259, "step": 157090 }, { "epoch": 1.74, "learning_rate": 2.1008836035078395e-05, "loss": 0.6534, "step": 157095 }, { "epoch": 1.74, "learning_rate": 2.1007913307939883e-05, "loss": 0.6413, "step": 157100 }, { "epoch": 1.74, "learning_rate": 2.100699058080137e-05, "loss": 0.6483, "step": 157105 }, { "epoch": 1.74, "learning_rate": 2.100606785366286e-05, "loss": 0.643, "step": 157110 }, { "epoch": 1.74, "learning_rate": 2.1005145126524343e-05, "loss": 0.6386, "step": 157115 }, { "epoch": 1.74, "learning_rate": 2.1004222399385835e-05, "loss": 0.5433, "step": 157120 }, { "epoch": 1.74, "learning_rate": 2.100329967224732e-05, "loss": 0.6576, "step": 157125 }, { "epoch": 1.74, "learning_rate": 2.100237694510881e-05, "loss": 0.668, "step": 157130 }, { "epoch": 1.74, "learning_rate": 2.1001454217970295e-05, "loss": 0.6099, "step": 157135 }, { "epoch": 1.74, "learning_rate": 2.1000531490831786e-05, "loss": 0.5856, "step": 157140 }, { "epoch": 1.74, "learning_rate": 2.099960876369327e-05, "loss": 0.625, "step": 157145 }, { "epoch": 1.74, "learning_rate": 2.099868603655476e-05, "loss": 0.5568, "step": 157150 }, { "epoch": 1.74, "learning_rate": 2.0997763309416246e-05, "loss": 0.6291, "step": 157155 }, { "epoch": 1.74, "learning_rate": 2.0996840582277734e-05, "loss": 0.6615, "step": 157160 }, { "epoch": 1.74, "learning_rate": 2.0995917855139222e-05, "loss": 0.6567, "step": 157165 }, { "epoch": 1.74, "learning_rate": 2.099499512800071e-05, "loss": 0.6235, "step": 157170 }, { "epoch": 1.74, "learning_rate": 2.0994072400862198e-05, "loss": 0.6737, "step": 157175 }, { "epoch": 1.74, "learning_rate": 2.0993149673723685e-05, "loss": 0.6589, "step": 157180 }, { "epoch": 1.74, "learning_rate": 2.0992226946585173e-05, "loss": 0.6609, "step": 157185 }, { "epoch": 1.74, "learning_rate": 2.099130421944666e-05, "loss": 0.6195, "step": 157190 }, { "epoch": 1.74, "learning_rate": 2.099038149230815e-05, "loss": 0.5893, "step": 157195 }, { "epoch": 1.74, "learning_rate": 2.0989458765169633e-05, "loss": 0.6334, "step": 157200 }, { "epoch": 1.74, "learning_rate": 2.0988536038031125e-05, "loss": 0.6487, "step": 157205 }, { "epoch": 1.74, "learning_rate": 2.098761331089261e-05, "loss": 0.6258, "step": 157210 }, { "epoch": 1.74, "learning_rate": 2.0986690583754097e-05, "loss": 0.6478, "step": 157215 }, { "epoch": 1.74, "learning_rate": 2.0985767856615585e-05, "loss": 0.6262, "step": 157220 }, { "epoch": 1.74, "learning_rate": 2.0984845129477073e-05, "loss": 0.6145, "step": 157225 }, { "epoch": 1.74, "learning_rate": 2.098392240233856e-05, "loss": 0.6351, "step": 157230 }, { "epoch": 1.74, "learning_rate": 2.0982999675200048e-05, "loss": 0.6349, "step": 157235 }, { "epoch": 1.74, "learning_rate": 2.0982076948061536e-05, "loss": 0.5859, "step": 157240 }, { "epoch": 1.74, "learning_rate": 2.0981154220923024e-05, "loss": 0.665, "step": 157245 }, { "epoch": 1.74, "learning_rate": 2.0980231493784512e-05, "loss": 0.6372, "step": 157250 }, { "epoch": 1.74, "learning_rate": 2.0979308766646e-05, "loss": 0.6391, "step": 157255 }, { "epoch": 1.74, "learning_rate": 2.0978386039507488e-05, "loss": 0.6399, "step": 157260 }, { "epoch": 1.74, "learning_rate": 2.0977463312368975e-05, "loss": 0.6095, "step": 157265 }, { "epoch": 1.74, "learning_rate": 2.097654058523046e-05, "loss": 0.6133, "step": 157270 }, { "epoch": 1.74, "learning_rate": 2.0975617858091948e-05, "loss": 0.6309, "step": 157275 }, { "epoch": 1.74, "learning_rate": 2.0974695130953436e-05, "loss": 0.5507, "step": 157280 }, { "epoch": 1.74, "learning_rate": 2.0973772403814923e-05, "loss": 0.658, "step": 157285 }, { "epoch": 1.74, "learning_rate": 2.097284967667641e-05, "loss": 0.6019, "step": 157290 }, { "epoch": 1.74, "learning_rate": 2.09719269495379e-05, "loss": 0.6567, "step": 157295 }, { "epoch": 1.74, "learning_rate": 2.0971004222399387e-05, "loss": 0.6299, "step": 157300 }, { "epoch": 1.74, "learning_rate": 2.0970081495260875e-05, "loss": 0.5991, "step": 157305 }, { "epoch": 1.74, "learning_rate": 2.0969158768122363e-05, "loss": 0.5999, "step": 157310 }, { "epoch": 1.74, "learning_rate": 2.096823604098385e-05, "loss": 0.6516, "step": 157315 }, { "epoch": 1.74, "learning_rate": 2.096731331384534e-05, "loss": 0.6327, "step": 157320 }, { "epoch": 1.74, "learning_rate": 2.0966390586706823e-05, "loss": 0.6102, "step": 157325 }, { "epoch": 1.74, "learning_rate": 2.0965467859568314e-05, "loss": 0.6225, "step": 157330 }, { "epoch": 1.74, "learning_rate": 2.09645451324298e-05, "loss": 0.648, "step": 157335 }, { "epoch": 1.74, "learning_rate": 2.096362240529129e-05, "loss": 0.6377, "step": 157340 }, { "epoch": 1.74, "learning_rate": 2.0962699678152774e-05, "loss": 0.6285, "step": 157345 }, { "epoch": 1.74, "learning_rate": 2.0961776951014262e-05, "loss": 0.6411, "step": 157350 }, { "epoch": 1.74, "learning_rate": 2.096085422387575e-05, "loss": 0.5727, "step": 157355 }, { "epoch": 1.74, "learning_rate": 2.0959931496737238e-05, "loss": 0.6053, "step": 157360 }, { "epoch": 1.74, "learning_rate": 2.0959008769598726e-05, "loss": 0.647, "step": 157365 }, { "epoch": 1.74, "learning_rate": 2.0958086042460213e-05, "loss": 0.668, "step": 157370 }, { "epoch": 1.74, "learning_rate": 2.09571633153217e-05, "loss": 0.5803, "step": 157375 }, { "epoch": 1.74, "learning_rate": 2.095624058818319e-05, "loss": 0.6267, "step": 157380 }, { "epoch": 1.74, "learning_rate": 2.0955317861044677e-05, "loss": 0.5984, "step": 157385 }, { "epoch": 1.74, "learning_rate": 2.095439513390616e-05, "loss": 0.6217, "step": 157390 }, { "epoch": 1.74, "learning_rate": 2.0953472406767653e-05, "loss": 0.6867, "step": 157395 }, { "epoch": 1.74, "learning_rate": 2.0952549679629137e-05, "loss": 0.6895, "step": 157400 }, { "epoch": 1.74, "learning_rate": 2.095162695249063e-05, "loss": 0.6223, "step": 157405 }, { "epoch": 1.74, "learning_rate": 2.0950704225352113e-05, "loss": 0.6205, "step": 157410 }, { "epoch": 1.74, "learning_rate": 2.0949781498213604e-05, "loss": 0.6495, "step": 157415 }, { "epoch": 1.74, "learning_rate": 2.094885877107509e-05, "loss": 0.6099, "step": 157420 }, { "epoch": 1.74, "learning_rate": 2.0947936043936576e-05, "loss": 0.6437, "step": 157425 }, { "epoch": 1.74, "learning_rate": 2.0947013316798064e-05, "loss": 0.6156, "step": 157430 }, { "epoch": 1.74, "learning_rate": 2.0946090589659552e-05, "loss": 0.6238, "step": 157435 }, { "epoch": 1.74, "learning_rate": 2.094516786252104e-05, "loss": 0.639, "step": 157440 }, { "epoch": 1.74, "learning_rate": 2.0944245135382524e-05, "loss": 0.6669, "step": 157445 }, { "epoch": 1.74, "learning_rate": 2.0943322408244016e-05, "loss": 0.6538, "step": 157450 }, { "epoch": 1.74, "learning_rate": 2.09423996811055e-05, "loss": 0.6548, "step": 157455 }, { "epoch": 1.74, "learning_rate": 2.094147695396699e-05, "loss": 0.6093, "step": 157460 }, { "epoch": 1.74, "learning_rate": 2.0940554226828476e-05, "loss": 0.6123, "step": 157465 }, { "epoch": 1.74, "learning_rate": 2.0939631499689967e-05, "loss": 0.6522, "step": 157470 }, { "epoch": 1.74, "learning_rate": 2.093870877255145e-05, "loss": 0.6207, "step": 157475 }, { "epoch": 1.74, "learning_rate": 2.093778604541294e-05, "loss": 0.637, "step": 157480 }, { "epoch": 1.74, "learning_rate": 2.0936863318274427e-05, "loss": 0.6588, "step": 157485 }, { "epoch": 1.74, "learning_rate": 2.0935940591135915e-05, "loss": 0.6048, "step": 157490 }, { "epoch": 1.74, "learning_rate": 2.0935017863997403e-05, "loss": 0.651, "step": 157495 }, { "epoch": 1.74, "learning_rate": 2.0934095136858887e-05, "loss": 0.5871, "step": 157500 }, { "epoch": 1.74, "learning_rate": 2.093317240972038e-05, "loss": 0.6029, "step": 157505 }, { "epoch": 1.74, "learning_rate": 2.0932249682581863e-05, "loss": 0.5647, "step": 157510 }, { "epoch": 1.74, "learning_rate": 2.0931326955443354e-05, "loss": 0.5907, "step": 157515 }, { "epoch": 1.74, "learning_rate": 2.093040422830484e-05, "loss": 0.5962, "step": 157520 }, { "epoch": 1.74, "learning_rate": 2.092948150116633e-05, "loss": 0.5971, "step": 157525 }, { "epoch": 1.74, "learning_rate": 2.0928558774027814e-05, "loss": 0.6013, "step": 157530 }, { "epoch": 1.74, "learning_rate": 2.0927636046889306e-05, "loss": 0.6759, "step": 157535 }, { "epoch": 1.74, "learning_rate": 2.092671331975079e-05, "loss": 0.6038, "step": 157540 }, { "epoch": 1.74, "learning_rate": 2.0925790592612278e-05, "loss": 0.6036, "step": 157545 }, { "epoch": 1.74, "learning_rate": 2.0924867865473766e-05, "loss": 0.6237, "step": 157550 }, { "epoch": 1.74, "learning_rate": 2.0923945138335254e-05, "loss": 0.6073, "step": 157555 }, { "epoch": 1.74, "learning_rate": 2.092302241119674e-05, "loss": 0.5783, "step": 157560 }, { "epoch": 1.74, "learning_rate": 2.092209968405823e-05, "loss": 0.6337, "step": 157565 }, { "epoch": 1.74, "learning_rate": 2.0921176956919717e-05, "loss": 0.5874, "step": 157570 }, { "epoch": 1.74, "learning_rate": 2.09202542297812e-05, "loss": 0.6297, "step": 157575 }, { "epoch": 1.74, "learning_rate": 2.0919331502642693e-05, "loss": 0.6435, "step": 157580 }, { "epoch": 1.74, "learning_rate": 2.0918408775504177e-05, "loss": 0.5974, "step": 157585 }, { "epoch": 1.74, "learning_rate": 2.091748604836567e-05, "loss": 0.6346, "step": 157590 }, { "epoch": 1.75, "learning_rate": 2.0916563321227153e-05, "loss": 0.6682, "step": 157595 }, { "epoch": 1.75, "learning_rate": 2.091564059408864e-05, "loss": 0.604, "step": 157600 }, { "epoch": 1.75, "learning_rate": 2.091471786695013e-05, "loss": 0.6044, "step": 157605 }, { "epoch": 1.75, "learning_rate": 2.0913795139811617e-05, "loss": 0.6903, "step": 157610 }, { "epoch": 1.75, "learning_rate": 2.0912872412673104e-05, "loss": 0.6642, "step": 157615 }, { "epoch": 1.75, "learning_rate": 2.0911949685534592e-05, "loss": 0.6398, "step": 157620 }, { "epoch": 1.75, "learning_rate": 2.091102695839608e-05, "loss": 0.6449, "step": 157625 }, { "epoch": 1.75, "learning_rate": 2.0910104231257568e-05, "loss": 0.6087, "step": 157630 }, { "epoch": 1.75, "learning_rate": 2.0909181504119056e-05, "loss": 0.601, "step": 157635 }, { "epoch": 1.75, "learning_rate": 2.0908258776980544e-05, "loss": 0.6231, "step": 157640 }, { "epoch": 1.75, "learning_rate": 2.090733604984203e-05, "loss": 0.6587, "step": 157645 }, { "epoch": 1.75, "learning_rate": 2.0906413322703516e-05, "loss": 0.5962, "step": 157650 }, { "epoch": 1.75, "learning_rate": 2.0905490595565004e-05, "loss": 0.6232, "step": 157655 }, { "epoch": 1.75, "learning_rate": 2.0904567868426492e-05, "loss": 0.5871, "step": 157660 }, { "epoch": 1.75, "learning_rate": 2.090364514128798e-05, "loss": 0.5681, "step": 157665 }, { "epoch": 1.75, "learning_rate": 2.0902722414149467e-05, "loss": 0.6389, "step": 157670 }, { "epoch": 1.75, "learning_rate": 2.0901799687010955e-05, "loss": 0.6309, "step": 157675 }, { "epoch": 1.75, "learning_rate": 2.0900876959872443e-05, "loss": 0.5764, "step": 157680 }, { "epoch": 1.75, "learning_rate": 2.089995423273393e-05, "loss": 0.6583, "step": 157685 }, { "epoch": 1.75, "learning_rate": 2.089903150559542e-05, "loss": 0.595, "step": 157690 }, { "epoch": 1.75, "learning_rate": 2.0898108778456907e-05, "loss": 0.587, "step": 157695 }, { "epoch": 1.75, "learning_rate": 2.0897186051318395e-05, "loss": 0.629, "step": 157700 }, { "epoch": 1.75, "learning_rate": 2.0896263324179882e-05, "loss": 0.6706, "step": 157705 }, { "epoch": 1.75, "learning_rate": 2.0895340597041367e-05, "loss": 0.6144, "step": 157710 }, { "epoch": 1.75, "learning_rate": 2.0894417869902858e-05, "loss": 0.6135, "step": 157715 }, { "epoch": 1.75, "learning_rate": 2.0893495142764343e-05, "loss": 0.6442, "step": 157720 }, { "epoch": 1.75, "learning_rate": 2.089257241562583e-05, "loss": 0.6271, "step": 157725 }, { "epoch": 1.75, "learning_rate": 2.0891649688487318e-05, "loss": 0.6482, "step": 157730 }, { "epoch": 1.75, "learning_rate": 2.0890726961348806e-05, "loss": 0.6665, "step": 157735 }, { "epoch": 1.75, "learning_rate": 2.0889804234210294e-05, "loss": 0.6616, "step": 157740 }, { "epoch": 1.75, "learning_rate": 2.0888881507071782e-05, "loss": 0.6544, "step": 157745 }, { "epoch": 1.75, "learning_rate": 2.088795877993327e-05, "loss": 0.6298, "step": 157750 }, { "epoch": 1.75, "learning_rate": 2.0887036052794757e-05, "loss": 0.6567, "step": 157755 }, { "epoch": 1.75, "learning_rate": 2.0886113325656245e-05, "loss": 0.6608, "step": 157760 }, { "epoch": 1.75, "learning_rate": 2.0885190598517733e-05, "loss": 0.6599, "step": 157765 }, { "epoch": 1.75, "learning_rate": 2.088426787137922e-05, "loss": 0.5973, "step": 157770 }, { "epoch": 1.75, "learning_rate": 2.0883345144240705e-05, "loss": 0.6194, "step": 157775 }, { "epoch": 1.75, "learning_rate": 2.0882422417102197e-05, "loss": 0.6094, "step": 157780 }, { "epoch": 1.75, "learning_rate": 2.088149968996368e-05, "loss": 0.6479, "step": 157785 }, { "epoch": 1.75, "learning_rate": 2.0880576962825172e-05, "loss": 0.6086, "step": 157790 }, { "epoch": 1.75, "learning_rate": 2.0879654235686657e-05, "loss": 0.6734, "step": 157795 }, { "epoch": 1.75, "learning_rate": 2.0878731508548145e-05, "loss": 0.5983, "step": 157800 }, { "epoch": 1.75, "learning_rate": 2.0877808781409633e-05, "loss": 0.6457, "step": 157805 }, { "epoch": 1.75, "learning_rate": 2.087688605427112e-05, "loss": 0.6441, "step": 157810 }, { "epoch": 1.75, "learning_rate": 2.0875963327132608e-05, "loss": 0.6147, "step": 157815 }, { "epoch": 1.75, "learning_rate": 2.0875040599994096e-05, "loss": 0.5726, "step": 157820 }, { "epoch": 1.75, "learning_rate": 2.0874117872855584e-05, "loss": 0.6062, "step": 157825 }, { "epoch": 1.75, "learning_rate": 2.087319514571707e-05, "loss": 0.5771, "step": 157830 }, { "epoch": 1.75, "learning_rate": 2.087227241857856e-05, "loss": 0.635, "step": 157835 }, { "epoch": 1.75, "learning_rate": 2.0871349691440044e-05, "loss": 0.6212, "step": 157840 }, { "epoch": 1.75, "learning_rate": 2.0870426964301535e-05, "loss": 0.6694, "step": 157845 }, { "epoch": 1.75, "learning_rate": 2.086950423716302e-05, "loss": 0.5986, "step": 157850 }, { "epoch": 1.75, "learning_rate": 2.086858151002451e-05, "loss": 0.6112, "step": 157855 }, { "epoch": 1.75, "learning_rate": 2.0867658782885996e-05, "loss": 0.6053, "step": 157860 }, { "epoch": 1.75, "learning_rate": 2.0866736055747483e-05, "loss": 0.6686, "step": 157865 }, { "epoch": 1.75, "learning_rate": 2.086581332860897e-05, "loss": 0.6405, "step": 157870 }, { "epoch": 1.75, "learning_rate": 2.086489060147046e-05, "loss": 0.5635, "step": 157875 }, { "epoch": 1.75, "learning_rate": 2.0863967874331947e-05, "loss": 0.6276, "step": 157880 }, { "epoch": 1.75, "learning_rate": 2.086304514719343e-05, "loss": 0.5953, "step": 157885 }, { "epoch": 1.75, "learning_rate": 2.0862122420054923e-05, "loss": 0.6329, "step": 157890 }, { "epoch": 1.75, "learning_rate": 2.0861199692916407e-05, "loss": 0.657, "step": 157895 }, { "epoch": 1.75, "learning_rate": 2.0860276965777898e-05, "loss": 0.682, "step": 157900 }, { "epoch": 1.75, "learning_rate": 2.0859354238639383e-05, "loss": 0.6058, "step": 157905 }, { "epoch": 1.75, "learning_rate": 2.0858431511500874e-05, "loss": 0.5842, "step": 157910 }, { "epoch": 1.75, "learning_rate": 2.085750878436236e-05, "loss": 0.6492, "step": 157915 }, { "epoch": 1.75, "learning_rate": 2.085658605722385e-05, "loss": 0.643, "step": 157920 }, { "epoch": 1.75, "learning_rate": 2.0855663330085334e-05, "loss": 0.5927, "step": 157925 }, { "epoch": 1.75, "learning_rate": 2.0854740602946822e-05, "loss": 0.6345, "step": 157930 }, { "epoch": 1.75, "learning_rate": 2.085381787580831e-05, "loss": 0.6308, "step": 157935 }, { "epoch": 1.75, "learning_rate": 2.0852895148669798e-05, "loss": 0.579, "step": 157940 }, { "epoch": 1.75, "learning_rate": 2.0851972421531286e-05, "loss": 0.6393, "step": 157945 }, { "epoch": 1.75, "learning_rate": 2.085104969439277e-05, "loss": 0.6084, "step": 157950 }, { "epoch": 1.75, "learning_rate": 2.085012696725426e-05, "loss": 0.5855, "step": 157955 }, { "epoch": 1.75, "learning_rate": 2.0849204240115746e-05, "loss": 0.6288, "step": 157960 }, { "epoch": 1.75, "learning_rate": 2.0848281512977237e-05, "loss": 0.6657, "step": 157965 }, { "epoch": 1.75, "learning_rate": 2.084735878583872e-05, "loss": 0.6053, "step": 157970 }, { "epoch": 1.75, "learning_rate": 2.0846436058700213e-05, "loss": 0.6032, "step": 157975 }, { "epoch": 1.75, "learning_rate": 2.0845513331561697e-05, "loss": 0.6355, "step": 157980 }, { "epoch": 1.75, "learning_rate": 2.0844590604423185e-05, "loss": 0.6305, "step": 157985 }, { "epoch": 1.75, "learning_rate": 2.0843667877284673e-05, "loss": 0.687, "step": 157990 }, { "epoch": 1.75, "learning_rate": 2.084274515014616e-05, "loss": 0.6407, "step": 157995 }, { "epoch": 1.75, "learning_rate": 2.084182242300765e-05, "loss": 0.639, "step": 158000 }, { "epoch": 1.75, "eval_loss": 0.5996662378311157, "eval_runtime": 69.1206, "eval_samples_per_second": 28.935, "eval_steps_per_second": 14.467, "step": 158000 }, { "epoch": 1.75, "learning_rate": 2.0840899695869136e-05, "loss": 0.6049, "step": 158005 }, { "epoch": 1.75, "learning_rate": 2.0839976968730624e-05, "loss": 0.6095, "step": 158010 }, { "epoch": 1.75, "learning_rate": 2.0839054241592112e-05, "loss": 0.67, "step": 158015 }, { "epoch": 1.75, "learning_rate": 2.08381315144536e-05, "loss": 0.63, "step": 158020 }, { "epoch": 1.75, "learning_rate": 2.0837208787315088e-05, "loss": 0.609, "step": 158025 }, { "epoch": 1.75, "learning_rate": 2.0836286060176576e-05, "loss": 0.6297, "step": 158030 }, { "epoch": 1.75, "learning_rate": 2.083536333303806e-05, "loss": 0.629, "step": 158035 }, { "epoch": 1.75, "learning_rate": 2.0834440605899548e-05, "loss": 0.5973, "step": 158040 }, { "epoch": 1.75, "learning_rate": 2.0833517878761036e-05, "loss": 0.6082, "step": 158045 }, { "epoch": 1.75, "learning_rate": 2.0832595151622524e-05, "loss": 0.6644, "step": 158050 }, { "epoch": 1.75, "learning_rate": 2.083167242448401e-05, "loss": 0.6675, "step": 158055 }, { "epoch": 1.75, "learning_rate": 2.08307496973455e-05, "loss": 0.5931, "step": 158060 }, { "epoch": 1.75, "learning_rate": 2.0829826970206987e-05, "loss": 0.6194, "step": 158065 }, { "epoch": 1.75, "learning_rate": 2.0828904243068475e-05, "loss": 0.6026, "step": 158070 }, { "epoch": 1.75, "learning_rate": 2.0827981515929963e-05, "loss": 0.6795, "step": 158075 }, { "epoch": 1.75, "learning_rate": 2.082705878879145e-05, "loss": 0.542, "step": 158080 }, { "epoch": 1.75, "learning_rate": 2.082613606165294e-05, "loss": 0.6312, "step": 158085 }, { "epoch": 1.75, "learning_rate": 2.0825213334514426e-05, "loss": 0.6003, "step": 158090 }, { "epoch": 1.75, "learning_rate": 2.082429060737591e-05, "loss": 0.6167, "step": 158095 }, { "epoch": 1.75, "learning_rate": 2.0823367880237402e-05, "loss": 0.5988, "step": 158100 }, { "epoch": 1.75, "learning_rate": 2.0822445153098887e-05, "loss": 0.6272, "step": 158105 }, { "epoch": 1.75, "learning_rate": 2.0821522425960374e-05, "loss": 0.6304, "step": 158110 }, { "epoch": 1.75, "learning_rate": 2.0820599698821862e-05, "loss": 0.6218, "step": 158115 }, { "epoch": 1.75, "learning_rate": 2.081967697168335e-05, "loss": 0.6244, "step": 158120 }, { "epoch": 1.75, "learning_rate": 2.0818754244544838e-05, "loss": 0.6305, "step": 158125 }, { "epoch": 1.75, "learning_rate": 2.0817831517406326e-05, "loss": 0.6369, "step": 158130 }, { "epoch": 1.75, "learning_rate": 2.0816908790267814e-05, "loss": 0.6785, "step": 158135 }, { "epoch": 1.75, "learning_rate": 2.08159860631293e-05, "loss": 0.6867, "step": 158140 }, { "epoch": 1.75, "learning_rate": 2.081506333599079e-05, "loss": 0.6039, "step": 158145 }, { "epoch": 1.75, "learning_rate": 2.0814140608852277e-05, "loss": 0.6341, "step": 158150 }, { "epoch": 1.75, "learning_rate": 2.0813217881713765e-05, "loss": 0.6132, "step": 158155 }, { "epoch": 1.75, "learning_rate": 2.081229515457525e-05, "loss": 0.7072, "step": 158160 }, { "epoch": 1.75, "learning_rate": 2.081137242743674e-05, "loss": 0.684, "step": 158165 }, { "epoch": 1.75, "learning_rate": 2.0810449700298225e-05, "loss": 0.6361, "step": 158170 }, { "epoch": 1.75, "learning_rate": 2.0809526973159716e-05, "loss": 0.5998, "step": 158175 }, { "epoch": 1.75, "learning_rate": 2.08086042460212e-05, "loss": 0.592, "step": 158180 }, { "epoch": 1.75, "learning_rate": 2.080768151888269e-05, "loss": 0.577, "step": 158185 }, { "epoch": 1.75, "learning_rate": 2.0806758791744177e-05, "loss": 0.6102, "step": 158190 }, { "epoch": 1.75, "learning_rate": 2.0805836064605664e-05, "loss": 0.6387, "step": 158195 }, { "epoch": 1.75, "learning_rate": 2.0804913337467152e-05, "loss": 0.6012, "step": 158200 }, { "epoch": 1.75, "learning_rate": 2.080399061032864e-05, "loss": 0.6433, "step": 158205 }, { "epoch": 1.75, "learning_rate": 2.0803067883190128e-05, "loss": 0.5684, "step": 158210 }, { "epoch": 1.75, "learning_rate": 2.0802145156051612e-05, "loss": 0.6149, "step": 158215 }, { "epoch": 1.75, "learning_rate": 2.0801222428913104e-05, "loss": 0.5861, "step": 158220 }, { "epoch": 1.75, "learning_rate": 2.0800299701774588e-05, "loss": 0.5724, "step": 158225 }, { "epoch": 1.75, "learning_rate": 2.079937697463608e-05, "loss": 0.6345, "step": 158230 }, { "epoch": 1.75, "learning_rate": 2.0798454247497564e-05, "loss": 0.6123, "step": 158235 }, { "epoch": 1.75, "learning_rate": 2.0797531520359055e-05, "loss": 0.6226, "step": 158240 }, { "epoch": 1.75, "learning_rate": 2.079660879322054e-05, "loss": 0.6333, "step": 158245 }, { "epoch": 1.75, "learning_rate": 2.079568606608203e-05, "loss": 0.6054, "step": 158250 }, { "epoch": 1.75, "learning_rate": 2.0794763338943515e-05, "loss": 0.6398, "step": 158255 }, { "epoch": 1.75, "learning_rate": 2.0793840611805003e-05, "loss": 0.6133, "step": 158260 }, { "epoch": 1.75, "learning_rate": 2.079291788466649e-05, "loss": 0.6151, "step": 158265 }, { "epoch": 1.75, "learning_rate": 2.0791995157527975e-05, "loss": 0.6493, "step": 158270 }, { "epoch": 1.75, "learning_rate": 2.0791072430389467e-05, "loss": 0.5995, "step": 158275 }, { "epoch": 1.75, "learning_rate": 2.079014970325095e-05, "loss": 0.6536, "step": 158280 }, { "epoch": 1.75, "learning_rate": 2.0789226976112442e-05, "loss": 0.6107, "step": 158285 }, { "epoch": 1.75, "learning_rate": 2.0788304248973927e-05, "loss": 0.6627, "step": 158290 }, { "epoch": 1.75, "learning_rate": 2.0787381521835418e-05, "loss": 0.6409, "step": 158295 }, { "epoch": 1.75, "learning_rate": 2.0786458794696902e-05, "loss": 0.6042, "step": 158300 }, { "epoch": 1.75, "learning_rate": 2.0785536067558394e-05, "loss": 0.6235, "step": 158305 }, { "epoch": 1.75, "learning_rate": 2.0784613340419878e-05, "loss": 0.674, "step": 158310 }, { "epoch": 1.75, "learning_rate": 2.0783690613281366e-05, "loss": 0.6773, "step": 158315 }, { "epoch": 1.75, "learning_rate": 2.0782767886142854e-05, "loss": 0.6877, "step": 158320 }, { "epoch": 1.75, "learning_rate": 2.0781845159004342e-05, "loss": 0.6137, "step": 158325 }, { "epoch": 1.75, "learning_rate": 2.078092243186583e-05, "loss": 0.6315, "step": 158330 }, { "epoch": 1.75, "learning_rate": 2.0779999704727314e-05, "loss": 0.6724, "step": 158335 }, { "epoch": 1.75, "learning_rate": 2.0779076977588805e-05, "loss": 0.6171, "step": 158340 }, { "epoch": 1.75, "learning_rate": 2.077815425045029e-05, "loss": 0.5679, "step": 158345 }, { "epoch": 1.75, "learning_rate": 2.077723152331178e-05, "loss": 0.59, "step": 158350 }, { "epoch": 1.75, "learning_rate": 2.0776308796173265e-05, "loss": 0.6608, "step": 158355 }, { "epoch": 1.75, "learning_rate": 2.0775386069034757e-05, "loss": 0.6446, "step": 158360 }, { "epoch": 1.75, "learning_rate": 2.077446334189624e-05, "loss": 0.6689, "step": 158365 }, { "epoch": 1.75, "learning_rate": 2.077354061475773e-05, "loss": 0.6869, "step": 158370 }, { "epoch": 1.75, "learning_rate": 2.0772617887619217e-05, "loss": 0.649, "step": 158375 }, { "epoch": 1.75, "learning_rate": 2.0771695160480705e-05, "loss": 0.6161, "step": 158380 }, { "epoch": 1.75, "learning_rate": 2.0770772433342193e-05, "loss": 0.6548, "step": 158385 }, { "epoch": 1.75, "learning_rate": 2.076984970620368e-05, "loss": 0.6574, "step": 158390 }, { "epoch": 1.75, "learning_rate": 2.0768926979065168e-05, "loss": 0.6627, "step": 158395 }, { "epoch": 1.75, "learning_rate": 2.0768004251926656e-05, "loss": 0.6347, "step": 158400 }, { "epoch": 1.75, "learning_rate": 2.0767081524788144e-05, "loss": 0.6746, "step": 158405 }, { "epoch": 1.75, "learning_rate": 2.076615879764963e-05, "loss": 0.6305, "step": 158410 }, { "epoch": 1.75, "learning_rate": 2.076523607051112e-05, "loss": 0.5918, "step": 158415 }, { "epoch": 1.75, "learning_rate": 2.0764313343372604e-05, "loss": 0.5549, "step": 158420 }, { "epoch": 1.75, "learning_rate": 2.0763390616234092e-05, "loss": 0.6364, "step": 158425 }, { "epoch": 1.75, "learning_rate": 2.076246788909558e-05, "loss": 0.5875, "step": 158430 }, { "epoch": 1.75, "learning_rate": 2.0761545161957068e-05, "loss": 0.6358, "step": 158435 }, { "epoch": 1.75, "learning_rate": 2.0760622434818555e-05, "loss": 0.6199, "step": 158440 }, { "epoch": 1.75, "learning_rate": 2.0759699707680043e-05, "loss": 0.6239, "step": 158445 }, { "epoch": 1.75, "learning_rate": 2.075877698054153e-05, "loss": 0.5736, "step": 158450 }, { "epoch": 1.75, "learning_rate": 2.075785425340302e-05, "loss": 0.6116, "step": 158455 }, { "epoch": 1.75, "learning_rate": 2.0756931526264507e-05, "loss": 0.6366, "step": 158460 }, { "epoch": 1.75, "learning_rate": 2.0756008799125995e-05, "loss": 0.6177, "step": 158465 }, { "epoch": 1.75, "learning_rate": 2.0755086071987483e-05, "loss": 0.6357, "step": 158470 }, { "epoch": 1.75, "learning_rate": 2.075416334484897e-05, "loss": 0.6242, "step": 158475 }, { "epoch": 1.75, "learning_rate": 2.0753240617710458e-05, "loss": 0.6155, "step": 158480 }, { "epoch": 1.75, "learning_rate": 2.0752317890571943e-05, "loss": 0.6039, "step": 158485 }, { "epoch": 1.75, "learning_rate": 2.075139516343343e-05, "loss": 0.6231, "step": 158490 }, { "epoch": 1.75, "learning_rate": 2.075047243629492e-05, "loss": 0.6422, "step": 158495 }, { "epoch": 1.76, "learning_rate": 2.0749549709156406e-05, "loss": 0.67, "step": 158500 }, { "epoch": 1.76, "learning_rate": 2.0748626982017894e-05, "loss": 0.6375, "step": 158505 }, { "epoch": 1.76, "learning_rate": 2.0747704254879382e-05, "loss": 0.6043, "step": 158510 }, { "epoch": 1.76, "learning_rate": 2.074678152774087e-05, "loss": 0.6457, "step": 158515 }, { "epoch": 1.76, "learning_rate": 2.0745858800602358e-05, "loss": 0.6442, "step": 158520 }, { "epoch": 1.76, "learning_rate": 2.0744936073463846e-05, "loss": 0.624, "step": 158525 }, { "epoch": 1.76, "learning_rate": 2.0744013346325333e-05, "loss": 0.5752, "step": 158530 }, { "epoch": 1.76, "learning_rate": 2.074309061918682e-05, "loss": 0.6521, "step": 158535 }, { "epoch": 1.76, "learning_rate": 2.074216789204831e-05, "loss": 0.6188, "step": 158540 }, { "epoch": 1.76, "learning_rate": 2.0741245164909794e-05, "loss": 0.6603, "step": 158545 }, { "epoch": 1.76, "learning_rate": 2.0740322437771285e-05, "loss": 0.6731, "step": 158550 }, { "epoch": 1.76, "learning_rate": 2.073939971063277e-05, "loss": 0.6143, "step": 158555 }, { "epoch": 1.76, "learning_rate": 2.0738476983494257e-05, "loss": 0.556, "step": 158560 }, { "epoch": 1.76, "learning_rate": 2.0737554256355745e-05, "loss": 0.6573, "step": 158565 }, { "epoch": 1.76, "learning_rate": 2.0736631529217233e-05, "loss": 0.6672, "step": 158570 }, { "epoch": 1.76, "learning_rate": 2.073570880207872e-05, "loss": 0.5935, "step": 158575 }, { "epoch": 1.76, "learning_rate": 2.073478607494021e-05, "loss": 0.6181, "step": 158580 }, { "epoch": 1.76, "learning_rate": 2.0733863347801696e-05, "loss": 0.609, "step": 158585 }, { "epoch": 1.76, "learning_rate": 2.0732940620663184e-05, "loss": 0.656, "step": 158590 }, { "epoch": 1.76, "learning_rate": 2.0732017893524672e-05, "loss": 0.6112, "step": 158595 }, { "epoch": 1.76, "learning_rate": 2.0731095166386156e-05, "loss": 0.6584, "step": 158600 }, { "epoch": 1.76, "learning_rate": 2.0730172439247648e-05, "loss": 0.5892, "step": 158605 }, { "epoch": 1.76, "learning_rate": 2.0729249712109132e-05, "loss": 0.6895, "step": 158610 }, { "epoch": 1.76, "learning_rate": 2.0728326984970623e-05, "loss": 0.6233, "step": 158615 }, { "epoch": 1.76, "learning_rate": 2.0727404257832108e-05, "loss": 0.6678, "step": 158620 }, { "epoch": 1.76, "learning_rate": 2.07264815306936e-05, "loss": 0.6304, "step": 158625 }, { "epoch": 1.76, "learning_rate": 2.0725558803555084e-05, "loss": 0.6748, "step": 158630 }, { "epoch": 1.76, "learning_rate": 2.072463607641657e-05, "loss": 0.6301, "step": 158635 }, { "epoch": 1.76, "learning_rate": 2.072371334927806e-05, "loss": 0.5667, "step": 158640 }, { "epoch": 1.76, "learning_rate": 2.0722790622139547e-05, "loss": 0.6797, "step": 158645 }, { "epoch": 1.76, "learning_rate": 2.0721867895001035e-05, "loss": 0.6046, "step": 158650 }, { "epoch": 1.76, "learning_rate": 2.072094516786252e-05, "loss": 0.6239, "step": 158655 }, { "epoch": 1.76, "learning_rate": 2.072002244072401e-05, "loss": 0.6152, "step": 158660 }, { "epoch": 1.76, "learning_rate": 2.0719099713585495e-05, "loss": 0.638, "step": 158665 }, { "epoch": 1.76, "learning_rate": 2.0718176986446986e-05, "loss": 0.6479, "step": 158670 }, { "epoch": 1.76, "learning_rate": 2.071725425930847e-05, "loss": 0.5889, "step": 158675 }, { "epoch": 1.76, "learning_rate": 2.0716331532169962e-05, "loss": 0.6151, "step": 158680 }, { "epoch": 1.76, "learning_rate": 2.0715408805031446e-05, "loss": 0.6342, "step": 158685 }, { "epoch": 1.76, "learning_rate": 2.0714486077892938e-05, "loss": 0.6109, "step": 158690 }, { "epoch": 1.76, "learning_rate": 2.0713563350754422e-05, "loss": 0.6562, "step": 158695 }, { "epoch": 1.76, "learning_rate": 2.071264062361591e-05, "loss": 0.6096, "step": 158700 }, { "epoch": 1.76, "learning_rate": 2.0711717896477398e-05, "loss": 0.5994, "step": 158705 }, { "epoch": 1.76, "learning_rate": 2.0710795169338886e-05, "loss": 0.5955, "step": 158710 }, { "epoch": 1.76, "learning_rate": 2.0709872442200374e-05, "loss": 0.6619, "step": 158715 }, { "epoch": 1.76, "learning_rate": 2.0708949715061858e-05, "loss": 0.6191, "step": 158720 }, { "epoch": 1.76, "learning_rate": 2.070802698792335e-05, "loss": 0.682, "step": 158725 }, { "epoch": 1.76, "learning_rate": 2.0707104260784834e-05, "loss": 0.6025, "step": 158730 }, { "epoch": 1.76, "learning_rate": 2.0706181533646325e-05, "loss": 0.6566, "step": 158735 }, { "epoch": 1.76, "learning_rate": 2.070525880650781e-05, "loss": 0.5796, "step": 158740 }, { "epoch": 1.76, "learning_rate": 2.07043360793693e-05, "loss": 0.6006, "step": 158745 }, { "epoch": 1.76, "learning_rate": 2.0703413352230785e-05, "loss": 0.6213, "step": 158750 }, { "epoch": 1.76, "learning_rate": 2.0702490625092273e-05, "loss": 0.6556, "step": 158755 }, { "epoch": 1.76, "learning_rate": 2.070156789795376e-05, "loss": 0.6146, "step": 158760 }, { "epoch": 1.76, "learning_rate": 2.070064517081525e-05, "loss": 0.6842, "step": 158765 }, { "epoch": 1.76, "learning_rate": 2.0699722443676737e-05, "loss": 0.6726, "step": 158770 }, { "epoch": 1.76, "learning_rate": 2.0698799716538224e-05, "loss": 0.644, "step": 158775 }, { "epoch": 1.76, "learning_rate": 2.0697876989399712e-05, "loss": 0.6018, "step": 158780 }, { "epoch": 1.76, "learning_rate": 2.0696954262261197e-05, "loss": 0.5979, "step": 158785 }, { "epoch": 1.76, "learning_rate": 2.0696031535122688e-05, "loss": 0.5927, "step": 158790 }, { "epoch": 1.76, "learning_rate": 2.0695108807984172e-05, "loss": 0.6447, "step": 158795 }, { "epoch": 1.76, "learning_rate": 2.0694186080845664e-05, "loss": 0.6291, "step": 158800 }, { "epoch": 1.76, "learning_rate": 2.0693263353707148e-05, "loss": 0.6283, "step": 158805 }, { "epoch": 1.76, "learning_rate": 2.0692340626568636e-05, "loss": 0.626, "step": 158810 }, { "epoch": 1.76, "learning_rate": 2.0691417899430124e-05, "loss": 0.5897, "step": 158815 }, { "epoch": 1.76, "learning_rate": 2.069049517229161e-05, "loss": 0.6284, "step": 158820 }, { "epoch": 1.76, "learning_rate": 2.06895724451531e-05, "loss": 0.5559, "step": 158825 }, { "epoch": 1.76, "learning_rate": 2.0688649718014587e-05, "loss": 0.6507, "step": 158830 }, { "epoch": 1.76, "learning_rate": 2.0687726990876075e-05, "loss": 0.5963, "step": 158835 }, { "epoch": 1.76, "learning_rate": 2.0686804263737563e-05, "loss": 0.6565, "step": 158840 }, { "epoch": 1.76, "learning_rate": 2.068588153659905e-05, "loss": 0.6125, "step": 158845 }, { "epoch": 1.76, "learning_rate": 2.068495880946054e-05, "loss": 0.6458, "step": 158850 }, { "epoch": 1.76, "learning_rate": 2.0684036082322027e-05, "loss": 0.6292, "step": 158855 }, { "epoch": 1.76, "learning_rate": 2.0683113355183514e-05, "loss": 0.5484, "step": 158860 }, { "epoch": 1.76, "learning_rate": 2.0682190628045002e-05, "loss": 0.6521, "step": 158865 }, { "epoch": 1.76, "learning_rate": 2.0681267900906487e-05, "loss": 0.6951, "step": 158870 }, { "epoch": 1.76, "learning_rate": 2.0680345173767975e-05, "loss": 0.6841, "step": 158875 }, { "epoch": 1.76, "learning_rate": 2.0679422446629462e-05, "loss": 0.595, "step": 158880 }, { "epoch": 1.76, "learning_rate": 2.067849971949095e-05, "loss": 0.6036, "step": 158885 }, { "epoch": 1.76, "learning_rate": 2.0677576992352438e-05, "loss": 0.6631, "step": 158890 }, { "epoch": 1.76, "learning_rate": 2.0676654265213926e-05, "loss": 0.6467, "step": 158895 }, { "epoch": 1.76, "learning_rate": 2.0675731538075414e-05, "loss": 0.628, "step": 158900 }, { "epoch": 1.76, "learning_rate": 2.06748088109369e-05, "loss": 0.5879, "step": 158905 }, { "epoch": 1.76, "learning_rate": 2.067388608379839e-05, "loss": 0.6569, "step": 158910 }, { "epoch": 1.76, "learning_rate": 2.0672963356659877e-05, "loss": 0.6461, "step": 158915 }, { "epoch": 1.76, "learning_rate": 2.0672040629521365e-05, "loss": 0.6741, "step": 158920 }, { "epoch": 1.76, "learning_rate": 2.0671117902382853e-05, "loss": 0.6222, "step": 158925 }, { "epoch": 1.76, "learning_rate": 2.0670195175244338e-05, "loss": 0.61, "step": 158930 }, { "epoch": 1.76, "learning_rate": 2.066927244810583e-05, "loss": 0.6684, "step": 158935 }, { "epoch": 1.76, "learning_rate": 2.0668349720967313e-05, "loss": 0.5968, "step": 158940 }, { "epoch": 1.76, "learning_rate": 2.06674269938288e-05, "loss": 0.713, "step": 158945 }, { "epoch": 1.76, "learning_rate": 2.066650426669029e-05, "loss": 0.6351, "step": 158950 }, { "epoch": 1.76, "learning_rate": 2.0665581539551777e-05, "loss": 0.6087, "step": 158955 }, { "epoch": 1.76, "learning_rate": 2.0664658812413265e-05, "loss": 0.6288, "step": 158960 }, { "epoch": 1.76, "learning_rate": 2.0663736085274752e-05, "loss": 0.6238, "step": 158965 }, { "epoch": 1.76, "learning_rate": 2.066281335813624e-05, "loss": 0.5712, "step": 158970 }, { "epoch": 1.76, "learning_rate": 2.0661890630997728e-05, "loss": 0.6774, "step": 158975 }, { "epoch": 1.76, "learning_rate": 2.0660967903859216e-05, "loss": 0.631, "step": 158980 }, { "epoch": 1.76, "learning_rate": 2.06600451767207e-05, "loss": 0.6547, "step": 158985 }, { "epoch": 1.76, "learning_rate": 2.0659122449582192e-05, "loss": 0.6386, "step": 158990 }, { "epoch": 1.76, "learning_rate": 2.0658199722443676e-05, "loss": 0.6228, "step": 158995 }, { "epoch": 1.76, "learning_rate": 2.0657276995305167e-05, "loss": 0.6433, "step": 159000 }, { "epoch": 1.76, "eval_loss": 0.5989229679107666, "eval_runtime": 69.081, "eval_samples_per_second": 28.952, "eval_steps_per_second": 14.476, "step": 159000 }, { "epoch": 1.76, "learning_rate": 2.0656354268166652e-05, "loss": 0.6152, "step": 159005 }, { "epoch": 1.76, "learning_rate": 2.0655431541028143e-05, "loss": 0.6185, "step": 159010 }, { "epoch": 1.76, "learning_rate": 2.0654508813889628e-05, "loss": 0.6187, "step": 159015 }, { "epoch": 1.76, "learning_rate": 2.0653586086751115e-05, "loss": 0.6115, "step": 159020 }, { "epoch": 1.76, "learning_rate": 2.0652663359612603e-05, "loss": 0.5847, "step": 159025 }, { "epoch": 1.76, "learning_rate": 2.065174063247409e-05, "loss": 0.657, "step": 159030 }, { "epoch": 1.76, "learning_rate": 2.065081790533558e-05, "loss": 0.62, "step": 159035 }, { "epoch": 1.76, "learning_rate": 2.0649895178197063e-05, "loss": 0.6416, "step": 159040 }, { "epoch": 1.76, "learning_rate": 2.0648972451058555e-05, "loss": 0.5955, "step": 159045 }, { "epoch": 1.76, "learning_rate": 2.064804972392004e-05, "loss": 0.6412, "step": 159050 }, { "epoch": 1.76, "learning_rate": 2.064712699678153e-05, "loss": 0.631, "step": 159055 }, { "epoch": 1.76, "learning_rate": 2.0646204269643015e-05, "loss": 0.6068, "step": 159060 }, { "epoch": 1.76, "learning_rate": 2.0645281542504506e-05, "loss": 0.6138, "step": 159065 }, { "epoch": 1.76, "learning_rate": 2.064435881536599e-05, "loss": 0.6409, "step": 159070 }, { "epoch": 1.76, "learning_rate": 2.0643436088227482e-05, "loss": 0.6355, "step": 159075 }, { "epoch": 1.76, "learning_rate": 2.0642513361088966e-05, "loss": 0.604, "step": 159080 }, { "epoch": 1.76, "learning_rate": 2.0641590633950454e-05, "loss": 0.5772, "step": 159085 }, { "epoch": 1.76, "learning_rate": 2.0640667906811942e-05, "loss": 0.6476, "step": 159090 }, { "epoch": 1.76, "learning_rate": 2.063974517967343e-05, "loss": 0.657, "step": 159095 }, { "epoch": 1.76, "learning_rate": 2.0638822452534918e-05, "loss": 0.6242, "step": 159100 }, { "epoch": 1.76, "learning_rate": 2.0637899725396402e-05, "loss": 0.6051, "step": 159105 }, { "epoch": 1.76, "learning_rate": 2.0636976998257893e-05, "loss": 0.6171, "step": 159110 }, { "epoch": 1.76, "learning_rate": 2.0636054271119378e-05, "loss": 0.6086, "step": 159115 }, { "epoch": 1.76, "learning_rate": 2.063513154398087e-05, "loss": 0.6422, "step": 159120 }, { "epoch": 1.76, "learning_rate": 2.0634208816842353e-05, "loss": 0.6254, "step": 159125 }, { "epoch": 1.76, "learning_rate": 2.0633286089703845e-05, "loss": 0.6246, "step": 159130 }, { "epoch": 1.76, "learning_rate": 2.063236336256533e-05, "loss": 0.599, "step": 159135 }, { "epoch": 1.76, "learning_rate": 2.0631440635426817e-05, "loss": 0.6223, "step": 159140 }, { "epoch": 1.76, "learning_rate": 2.0630517908288305e-05, "loss": 0.6321, "step": 159145 }, { "epoch": 1.76, "learning_rate": 2.0629595181149793e-05, "loss": 0.6186, "step": 159150 }, { "epoch": 1.76, "learning_rate": 2.062867245401128e-05, "loss": 0.6059, "step": 159155 }, { "epoch": 1.76, "learning_rate": 2.062774972687277e-05, "loss": 0.6353, "step": 159160 }, { "epoch": 1.76, "learning_rate": 2.0626826999734256e-05, "loss": 0.6557, "step": 159165 }, { "epoch": 1.76, "learning_rate": 2.062590427259574e-05, "loss": 0.6086, "step": 159170 }, { "epoch": 1.76, "learning_rate": 2.0624981545457232e-05, "loss": 0.6029, "step": 159175 }, { "epoch": 1.76, "learning_rate": 2.0624058818318716e-05, "loss": 0.5847, "step": 159180 }, { "epoch": 1.76, "learning_rate": 2.0623136091180208e-05, "loss": 0.6006, "step": 159185 }, { "epoch": 1.76, "learning_rate": 2.0622213364041692e-05, "loss": 0.6415, "step": 159190 }, { "epoch": 1.76, "learning_rate": 2.062129063690318e-05, "loss": 0.5767, "step": 159195 }, { "epoch": 1.76, "learning_rate": 2.0620367909764668e-05, "loss": 0.5957, "step": 159200 }, { "epoch": 1.76, "learning_rate": 2.0619445182626156e-05, "loss": 0.5918, "step": 159205 }, { "epoch": 1.76, "learning_rate": 2.0618522455487644e-05, "loss": 0.6724, "step": 159210 }, { "epoch": 1.76, "learning_rate": 2.061759972834913e-05, "loss": 0.662, "step": 159215 }, { "epoch": 1.76, "learning_rate": 2.061667700121062e-05, "loss": 0.6021, "step": 159220 }, { "epoch": 1.76, "learning_rate": 2.0615754274072107e-05, "loss": 0.6565, "step": 159225 }, { "epoch": 1.76, "learning_rate": 2.0614831546933595e-05, "loss": 0.6292, "step": 159230 }, { "epoch": 1.76, "learning_rate": 2.0613908819795083e-05, "loss": 0.6199, "step": 159235 }, { "epoch": 1.76, "learning_rate": 2.061298609265657e-05, "loss": 0.6584, "step": 159240 }, { "epoch": 1.76, "learning_rate": 2.0612063365518055e-05, "loss": 0.5761, "step": 159245 }, { "epoch": 1.76, "learning_rate": 2.0611140638379546e-05, "loss": 0.5866, "step": 159250 }, { "epoch": 1.76, "learning_rate": 2.061021791124103e-05, "loss": 0.6577, "step": 159255 }, { "epoch": 1.76, "learning_rate": 2.060929518410252e-05, "loss": 0.6461, "step": 159260 }, { "epoch": 1.76, "learning_rate": 2.0608372456964006e-05, "loss": 0.6341, "step": 159265 }, { "epoch": 1.76, "learning_rate": 2.0607449729825494e-05, "loss": 0.604, "step": 159270 }, { "epoch": 1.76, "learning_rate": 2.0606527002686982e-05, "loss": 0.6023, "step": 159275 }, { "epoch": 1.76, "learning_rate": 2.060560427554847e-05, "loss": 0.5931, "step": 159280 }, { "epoch": 1.76, "learning_rate": 2.0604681548409958e-05, "loss": 0.5942, "step": 159285 }, { "epoch": 1.76, "learning_rate": 2.0603758821271446e-05, "loss": 0.6278, "step": 159290 }, { "epoch": 1.76, "learning_rate": 2.0602836094132934e-05, "loss": 0.6187, "step": 159295 }, { "epoch": 1.76, "learning_rate": 2.060191336699442e-05, "loss": 0.6141, "step": 159300 }, { "epoch": 1.76, "learning_rate": 2.060099063985591e-05, "loss": 0.6608, "step": 159305 }, { "epoch": 1.76, "learning_rate": 2.0600067912717397e-05, "loss": 0.6386, "step": 159310 }, { "epoch": 1.76, "learning_rate": 2.059914518557888e-05, "loss": 0.6227, "step": 159315 }, { "epoch": 1.76, "learning_rate": 2.059822245844037e-05, "loss": 0.6382, "step": 159320 }, { "epoch": 1.76, "learning_rate": 2.0597299731301857e-05, "loss": 0.6498, "step": 159325 }, { "epoch": 1.76, "learning_rate": 2.0596377004163345e-05, "loss": 0.6386, "step": 159330 }, { "epoch": 1.76, "learning_rate": 2.0595454277024833e-05, "loss": 0.6842, "step": 159335 }, { "epoch": 1.76, "learning_rate": 2.059453154988632e-05, "loss": 0.675, "step": 159340 }, { "epoch": 1.76, "learning_rate": 2.059360882274781e-05, "loss": 0.6548, "step": 159345 }, { "epoch": 1.76, "learning_rate": 2.0592686095609296e-05, "loss": 0.633, "step": 159350 }, { "epoch": 1.76, "learning_rate": 2.0591763368470784e-05, "loss": 0.5949, "step": 159355 }, { "epoch": 1.76, "learning_rate": 2.0590840641332272e-05, "loss": 0.65, "step": 159360 }, { "epoch": 1.76, "learning_rate": 2.058991791419376e-05, "loss": 0.6832, "step": 159365 }, { "epoch": 1.76, "learning_rate": 2.0588995187055244e-05, "loss": 0.6271, "step": 159370 }, { "epoch": 1.76, "learning_rate": 2.0588072459916736e-05, "loss": 0.6056, "step": 159375 }, { "epoch": 1.76, "learning_rate": 2.058714973277822e-05, "loss": 0.5923, "step": 159380 }, { "epoch": 1.76, "learning_rate": 2.058622700563971e-05, "loss": 0.5983, "step": 159385 }, { "epoch": 1.76, "learning_rate": 2.0585304278501196e-05, "loss": 0.647, "step": 159390 }, { "epoch": 1.76, "learning_rate": 2.0584381551362684e-05, "loss": 0.607, "step": 159395 }, { "epoch": 1.76, "learning_rate": 2.058345882422417e-05, "loss": 0.6414, "step": 159400 }, { "epoch": 1.77, "learning_rate": 2.058253609708566e-05, "loss": 0.6593, "step": 159405 }, { "epoch": 1.77, "learning_rate": 2.0581613369947147e-05, "loss": 0.5861, "step": 159410 }, { "epoch": 1.77, "learning_rate": 2.0580690642808635e-05, "loss": 0.6448, "step": 159415 }, { "epoch": 1.77, "learning_rate": 2.0579767915670123e-05, "loss": 0.6606, "step": 159420 }, { "epoch": 1.77, "learning_rate": 2.057884518853161e-05, "loss": 0.5811, "step": 159425 }, { "epoch": 1.77, "learning_rate": 2.05779224613931e-05, "loss": 0.6256, "step": 159430 }, { "epoch": 1.77, "learning_rate": 2.0576999734254583e-05, "loss": 0.6628, "step": 159435 }, { "epoch": 1.77, "learning_rate": 2.0576077007116074e-05, "loss": 0.6429, "step": 159440 }, { "epoch": 1.77, "learning_rate": 2.057515427997756e-05, "loss": 0.6539, "step": 159445 }, { "epoch": 1.77, "learning_rate": 2.057423155283905e-05, "loss": 0.6528, "step": 159450 }, { "epoch": 1.77, "learning_rate": 2.0573308825700535e-05, "loss": 0.6532, "step": 159455 }, { "epoch": 1.77, "learning_rate": 2.0572386098562026e-05, "loss": 0.6252, "step": 159460 }, { "epoch": 1.77, "learning_rate": 2.057146337142351e-05, "loss": 0.6343, "step": 159465 }, { "epoch": 1.77, "learning_rate": 2.0570540644284998e-05, "loss": 0.5705, "step": 159470 }, { "epoch": 1.77, "learning_rate": 2.0569617917146486e-05, "loss": 0.5622, "step": 159475 }, { "epoch": 1.77, "learning_rate": 2.0568695190007974e-05, "loss": 0.627, "step": 159480 }, { "epoch": 1.77, "learning_rate": 2.056777246286946e-05, "loss": 0.6236, "step": 159485 }, { "epoch": 1.77, "learning_rate": 2.0566849735730946e-05, "loss": 0.6055, "step": 159490 }, { "epoch": 1.77, "learning_rate": 2.0565927008592437e-05, "loss": 0.5944, "step": 159495 }, { "epoch": 1.77, "learning_rate": 2.0565004281453922e-05, "loss": 0.682, "step": 159500 }, { "epoch": 1.77, "learning_rate": 2.0564081554315413e-05, "loss": 0.6166, "step": 159505 }, { "epoch": 1.77, "learning_rate": 2.0563158827176897e-05, "loss": 0.6926, "step": 159510 }, { "epoch": 1.77, "learning_rate": 2.056223610003839e-05, "loss": 0.6166, "step": 159515 }, { "epoch": 1.77, "learning_rate": 2.0561313372899873e-05, "loss": 0.6372, "step": 159520 }, { "epoch": 1.77, "learning_rate": 2.056039064576136e-05, "loss": 0.5872, "step": 159525 }, { "epoch": 1.77, "learning_rate": 2.055946791862285e-05, "loss": 0.6636, "step": 159530 }, { "epoch": 1.77, "learning_rate": 2.0558545191484337e-05, "loss": 0.6543, "step": 159535 }, { "epoch": 1.77, "learning_rate": 2.0557622464345825e-05, "loss": 0.5692, "step": 159540 }, { "epoch": 1.77, "learning_rate": 2.055669973720731e-05, "loss": 0.6005, "step": 159545 }, { "epoch": 1.77, "learning_rate": 2.05557770100688e-05, "loss": 0.5652, "step": 159550 }, { "epoch": 1.77, "learning_rate": 2.0554854282930285e-05, "loss": 0.616, "step": 159555 }, { "epoch": 1.77, "learning_rate": 2.0553931555791776e-05, "loss": 0.6614, "step": 159560 }, { "epoch": 1.77, "learning_rate": 2.055300882865326e-05, "loss": 0.6414, "step": 159565 }, { "epoch": 1.77, "learning_rate": 2.055208610151475e-05, "loss": 0.6356, "step": 159570 }, { "epoch": 1.77, "learning_rate": 2.0551163374376236e-05, "loss": 0.6121, "step": 159575 }, { "epoch": 1.77, "learning_rate": 2.0550240647237727e-05, "loss": 0.6347, "step": 159580 }, { "epoch": 1.77, "learning_rate": 2.0549317920099212e-05, "loss": 0.6422, "step": 159585 }, { "epoch": 1.77, "learning_rate": 2.05483951929607e-05, "loss": 0.616, "step": 159590 }, { "epoch": 1.77, "learning_rate": 2.0547472465822188e-05, "loss": 0.629, "step": 159595 }, { "epoch": 1.77, "learning_rate": 2.0546549738683675e-05, "loss": 0.6206, "step": 159600 }, { "epoch": 1.77, "learning_rate": 2.0545627011545163e-05, "loss": 0.5588, "step": 159605 }, { "epoch": 1.77, "learning_rate": 2.054470428440665e-05, "loss": 0.6586, "step": 159610 }, { "epoch": 1.77, "learning_rate": 2.054378155726814e-05, "loss": 0.6422, "step": 159615 }, { "epoch": 1.77, "learning_rate": 2.0542858830129623e-05, "loss": 0.6052, "step": 159620 }, { "epoch": 1.77, "learning_rate": 2.0541936102991115e-05, "loss": 0.5964, "step": 159625 }, { "epoch": 1.77, "learning_rate": 2.05410133758526e-05, "loss": 0.662, "step": 159630 }, { "epoch": 1.77, "learning_rate": 2.054009064871409e-05, "loss": 0.5336, "step": 159635 }, { "epoch": 1.77, "learning_rate": 2.0539167921575575e-05, "loss": 0.6911, "step": 159640 }, { "epoch": 1.77, "learning_rate": 2.0538245194437063e-05, "loss": 0.6567, "step": 159645 }, { "epoch": 1.77, "learning_rate": 2.053732246729855e-05, "loss": 0.5842, "step": 159650 }, { "epoch": 1.77, "learning_rate": 2.053639974016004e-05, "loss": 0.6168, "step": 159655 }, { "epoch": 1.77, "learning_rate": 2.0535477013021526e-05, "loss": 0.6761, "step": 159660 }, { "epoch": 1.77, "learning_rate": 2.0534554285883014e-05, "loss": 0.6394, "step": 159665 }, { "epoch": 1.77, "learning_rate": 2.0533631558744502e-05, "loss": 0.5969, "step": 159670 }, { "epoch": 1.77, "learning_rate": 2.053270883160599e-05, "loss": 0.649, "step": 159675 }, { "epoch": 1.77, "learning_rate": 2.0531786104467478e-05, "loss": 0.6004, "step": 159680 }, { "epoch": 1.77, "learning_rate": 2.0530863377328965e-05, "loss": 0.6189, "step": 159685 }, { "epoch": 1.77, "learning_rate": 2.0529940650190453e-05, "loss": 0.6212, "step": 159690 }, { "epoch": 1.77, "learning_rate": 2.052901792305194e-05, "loss": 0.625, "step": 159695 }, { "epoch": 1.77, "learning_rate": 2.0528095195913426e-05, "loss": 0.6037, "step": 159700 }, { "epoch": 1.77, "learning_rate": 2.0527172468774913e-05, "loss": 0.6353, "step": 159705 }, { "epoch": 1.77, "learning_rate": 2.05262497416364e-05, "loss": 0.5656, "step": 159710 }, { "epoch": 1.77, "learning_rate": 2.052532701449789e-05, "loss": 0.6059, "step": 159715 }, { "epoch": 1.77, "learning_rate": 2.0524404287359377e-05, "loss": 0.5926, "step": 159720 }, { "epoch": 1.77, "learning_rate": 2.0523481560220865e-05, "loss": 0.6445, "step": 159725 }, { "epoch": 1.77, "learning_rate": 2.0522558833082353e-05, "loss": 0.6252, "step": 159730 }, { "epoch": 1.77, "learning_rate": 2.052163610594384e-05, "loss": 0.648, "step": 159735 }, { "epoch": 1.77, "learning_rate": 2.052071337880533e-05, "loss": 0.6626, "step": 159740 }, { "epoch": 1.77, "learning_rate": 2.0519790651666816e-05, "loss": 0.6102, "step": 159745 }, { "epoch": 1.77, "learning_rate": 2.0518867924528304e-05, "loss": 0.6525, "step": 159750 }, { "epoch": 1.77, "learning_rate": 2.051794519738979e-05, "loss": 0.5615, "step": 159755 }, { "epoch": 1.77, "learning_rate": 2.051702247025128e-05, "loss": 0.6374, "step": 159760 }, { "epoch": 1.77, "learning_rate": 2.0516099743112764e-05, "loss": 0.6768, "step": 159765 }, { "epoch": 1.77, "learning_rate": 2.0515177015974255e-05, "loss": 0.6059, "step": 159770 }, { "epoch": 1.77, "learning_rate": 2.051425428883574e-05, "loss": 0.6567, "step": 159775 }, { "epoch": 1.77, "learning_rate": 2.0513331561697228e-05, "loss": 0.591, "step": 159780 }, { "epoch": 1.77, "learning_rate": 2.0512408834558716e-05, "loss": 0.6291, "step": 159785 }, { "epoch": 1.77, "learning_rate": 2.0511486107420203e-05, "loss": 0.6513, "step": 159790 }, { "epoch": 1.77, "learning_rate": 2.051056338028169e-05, "loss": 0.6057, "step": 159795 }, { "epoch": 1.77, "learning_rate": 2.050964065314318e-05, "loss": 0.5989, "step": 159800 }, { "epoch": 1.77, "learning_rate": 2.0508717926004667e-05, "loss": 0.5705, "step": 159805 }, { "epoch": 1.77, "learning_rate": 2.0507795198866155e-05, "loss": 0.6479, "step": 159810 }, { "epoch": 1.77, "learning_rate": 2.0506872471727643e-05, "loss": 0.6108, "step": 159815 }, { "epoch": 1.77, "learning_rate": 2.0505949744589127e-05, "loss": 0.602, "step": 159820 }, { "epoch": 1.77, "learning_rate": 2.050502701745062e-05, "loss": 0.6003, "step": 159825 }, { "epoch": 1.77, "learning_rate": 2.0504104290312103e-05, "loss": 0.61, "step": 159830 }, { "epoch": 1.77, "learning_rate": 2.0503181563173594e-05, "loss": 0.6234, "step": 159835 }, { "epoch": 1.77, "learning_rate": 2.050225883603508e-05, "loss": 0.5916, "step": 159840 }, { "epoch": 1.77, "learning_rate": 2.050133610889657e-05, "loss": 0.6237, "step": 159845 }, { "epoch": 1.77, "learning_rate": 2.0500413381758054e-05, "loss": 0.6181, "step": 159850 }, { "epoch": 1.77, "learning_rate": 2.0499490654619542e-05, "loss": 0.6793, "step": 159855 }, { "epoch": 1.77, "learning_rate": 2.049856792748103e-05, "loss": 0.6113, "step": 159860 }, { "epoch": 1.77, "learning_rate": 2.0497645200342518e-05, "loss": 0.6514, "step": 159865 }, { "epoch": 1.77, "learning_rate": 2.0496722473204006e-05, "loss": 0.6072, "step": 159870 }, { "epoch": 1.77, "learning_rate": 2.049579974606549e-05, "loss": 0.6128, "step": 159875 }, { "epoch": 1.77, "learning_rate": 2.049487701892698e-05, "loss": 0.6335, "step": 159880 }, { "epoch": 1.77, "learning_rate": 2.0493954291788466e-05, "loss": 0.6556, "step": 159885 }, { "epoch": 1.77, "learning_rate": 2.0493031564649957e-05, "loss": 0.6986, "step": 159890 }, { "epoch": 1.77, "learning_rate": 2.049210883751144e-05, "loss": 0.5823, "step": 159895 }, { "epoch": 1.77, "learning_rate": 2.0491186110372933e-05, "loss": 0.6199, "step": 159900 }, { "epoch": 1.77, "learning_rate": 2.0490263383234417e-05, "loss": 0.6092, "step": 159905 }, { "epoch": 1.77, "learning_rate": 2.0489340656095905e-05, "loss": 0.5732, "step": 159910 }, { "epoch": 1.77, "learning_rate": 2.0488417928957393e-05, "loss": 0.5739, "step": 159915 }, { "epoch": 1.77, "learning_rate": 2.048749520181888e-05, "loss": 0.6161, "step": 159920 }, { "epoch": 1.77, "learning_rate": 2.048657247468037e-05, "loss": 0.6739, "step": 159925 }, { "epoch": 1.77, "learning_rate": 2.0485649747541853e-05, "loss": 0.619, "step": 159930 }, { "epoch": 1.77, "learning_rate": 2.0484727020403344e-05, "loss": 0.5715, "step": 159935 }, { "epoch": 1.77, "learning_rate": 2.048380429326483e-05, "loss": 0.5698, "step": 159940 }, { "epoch": 1.77, "learning_rate": 2.048288156612632e-05, "loss": 0.5613, "step": 159945 }, { "epoch": 1.77, "learning_rate": 2.0481958838987804e-05, "loss": 0.6334, "step": 159950 }, { "epoch": 1.77, "learning_rate": 2.0481036111849296e-05, "loss": 0.5737, "step": 159955 }, { "epoch": 1.77, "learning_rate": 2.048011338471078e-05, "loss": 0.6024, "step": 159960 }, { "epoch": 1.77, "learning_rate": 2.047919065757227e-05, "loss": 0.5917, "step": 159965 }, { "epoch": 1.77, "learning_rate": 2.0478267930433756e-05, "loss": 0.6359, "step": 159970 }, { "epoch": 1.77, "learning_rate": 2.0477345203295244e-05, "loss": 0.6252, "step": 159975 }, { "epoch": 1.77, "learning_rate": 2.047642247615673e-05, "loss": 0.6481, "step": 159980 }, { "epoch": 1.77, "learning_rate": 2.047549974901822e-05, "loss": 0.5984, "step": 159985 }, { "epoch": 1.77, "learning_rate": 2.0474577021879707e-05, "loss": 0.5953, "step": 159990 }, { "epoch": 1.77, "learning_rate": 2.0473654294741195e-05, "loss": 0.6515, "step": 159995 }, { "epoch": 1.77, "learning_rate": 2.0472731567602683e-05, "loss": 0.5574, "step": 160000 }, { "epoch": 1.77, "eval_loss": 0.5795533061027527, "eval_runtime": 69.112, "eval_samples_per_second": 28.939, "eval_steps_per_second": 14.469, "step": 160000 }, { "epoch": 1.77, "learning_rate": 2.0471808840464167e-05, "loss": 0.6036, "step": 160005 }, { "epoch": 1.77, "learning_rate": 2.047088611332566e-05, "loss": 0.6148, "step": 160010 }, { "epoch": 1.77, "learning_rate": 2.0469963386187143e-05, "loss": 0.6527, "step": 160015 }, { "epoch": 1.77, "learning_rate": 2.0469040659048634e-05, "loss": 0.5979, "step": 160020 }, { "epoch": 1.77, "learning_rate": 2.046811793191012e-05, "loss": 0.6211, "step": 160025 }, { "epoch": 1.77, "learning_rate": 2.0467195204771607e-05, "loss": 0.5972, "step": 160030 }, { "epoch": 1.77, "learning_rate": 2.0466272477633094e-05, "loss": 0.6296, "step": 160035 }, { "epoch": 1.77, "learning_rate": 2.0465349750494582e-05, "loss": 0.5967, "step": 160040 }, { "epoch": 1.77, "learning_rate": 2.046442702335607e-05, "loss": 0.6244, "step": 160045 }, { "epoch": 1.77, "learning_rate": 2.0463504296217558e-05, "loss": 0.648, "step": 160050 }, { "epoch": 1.77, "learning_rate": 2.0462581569079046e-05, "loss": 0.601, "step": 160055 }, { "epoch": 1.77, "learning_rate": 2.0461658841940534e-05, "loss": 0.6132, "step": 160060 }, { "epoch": 1.77, "learning_rate": 2.046073611480202e-05, "loss": 0.6028, "step": 160065 }, { "epoch": 1.77, "learning_rate": 2.045981338766351e-05, "loss": 0.6077, "step": 160070 }, { "epoch": 1.77, "learning_rate": 2.0458890660524997e-05, "loss": 0.6601, "step": 160075 }, { "epoch": 1.77, "learning_rate": 2.0457967933386482e-05, "loss": 0.6402, "step": 160080 }, { "epoch": 1.77, "learning_rate": 2.045704520624797e-05, "loss": 0.5339, "step": 160085 }, { "epoch": 1.77, "learning_rate": 2.0456122479109457e-05, "loss": 0.5958, "step": 160090 }, { "epoch": 1.77, "learning_rate": 2.0455199751970945e-05, "loss": 0.6431, "step": 160095 }, { "epoch": 1.77, "learning_rate": 2.0454277024832433e-05, "loss": 0.6379, "step": 160100 }, { "epoch": 1.77, "learning_rate": 2.045335429769392e-05, "loss": 0.627, "step": 160105 }, { "epoch": 1.77, "learning_rate": 2.045243157055541e-05, "loss": 0.6112, "step": 160110 }, { "epoch": 1.77, "learning_rate": 2.0451508843416897e-05, "loss": 0.6682, "step": 160115 }, { "epoch": 1.77, "learning_rate": 2.0450586116278385e-05, "loss": 0.6908, "step": 160120 }, { "epoch": 1.77, "learning_rate": 2.0449663389139872e-05, "loss": 0.6427, "step": 160125 }, { "epoch": 1.77, "learning_rate": 2.044874066200136e-05, "loss": 0.6106, "step": 160130 }, { "epoch": 1.77, "learning_rate": 2.0447817934862848e-05, "loss": 0.6268, "step": 160135 }, { "epoch": 1.77, "learning_rate": 2.0446895207724333e-05, "loss": 0.6825, "step": 160140 }, { "epoch": 1.77, "learning_rate": 2.0445972480585824e-05, "loss": 0.5814, "step": 160145 }, { "epoch": 1.77, "learning_rate": 2.0445049753447308e-05, "loss": 0.6107, "step": 160150 }, { "epoch": 1.77, "learning_rate": 2.0444127026308796e-05, "loss": 0.578, "step": 160155 }, { "epoch": 1.77, "learning_rate": 2.0443204299170284e-05, "loss": 0.5942, "step": 160160 }, { "epoch": 1.77, "learning_rate": 2.0442281572031772e-05, "loss": 0.5763, "step": 160165 }, { "epoch": 1.77, "learning_rate": 2.044135884489326e-05, "loss": 0.6329, "step": 160170 }, { "epoch": 1.77, "learning_rate": 2.0440436117754747e-05, "loss": 0.6263, "step": 160175 }, { "epoch": 1.77, "learning_rate": 2.0439513390616235e-05, "loss": 0.6123, "step": 160180 }, { "epoch": 1.77, "learning_rate": 2.0438590663477723e-05, "loss": 0.5578, "step": 160185 }, { "epoch": 1.77, "learning_rate": 2.043766793633921e-05, "loss": 0.6253, "step": 160190 }, { "epoch": 1.77, "learning_rate": 2.04367452092007e-05, "loss": 0.6077, "step": 160195 }, { "epoch": 1.77, "learning_rate": 2.0435822482062187e-05, "loss": 0.6188, "step": 160200 }, { "epoch": 1.77, "learning_rate": 2.043489975492367e-05, "loss": 0.6004, "step": 160205 }, { "epoch": 1.77, "learning_rate": 2.0433977027785162e-05, "loss": 0.5797, "step": 160210 }, { "epoch": 1.77, "learning_rate": 2.0433054300646647e-05, "loss": 0.6109, "step": 160215 }, { "epoch": 1.77, "learning_rate": 2.0432131573508138e-05, "loss": 0.5495, "step": 160220 }, { "epoch": 1.77, "learning_rate": 2.0431208846369623e-05, "loss": 0.6167, "step": 160225 }, { "epoch": 1.77, "learning_rate": 2.043028611923111e-05, "loss": 0.579, "step": 160230 }, { "epoch": 1.77, "learning_rate": 2.0429363392092598e-05, "loss": 0.5897, "step": 160235 }, { "epoch": 1.77, "learning_rate": 2.0428440664954086e-05, "loss": 0.6124, "step": 160240 }, { "epoch": 1.77, "learning_rate": 2.0427517937815574e-05, "loss": 0.6192, "step": 160245 }, { "epoch": 1.77, "learning_rate": 2.0426595210677062e-05, "loss": 0.6456, "step": 160250 }, { "epoch": 1.77, "learning_rate": 2.042567248353855e-05, "loss": 0.5807, "step": 160255 }, { "epoch": 1.77, "learning_rate": 2.0424749756400034e-05, "loss": 0.6745, "step": 160260 }, { "epoch": 1.77, "learning_rate": 2.0423827029261525e-05, "loss": 0.6094, "step": 160265 }, { "epoch": 1.77, "learning_rate": 2.042290430212301e-05, "loss": 0.6148, "step": 160270 }, { "epoch": 1.77, "learning_rate": 2.04219815749845e-05, "loss": 0.6658, "step": 160275 }, { "epoch": 1.77, "learning_rate": 2.0421058847845986e-05, "loss": 0.6212, "step": 160280 }, { "epoch": 1.77, "learning_rate": 2.0420136120707477e-05, "loss": 0.6384, "step": 160285 }, { "epoch": 1.77, "learning_rate": 2.041921339356896e-05, "loss": 0.6281, "step": 160290 }, { "epoch": 1.77, "learning_rate": 2.041829066643045e-05, "loss": 0.6383, "step": 160295 }, { "epoch": 1.77, "learning_rate": 2.0417367939291937e-05, "loss": 0.6087, "step": 160300 }, { "epoch": 1.78, "learning_rate": 2.0416445212153425e-05, "loss": 0.6, "step": 160305 }, { "epoch": 1.78, "learning_rate": 2.0415522485014913e-05, "loss": 0.6445, "step": 160310 }, { "epoch": 1.78, "learning_rate": 2.0414599757876397e-05, "loss": 0.6153, "step": 160315 }, { "epoch": 1.78, "learning_rate": 2.041367703073789e-05, "loss": 0.5848, "step": 160320 }, { "epoch": 1.78, "learning_rate": 2.0412754303599373e-05, "loss": 0.615, "step": 160325 }, { "epoch": 1.78, "learning_rate": 2.0411831576460864e-05, "loss": 0.6419, "step": 160330 }, { "epoch": 1.78, "learning_rate": 2.041090884932235e-05, "loss": 0.6149, "step": 160335 }, { "epoch": 1.78, "learning_rate": 2.040998612218384e-05, "loss": 0.6499, "step": 160340 }, { "epoch": 1.78, "learning_rate": 2.0409063395045324e-05, "loss": 0.6701, "step": 160345 }, { "epoch": 1.78, "learning_rate": 2.0408140667906815e-05, "loss": 0.6545, "step": 160350 }, { "epoch": 1.78, "learning_rate": 2.04072179407683e-05, "loss": 0.6666, "step": 160355 }, { "epoch": 1.78, "learning_rate": 2.0406295213629788e-05, "loss": 0.5728, "step": 160360 }, { "epoch": 1.78, "learning_rate": 2.0405372486491276e-05, "loss": 0.5816, "step": 160365 }, { "epoch": 1.78, "learning_rate": 2.0404449759352763e-05, "loss": 0.5647, "step": 160370 }, { "epoch": 1.78, "learning_rate": 2.040352703221425e-05, "loss": 0.622, "step": 160375 }, { "epoch": 1.78, "learning_rate": 2.0402604305075736e-05, "loss": 0.5999, "step": 160380 }, { "epoch": 1.78, "learning_rate": 2.0401681577937227e-05, "loss": 0.6292, "step": 160385 }, { "epoch": 1.78, "learning_rate": 2.040075885079871e-05, "loss": 0.582, "step": 160390 }, { "epoch": 1.78, "learning_rate": 2.0399836123660203e-05, "loss": 0.6414, "step": 160395 }, { "epoch": 1.78, "learning_rate": 2.0398913396521687e-05, "loss": 0.6582, "step": 160400 }, { "epoch": 1.78, "learning_rate": 2.039799066938318e-05, "loss": 0.6517, "step": 160405 }, { "epoch": 1.78, "learning_rate": 2.0397067942244663e-05, "loss": 0.6702, "step": 160410 }, { "epoch": 1.78, "learning_rate": 2.039614521510615e-05, "loss": 0.6344, "step": 160415 }, { "epoch": 1.78, "learning_rate": 2.039522248796764e-05, "loss": 0.63, "step": 160420 }, { "epoch": 1.78, "learning_rate": 2.0394299760829126e-05, "loss": 0.6658, "step": 160425 }, { "epoch": 1.78, "learning_rate": 2.0393377033690614e-05, "loss": 0.632, "step": 160430 }, { "epoch": 1.78, "learning_rate": 2.0392454306552102e-05, "loss": 0.6255, "step": 160435 }, { "epoch": 1.78, "learning_rate": 2.039153157941359e-05, "loss": 0.5856, "step": 160440 }, { "epoch": 1.78, "learning_rate": 2.0390608852275078e-05, "loss": 0.6171, "step": 160445 }, { "epoch": 1.78, "learning_rate": 2.0389686125136566e-05, "loss": 0.6482, "step": 160450 }, { "epoch": 1.78, "learning_rate": 2.038876339799805e-05, "loss": 0.6579, "step": 160455 }, { "epoch": 1.78, "learning_rate": 2.038784067085954e-05, "loss": 0.6383, "step": 160460 }, { "epoch": 1.78, "learning_rate": 2.0386917943721026e-05, "loss": 0.6409, "step": 160465 }, { "epoch": 1.78, "learning_rate": 2.0385995216582514e-05, "loss": 0.6613, "step": 160470 }, { "epoch": 1.78, "learning_rate": 2.0385072489444e-05, "loss": 0.6283, "step": 160475 }, { "epoch": 1.78, "learning_rate": 2.038414976230549e-05, "loss": 0.6072, "step": 160480 }, { "epoch": 1.78, "learning_rate": 2.0383227035166977e-05, "loss": 0.6282, "step": 160485 }, { "epoch": 1.78, "learning_rate": 2.0382304308028465e-05, "loss": 0.6166, "step": 160490 }, { "epoch": 1.78, "learning_rate": 2.0381381580889953e-05, "loss": 0.6306, "step": 160495 }, { "epoch": 1.78, "learning_rate": 2.038045885375144e-05, "loss": 0.6271, "step": 160500 }, { "epoch": 1.78, "learning_rate": 2.037953612661293e-05, "loss": 0.633, "step": 160505 }, { "epoch": 1.78, "learning_rate": 2.0378613399474416e-05, "loss": 0.6393, "step": 160510 }, { "epoch": 1.78, "learning_rate": 2.0377690672335904e-05, "loss": 0.6292, "step": 160515 }, { "epoch": 1.78, "learning_rate": 2.0376767945197392e-05, "loss": 0.6208, "step": 160520 }, { "epoch": 1.78, "learning_rate": 2.037584521805888e-05, "loss": 0.6266, "step": 160525 }, { "epoch": 1.78, "learning_rate": 2.0374922490920368e-05, "loss": 0.5908, "step": 160530 }, { "epoch": 1.78, "learning_rate": 2.0373999763781852e-05, "loss": 0.5664, "step": 160535 }, { "epoch": 1.78, "learning_rate": 2.037307703664334e-05, "loss": 0.5787, "step": 160540 }, { "epoch": 1.78, "learning_rate": 2.0372154309504828e-05, "loss": 0.632, "step": 160545 }, { "epoch": 1.78, "learning_rate": 2.0371231582366316e-05, "loss": 0.6458, "step": 160550 }, { "epoch": 1.78, "learning_rate": 2.0370308855227804e-05, "loss": 0.6601, "step": 160555 }, { "epoch": 1.78, "learning_rate": 2.036938612808929e-05, "loss": 0.6599, "step": 160560 }, { "epoch": 1.78, "learning_rate": 2.036846340095078e-05, "loss": 0.6689, "step": 160565 }, { "epoch": 1.78, "learning_rate": 2.0367540673812267e-05, "loss": 0.6059, "step": 160570 }, { "epoch": 1.78, "learning_rate": 2.0366617946673755e-05, "loss": 0.6472, "step": 160575 }, { "epoch": 1.78, "learning_rate": 2.0365695219535243e-05, "loss": 0.6525, "step": 160580 }, { "epoch": 1.78, "learning_rate": 2.036477249239673e-05, "loss": 0.6626, "step": 160585 }, { "epoch": 1.78, "learning_rate": 2.0363849765258215e-05, "loss": 0.6381, "step": 160590 }, { "epoch": 1.78, "learning_rate": 2.0362927038119706e-05, "loss": 0.6048, "step": 160595 }, { "epoch": 1.78, "learning_rate": 2.036200431098119e-05, "loss": 0.6506, "step": 160600 }, { "epoch": 1.78, "learning_rate": 2.0361081583842682e-05, "loss": 0.623, "step": 160605 }, { "epoch": 1.78, "learning_rate": 2.0360158856704167e-05, "loss": 0.6299, "step": 160610 }, { "epoch": 1.78, "learning_rate": 2.0359236129565654e-05, "loss": 0.6031, "step": 160615 }, { "epoch": 1.78, "learning_rate": 2.0358313402427142e-05, "loss": 0.6645, "step": 160620 }, { "epoch": 1.78, "learning_rate": 2.035739067528863e-05, "loss": 0.6166, "step": 160625 }, { "epoch": 1.78, "learning_rate": 2.0356467948150118e-05, "loss": 0.6084, "step": 160630 }, { "epoch": 1.78, "learning_rate": 2.0355545221011606e-05, "loss": 0.6448, "step": 160635 }, { "epoch": 1.78, "learning_rate": 2.0354622493873094e-05, "loss": 0.5948, "step": 160640 }, { "epoch": 1.78, "learning_rate": 2.0353699766734578e-05, "loss": 0.5973, "step": 160645 }, { "epoch": 1.78, "learning_rate": 2.035277703959607e-05, "loss": 0.6585, "step": 160650 }, { "epoch": 1.78, "learning_rate": 2.0351854312457554e-05, "loss": 0.5757, "step": 160655 }, { "epoch": 1.78, "learning_rate": 2.0350931585319045e-05, "loss": 0.6269, "step": 160660 }, { "epoch": 1.78, "learning_rate": 2.035000885818053e-05, "loss": 0.5751, "step": 160665 }, { "epoch": 1.78, "learning_rate": 2.034908613104202e-05, "loss": 0.6359, "step": 160670 }, { "epoch": 1.78, "learning_rate": 2.0348163403903505e-05, "loss": 0.6318, "step": 160675 }, { "epoch": 1.78, "learning_rate": 2.0347240676764996e-05, "loss": 0.6114, "step": 160680 }, { "epoch": 1.78, "learning_rate": 2.034631794962648e-05, "loss": 0.6312, "step": 160685 }, { "epoch": 1.78, "learning_rate": 2.034539522248797e-05, "loss": 0.5736, "step": 160690 }, { "epoch": 1.78, "learning_rate": 2.0344472495349457e-05, "loss": 0.6466, "step": 160695 }, { "epoch": 1.78, "learning_rate": 2.034354976821094e-05, "loss": 0.613, "step": 160700 }, { "epoch": 1.78, "learning_rate": 2.0342627041072432e-05, "loss": 0.6177, "step": 160705 }, { "epoch": 1.78, "learning_rate": 2.0341704313933917e-05, "loss": 0.5903, "step": 160710 }, { "epoch": 1.78, "learning_rate": 2.0340781586795408e-05, "loss": 0.5784, "step": 160715 }, { "epoch": 1.78, "learning_rate": 2.0339858859656892e-05, "loss": 0.5967, "step": 160720 }, { "epoch": 1.78, "learning_rate": 2.0338936132518384e-05, "loss": 0.6139, "step": 160725 }, { "epoch": 1.78, "learning_rate": 2.0338013405379868e-05, "loss": 0.6068, "step": 160730 }, { "epoch": 1.78, "learning_rate": 2.033709067824136e-05, "loss": 0.6021, "step": 160735 }, { "epoch": 1.78, "learning_rate": 2.0336167951102844e-05, "loss": 0.6441, "step": 160740 }, { "epoch": 1.78, "learning_rate": 2.0335245223964332e-05, "loss": 0.6061, "step": 160745 }, { "epoch": 1.78, "learning_rate": 2.033432249682582e-05, "loss": 0.6237, "step": 160750 }, { "epoch": 1.78, "learning_rate": 2.0333399769687307e-05, "loss": 0.6069, "step": 160755 }, { "epoch": 1.78, "learning_rate": 2.0332477042548795e-05, "loss": 0.5947, "step": 160760 }, { "epoch": 1.78, "learning_rate": 2.033155431541028e-05, "loss": 0.6001, "step": 160765 }, { "epoch": 1.78, "learning_rate": 2.033063158827177e-05, "loss": 0.584, "step": 160770 }, { "epoch": 1.78, "learning_rate": 2.0329708861133255e-05, "loss": 0.6167, "step": 160775 }, { "epoch": 1.78, "learning_rate": 2.0328786133994747e-05, "loss": 0.6076, "step": 160780 }, { "epoch": 1.78, "learning_rate": 2.032786340685623e-05, "loss": 0.6535, "step": 160785 }, { "epoch": 1.78, "learning_rate": 2.0326940679717722e-05, "loss": 0.6172, "step": 160790 }, { "epoch": 1.78, "learning_rate": 2.0326017952579207e-05, "loss": 0.6529, "step": 160795 }, { "epoch": 1.78, "learning_rate": 2.0325095225440695e-05, "loss": 0.6251, "step": 160800 }, { "epoch": 1.78, "learning_rate": 2.0324172498302183e-05, "loss": 0.6228, "step": 160805 }, { "epoch": 1.78, "learning_rate": 2.032324977116367e-05, "loss": 0.6158, "step": 160810 }, { "epoch": 1.78, "learning_rate": 2.0322327044025158e-05, "loss": 0.5933, "step": 160815 }, { "epoch": 1.78, "learning_rate": 2.0321404316886646e-05, "loss": 0.6161, "step": 160820 }, { "epoch": 1.78, "learning_rate": 2.0320481589748134e-05, "loss": 0.5927, "step": 160825 }, { "epoch": 1.78, "learning_rate": 2.0319558862609622e-05, "loss": 0.6297, "step": 160830 }, { "epoch": 1.78, "learning_rate": 2.031863613547111e-05, "loss": 0.6485, "step": 160835 }, { "epoch": 1.78, "learning_rate": 2.0317713408332594e-05, "loss": 0.6101, "step": 160840 }, { "epoch": 1.78, "learning_rate": 2.0316790681194085e-05, "loss": 0.6084, "step": 160845 }, { "epoch": 1.78, "learning_rate": 2.031586795405557e-05, "loss": 0.6172, "step": 160850 }, { "epoch": 1.78, "learning_rate": 2.0314945226917058e-05, "loss": 0.5948, "step": 160855 }, { "epoch": 1.78, "learning_rate": 2.0314022499778545e-05, "loss": 0.6166, "step": 160860 }, { "epoch": 1.78, "learning_rate": 2.0313099772640033e-05, "loss": 0.6572, "step": 160865 }, { "epoch": 1.78, "learning_rate": 2.031217704550152e-05, "loss": 0.582, "step": 160870 }, { "epoch": 1.78, "learning_rate": 2.031125431836301e-05, "loss": 0.5766, "step": 160875 }, { "epoch": 1.78, "learning_rate": 2.0310331591224497e-05, "loss": 0.6095, "step": 160880 }, { "epoch": 1.78, "learning_rate": 2.0309408864085985e-05, "loss": 0.572, "step": 160885 }, { "epoch": 1.78, "learning_rate": 2.0308486136947473e-05, "loss": 0.6265, "step": 160890 }, { "epoch": 1.78, "learning_rate": 2.030756340980896e-05, "loss": 0.6223, "step": 160895 }, { "epoch": 1.78, "learning_rate": 2.0306640682670448e-05, "loss": 0.6099, "step": 160900 }, { "epoch": 1.78, "learning_rate": 2.0305717955531936e-05, "loss": 0.611, "step": 160905 }, { "epoch": 1.78, "learning_rate": 2.0304795228393424e-05, "loss": 0.6414, "step": 160910 }, { "epoch": 1.78, "learning_rate": 2.030387250125491e-05, "loss": 0.6312, "step": 160915 }, { "epoch": 1.78, "learning_rate": 2.0302949774116396e-05, "loss": 0.6066, "step": 160920 }, { "epoch": 1.78, "learning_rate": 2.0302027046977884e-05, "loss": 0.6139, "step": 160925 }, { "epoch": 1.78, "learning_rate": 2.0301104319839372e-05, "loss": 0.6537, "step": 160930 }, { "epoch": 1.78, "learning_rate": 2.030018159270086e-05, "loss": 0.5805, "step": 160935 }, { "epoch": 1.78, "learning_rate": 2.0299258865562348e-05, "loss": 0.5583, "step": 160940 }, { "epoch": 1.78, "learning_rate": 2.0298336138423836e-05, "loss": 0.6293, "step": 160945 }, { "epoch": 1.78, "learning_rate": 2.0297413411285323e-05, "loss": 0.6131, "step": 160950 }, { "epoch": 1.78, "learning_rate": 2.029649068414681e-05, "loss": 0.6612, "step": 160955 }, { "epoch": 1.78, "learning_rate": 2.02955679570083e-05, "loss": 0.6034, "step": 160960 }, { "epoch": 1.78, "learning_rate": 2.0294645229869787e-05, "loss": 0.5783, "step": 160965 }, { "epoch": 1.78, "learning_rate": 2.0293722502731275e-05, "loss": 0.6224, "step": 160970 }, { "epoch": 1.78, "learning_rate": 2.029279977559276e-05, "loss": 0.5967, "step": 160975 }, { "epoch": 1.78, "learning_rate": 2.029187704845425e-05, "loss": 0.6121, "step": 160980 }, { "epoch": 1.78, "learning_rate": 2.0290954321315735e-05, "loss": 0.6273, "step": 160985 }, { "epoch": 1.78, "learning_rate": 2.0290031594177223e-05, "loss": 0.6788, "step": 160990 }, { "epoch": 1.78, "learning_rate": 2.028910886703871e-05, "loss": 0.5842, "step": 160995 }, { "epoch": 1.78, "learning_rate": 2.02881861399002e-05, "loss": 0.5983, "step": 161000 }, { "epoch": 1.78, "eval_loss": 0.6036489009857178, "eval_runtime": 69.1289, "eval_samples_per_second": 28.931, "eval_steps_per_second": 14.466, "step": 161000 }, { "epoch": 1.78, "learning_rate": 2.0287263412761686e-05, "loss": 0.5727, "step": 161005 }, { "epoch": 1.78, "learning_rate": 2.0286340685623174e-05, "loss": 0.6968, "step": 161010 }, { "epoch": 1.78, "learning_rate": 2.0285417958484662e-05, "loss": 0.6508, "step": 161015 }, { "epoch": 1.78, "learning_rate": 2.028449523134615e-05, "loss": 0.6365, "step": 161020 }, { "epoch": 1.78, "learning_rate": 2.0283572504207638e-05, "loss": 0.5983, "step": 161025 }, { "epoch": 1.78, "learning_rate": 2.0282649777069122e-05, "loss": 0.6388, "step": 161030 }, { "epoch": 1.78, "learning_rate": 2.0281727049930613e-05, "loss": 0.6368, "step": 161035 }, { "epoch": 1.78, "learning_rate": 2.0280804322792098e-05, "loss": 0.6311, "step": 161040 }, { "epoch": 1.78, "learning_rate": 2.027988159565359e-05, "loss": 0.5942, "step": 161045 }, { "epoch": 1.78, "learning_rate": 2.0278958868515074e-05, "loss": 0.6408, "step": 161050 }, { "epoch": 1.78, "learning_rate": 2.0278036141376565e-05, "loss": 0.5692, "step": 161055 }, { "epoch": 1.78, "learning_rate": 2.027711341423805e-05, "loss": 0.6268, "step": 161060 }, { "epoch": 1.78, "learning_rate": 2.0276190687099537e-05, "loss": 0.5873, "step": 161065 }, { "epoch": 1.78, "learning_rate": 2.0275267959961025e-05, "loss": 0.5899, "step": 161070 }, { "epoch": 1.78, "learning_rate": 2.0274345232822513e-05, "loss": 0.6298, "step": 161075 }, { "epoch": 1.78, "learning_rate": 2.0273422505684e-05, "loss": 0.6469, "step": 161080 }, { "epoch": 1.78, "learning_rate": 2.0272499778545485e-05, "loss": 0.6077, "step": 161085 }, { "epoch": 1.78, "learning_rate": 2.0271577051406976e-05, "loss": 0.6537, "step": 161090 }, { "epoch": 1.78, "learning_rate": 2.027065432426846e-05, "loss": 0.63, "step": 161095 }, { "epoch": 1.78, "learning_rate": 2.0269731597129952e-05, "loss": 0.5914, "step": 161100 }, { "epoch": 1.78, "learning_rate": 2.0268808869991437e-05, "loss": 0.5862, "step": 161105 }, { "epoch": 1.78, "learning_rate": 2.0267886142852928e-05, "loss": 0.615, "step": 161110 }, { "epoch": 1.78, "learning_rate": 2.0266963415714412e-05, "loss": 0.641, "step": 161115 }, { "epoch": 1.78, "learning_rate": 2.0266040688575903e-05, "loss": 0.6053, "step": 161120 }, { "epoch": 1.78, "learning_rate": 2.0265117961437388e-05, "loss": 0.5999, "step": 161125 }, { "epoch": 1.78, "learning_rate": 2.0264195234298876e-05, "loss": 0.6126, "step": 161130 }, { "epoch": 1.78, "learning_rate": 2.0263272507160364e-05, "loss": 0.6121, "step": 161135 }, { "epoch": 1.78, "learning_rate": 2.026234978002185e-05, "loss": 0.6563, "step": 161140 }, { "epoch": 1.78, "learning_rate": 2.026142705288334e-05, "loss": 0.6242, "step": 161145 }, { "epoch": 1.78, "learning_rate": 2.0260504325744824e-05, "loss": 0.6708, "step": 161150 }, { "epoch": 1.78, "learning_rate": 2.0259581598606315e-05, "loss": 0.6238, "step": 161155 }, { "epoch": 1.78, "learning_rate": 2.02586588714678e-05, "loss": 0.6389, "step": 161160 }, { "epoch": 1.78, "learning_rate": 2.025773614432929e-05, "loss": 0.6017, "step": 161165 }, { "epoch": 1.78, "learning_rate": 2.0256813417190775e-05, "loss": 0.6156, "step": 161170 }, { "epoch": 1.78, "learning_rate": 2.0255890690052266e-05, "loss": 0.5949, "step": 161175 }, { "epoch": 1.78, "learning_rate": 2.025496796291375e-05, "loss": 0.622, "step": 161180 }, { "epoch": 1.78, "learning_rate": 2.025404523577524e-05, "loss": 0.6402, "step": 161185 }, { "epoch": 1.78, "learning_rate": 2.0253122508636727e-05, "loss": 0.5848, "step": 161190 }, { "epoch": 1.78, "learning_rate": 2.0252199781498214e-05, "loss": 0.6613, "step": 161195 }, { "epoch": 1.78, "learning_rate": 2.0251277054359702e-05, "loss": 0.5606, "step": 161200 }, { "epoch": 1.78, "learning_rate": 2.025035432722119e-05, "loss": 0.5605, "step": 161205 }, { "epoch": 1.79, "learning_rate": 2.0249431600082678e-05, "loss": 0.6191, "step": 161210 }, { "epoch": 1.79, "learning_rate": 2.0248508872944162e-05, "loss": 0.6121, "step": 161215 }, { "epoch": 1.79, "learning_rate": 2.0247586145805654e-05, "loss": 0.5953, "step": 161220 }, { "epoch": 1.79, "learning_rate": 2.0246663418667138e-05, "loss": 0.6015, "step": 161225 }, { "epoch": 1.79, "learning_rate": 2.024574069152863e-05, "loss": 0.5942, "step": 161230 }, { "epoch": 1.79, "learning_rate": 2.0244817964390114e-05, "loss": 0.6112, "step": 161235 }, { "epoch": 1.79, "learning_rate": 2.02438952372516e-05, "loss": 0.6198, "step": 161240 }, { "epoch": 1.79, "learning_rate": 2.024297251011309e-05, "loss": 0.5659, "step": 161245 }, { "epoch": 1.79, "learning_rate": 2.0242049782974577e-05, "loss": 0.5774, "step": 161250 }, { "epoch": 1.79, "learning_rate": 2.0241127055836065e-05, "loss": 0.6501, "step": 161255 }, { "epoch": 1.79, "learning_rate": 2.0240204328697553e-05, "loss": 0.5925, "step": 161260 }, { "epoch": 1.79, "learning_rate": 2.023928160155904e-05, "loss": 0.6448, "step": 161265 }, { "epoch": 1.79, "learning_rate": 2.023835887442053e-05, "loss": 0.6224, "step": 161270 }, { "epoch": 1.79, "learning_rate": 2.0237436147282017e-05, "loss": 0.6063, "step": 161275 }, { "epoch": 1.79, "learning_rate": 2.0236513420143504e-05, "loss": 0.6438, "step": 161280 }, { "epoch": 1.79, "learning_rate": 2.0235590693004992e-05, "loss": 0.5925, "step": 161285 }, { "epoch": 1.79, "learning_rate": 2.0234667965866477e-05, "loss": 0.6101, "step": 161290 }, { "epoch": 1.79, "learning_rate": 2.0233745238727968e-05, "loss": 0.5815, "step": 161295 }, { "epoch": 1.79, "learning_rate": 2.0232822511589452e-05, "loss": 0.6419, "step": 161300 }, { "epoch": 1.79, "learning_rate": 2.023189978445094e-05, "loss": 0.6082, "step": 161305 }, { "epoch": 1.79, "learning_rate": 2.0230977057312428e-05, "loss": 0.6388, "step": 161310 }, { "epoch": 1.79, "learning_rate": 2.0230054330173916e-05, "loss": 0.6227, "step": 161315 }, { "epoch": 1.79, "learning_rate": 2.0229131603035404e-05, "loss": 0.6635, "step": 161320 }, { "epoch": 1.79, "learning_rate": 2.022820887589689e-05, "loss": 0.6111, "step": 161325 }, { "epoch": 1.79, "learning_rate": 2.022728614875838e-05, "loss": 0.6354, "step": 161330 }, { "epoch": 1.79, "learning_rate": 2.0226363421619867e-05, "loss": 0.6011, "step": 161335 }, { "epoch": 1.79, "learning_rate": 2.0225440694481355e-05, "loss": 0.6372, "step": 161340 }, { "epoch": 1.79, "learning_rate": 2.0224517967342843e-05, "loss": 0.5937, "step": 161345 }, { "epoch": 1.79, "learning_rate": 2.022359524020433e-05, "loss": 0.6684, "step": 161350 }, { "epoch": 1.79, "learning_rate": 2.022267251306582e-05, "loss": 0.639, "step": 161355 }, { "epoch": 1.79, "learning_rate": 2.0221749785927303e-05, "loss": 0.6096, "step": 161360 }, { "epoch": 1.79, "learning_rate": 2.0220827058788794e-05, "loss": 0.6448, "step": 161365 }, { "epoch": 1.79, "learning_rate": 2.021990433165028e-05, "loss": 0.6036, "step": 161370 }, { "epoch": 1.79, "learning_rate": 2.0218981604511767e-05, "loss": 0.6057, "step": 161375 }, { "epoch": 1.79, "learning_rate": 2.0218058877373255e-05, "loss": 0.6117, "step": 161380 }, { "epoch": 1.79, "learning_rate": 2.0217136150234742e-05, "loss": 0.6414, "step": 161385 }, { "epoch": 1.79, "learning_rate": 2.021621342309623e-05, "loss": 0.6369, "step": 161390 }, { "epoch": 1.79, "learning_rate": 2.0215290695957718e-05, "loss": 0.5632, "step": 161395 }, { "epoch": 1.79, "learning_rate": 2.0214367968819206e-05, "loss": 0.5992, "step": 161400 }, { "epoch": 1.79, "learning_rate": 2.0213445241680694e-05, "loss": 0.6472, "step": 161405 }, { "epoch": 1.79, "learning_rate": 2.0212522514542182e-05, "loss": 0.6252, "step": 161410 }, { "epoch": 1.79, "learning_rate": 2.0211599787403666e-05, "loss": 0.6537, "step": 161415 }, { "epoch": 1.79, "learning_rate": 2.0210677060265157e-05, "loss": 0.6643, "step": 161420 }, { "epoch": 1.79, "learning_rate": 2.0209754333126642e-05, "loss": 0.5558, "step": 161425 }, { "epoch": 1.79, "learning_rate": 2.0208831605988133e-05, "loss": 0.6208, "step": 161430 }, { "epoch": 1.79, "learning_rate": 2.0207908878849618e-05, "loss": 0.6213, "step": 161435 }, { "epoch": 1.79, "learning_rate": 2.020698615171111e-05, "loss": 0.6123, "step": 161440 }, { "epoch": 1.79, "learning_rate": 2.0206063424572593e-05, "loss": 0.6377, "step": 161445 }, { "epoch": 1.79, "learning_rate": 2.020514069743408e-05, "loss": 0.5994, "step": 161450 }, { "epoch": 1.79, "learning_rate": 2.020421797029557e-05, "loss": 0.6688, "step": 161455 }, { "epoch": 1.79, "learning_rate": 2.0203295243157057e-05, "loss": 0.5992, "step": 161460 }, { "epoch": 1.79, "learning_rate": 2.0202372516018545e-05, "loss": 0.6178, "step": 161465 }, { "epoch": 1.79, "learning_rate": 2.020144978888003e-05, "loss": 0.6264, "step": 161470 }, { "epoch": 1.79, "learning_rate": 2.020052706174152e-05, "loss": 0.6586, "step": 161475 }, { "epoch": 1.79, "learning_rate": 2.0199604334603005e-05, "loss": 0.6033, "step": 161480 }, { "epoch": 1.79, "learning_rate": 2.0198681607464496e-05, "loss": 0.6128, "step": 161485 }, { "epoch": 1.79, "learning_rate": 2.019775888032598e-05, "loss": 0.6265, "step": 161490 }, { "epoch": 1.79, "learning_rate": 2.0196836153187472e-05, "loss": 0.6223, "step": 161495 }, { "epoch": 1.79, "learning_rate": 2.0195913426048956e-05, "loss": 0.6549, "step": 161500 }, { "epoch": 1.79, "learning_rate": 2.0194990698910447e-05, "loss": 0.6312, "step": 161505 }, { "epoch": 1.79, "learning_rate": 2.0194067971771932e-05, "loss": 0.6512, "step": 161510 }, { "epoch": 1.79, "learning_rate": 2.019314524463342e-05, "loss": 0.6289, "step": 161515 }, { "epoch": 1.79, "learning_rate": 2.0192222517494908e-05, "loss": 0.6474, "step": 161520 }, { "epoch": 1.79, "learning_rate": 2.0191299790356395e-05, "loss": 0.6237, "step": 161525 }, { "epoch": 1.79, "learning_rate": 2.0190377063217883e-05, "loss": 0.6772, "step": 161530 }, { "epoch": 1.79, "learning_rate": 2.0189454336079368e-05, "loss": 0.6121, "step": 161535 }, { "epoch": 1.79, "learning_rate": 2.018853160894086e-05, "loss": 0.5973, "step": 161540 }, { "epoch": 1.79, "learning_rate": 2.0187608881802343e-05, "loss": 0.6064, "step": 161545 }, { "epoch": 1.79, "learning_rate": 2.0186686154663835e-05, "loss": 0.5838, "step": 161550 }, { "epoch": 1.79, "learning_rate": 2.018576342752532e-05, "loss": 0.6356, "step": 161555 }, { "epoch": 1.79, "learning_rate": 2.018484070038681e-05, "loss": 0.6245, "step": 161560 }, { "epoch": 1.79, "learning_rate": 2.0183917973248295e-05, "loss": 0.6605, "step": 161565 }, { "epoch": 1.79, "learning_rate": 2.0182995246109783e-05, "loss": 0.6372, "step": 161570 }, { "epoch": 1.79, "learning_rate": 2.018207251897127e-05, "loss": 0.619, "step": 161575 }, { "epoch": 1.79, "learning_rate": 2.018114979183276e-05, "loss": 0.6619, "step": 161580 }, { "epoch": 1.79, "learning_rate": 2.0180227064694246e-05, "loss": 0.6072, "step": 161585 }, { "epoch": 1.79, "learning_rate": 2.0179304337555734e-05, "loss": 0.5908, "step": 161590 }, { "epoch": 1.79, "learning_rate": 2.0178381610417222e-05, "loss": 0.6805, "step": 161595 }, { "epoch": 1.79, "learning_rate": 2.0177458883278706e-05, "loss": 0.674, "step": 161600 }, { "epoch": 1.79, "learning_rate": 2.0176536156140198e-05, "loss": 0.6117, "step": 161605 }, { "epoch": 1.79, "learning_rate": 2.0175613429001682e-05, "loss": 0.5832, "step": 161610 }, { "epoch": 1.79, "learning_rate": 2.0174690701863173e-05, "loss": 0.6392, "step": 161615 }, { "epoch": 1.79, "learning_rate": 2.0173767974724658e-05, "loss": 0.6754, "step": 161620 }, { "epoch": 1.79, "learning_rate": 2.0172845247586146e-05, "loss": 0.6574, "step": 161625 }, { "epoch": 1.79, "learning_rate": 2.0171922520447634e-05, "loss": 0.6328, "step": 161630 }, { "epoch": 1.79, "learning_rate": 2.017099979330912e-05, "loss": 0.5974, "step": 161635 }, { "epoch": 1.79, "learning_rate": 2.017007706617061e-05, "loss": 0.6075, "step": 161640 }, { "epoch": 1.79, "learning_rate": 2.0169154339032097e-05, "loss": 0.6039, "step": 161645 }, { "epoch": 1.79, "learning_rate": 2.0168231611893585e-05, "loss": 0.6517, "step": 161650 }, { "epoch": 1.79, "learning_rate": 2.0167308884755073e-05, "loss": 0.6557, "step": 161655 }, { "epoch": 1.79, "learning_rate": 2.016638615761656e-05, "loss": 0.6141, "step": 161660 }, { "epoch": 1.79, "learning_rate": 2.016546343047805e-05, "loss": 0.588, "step": 161665 }, { "epoch": 1.79, "learning_rate": 2.0164540703339536e-05, "loss": 0.5986, "step": 161670 }, { "epoch": 1.79, "learning_rate": 2.016361797620102e-05, "loss": 0.621, "step": 161675 }, { "epoch": 1.79, "learning_rate": 2.0162695249062512e-05, "loss": 0.6406, "step": 161680 }, { "epoch": 1.79, "learning_rate": 2.0161772521923996e-05, "loss": 0.6004, "step": 161685 }, { "epoch": 1.79, "learning_rate": 2.0160849794785484e-05, "loss": 0.632, "step": 161690 }, { "epoch": 1.79, "learning_rate": 2.0159927067646972e-05, "loss": 0.6496, "step": 161695 }, { "epoch": 1.79, "learning_rate": 2.015900434050846e-05, "loss": 0.6366, "step": 161700 }, { "epoch": 1.79, "learning_rate": 2.0158081613369948e-05, "loss": 0.592, "step": 161705 }, { "epoch": 1.79, "learning_rate": 2.0157158886231436e-05, "loss": 0.6141, "step": 161710 }, { "epoch": 1.79, "learning_rate": 2.0156236159092924e-05, "loss": 0.6463, "step": 161715 }, { "epoch": 1.79, "learning_rate": 2.015531343195441e-05, "loss": 0.6245, "step": 161720 }, { "epoch": 1.79, "learning_rate": 2.01543907048159e-05, "loss": 0.6307, "step": 161725 }, { "epoch": 1.79, "learning_rate": 2.0153467977677387e-05, "loss": 0.6264, "step": 161730 }, { "epoch": 1.79, "learning_rate": 2.0152545250538875e-05, "loss": 0.6292, "step": 161735 }, { "epoch": 1.79, "learning_rate": 2.0151622523400363e-05, "loss": 0.6199, "step": 161740 }, { "epoch": 1.79, "learning_rate": 2.0150699796261847e-05, "loss": 0.6416, "step": 161745 }, { "epoch": 1.79, "learning_rate": 2.0149777069123335e-05, "loss": 0.6074, "step": 161750 }, { "epoch": 1.79, "learning_rate": 2.0148854341984823e-05, "loss": 0.6515, "step": 161755 }, { "epoch": 1.79, "learning_rate": 2.014793161484631e-05, "loss": 0.607, "step": 161760 }, { "epoch": 1.79, "learning_rate": 2.01470088877078e-05, "loss": 0.6034, "step": 161765 }, { "epoch": 1.79, "learning_rate": 2.0146086160569286e-05, "loss": 0.6123, "step": 161770 }, { "epoch": 1.79, "learning_rate": 2.0145163433430774e-05, "loss": 0.6131, "step": 161775 }, { "epoch": 1.79, "learning_rate": 2.0144240706292262e-05, "loss": 0.6564, "step": 161780 }, { "epoch": 1.79, "learning_rate": 2.014331797915375e-05, "loss": 0.6263, "step": 161785 }, { "epoch": 1.79, "learning_rate": 2.0142395252015238e-05, "loss": 0.6295, "step": 161790 }, { "epoch": 1.79, "learning_rate": 2.0141472524876726e-05, "loss": 0.6458, "step": 161795 }, { "epoch": 1.79, "learning_rate": 2.014054979773821e-05, "loss": 0.6613, "step": 161800 }, { "epoch": 1.79, "learning_rate": 2.01396270705997e-05, "loss": 0.6004, "step": 161805 }, { "epoch": 1.79, "learning_rate": 2.0138704343461186e-05, "loss": 0.6238, "step": 161810 }, { "epoch": 1.79, "learning_rate": 2.0137781616322677e-05, "loss": 0.6046, "step": 161815 }, { "epoch": 1.79, "learning_rate": 2.013685888918416e-05, "loss": 0.5981, "step": 161820 }, { "epoch": 1.79, "learning_rate": 2.013593616204565e-05, "loss": 0.6254, "step": 161825 }, { "epoch": 1.79, "learning_rate": 2.0135013434907137e-05, "loss": 0.6657, "step": 161830 }, { "epoch": 1.79, "learning_rate": 2.0134090707768625e-05, "loss": 0.6443, "step": 161835 }, { "epoch": 1.79, "learning_rate": 2.0133167980630113e-05, "loss": 0.5937, "step": 161840 }, { "epoch": 1.79, "learning_rate": 2.01322452534916e-05, "loss": 0.5898, "step": 161845 }, { "epoch": 1.79, "learning_rate": 2.013132252635309e-05, "loss": 0.6046, "step": 161850 }, { "epoch": 1.79, "learning_rate": 2.0130399799214577e-05, "loss": 0.6557, "step": 161855 }, { "epoch": 1.79, "learning_rate": 2.0129477072076064e-05, "loss": 0.5759, "step": 161860 }, { "epoch": 1.79, "learning_rate": 2.012855434493755e-05, "loss": 0.6768, "step": 161865 }, { "epoch": 1.79, "learning_rate": 2.012763161779904e-05, "loss": 0.6419, "step": 161870 }, { "epoch": 1.79, "learning_rate": 2.0126708890660525e-05, "loss": 0.6434, "step": 161875 }, { "epoch": 1.79, "learning_rate": 2.0125786163522016e-05, "loss": 0.5618, "step": 161880 }, { "epoch": 1.79, "learning_rate": 2.01248634363835e-05, "loss": 0.649, "step": 161885 }, { "epoch": 1.79, "learning_rate": 2.012394070924499e-05, "loss": 0.5973, "step": 161890 }, { "epoch": 1.79, "learning_rate": 2.0123017982106476e-05, "loss": 0.6277, "step": 161895 }, { "epoch": 1.79, "learning_rate": 2.0122095254967964e-05, "loss": 0.6456, "step": 161900 }, { "epoch": 1.79, "learning_rate": 2.012117252782945e-05, "loss": 0.6451, "step": 161905 }, { "epoch": 1.79, "learning_rate": 2.012024980069094e-05, "loss": 0.6573, "step": 161910 }, { "epoch": 1.79, "learning_rate": 2.0119327073552427e-05, "loss": 0.6136, "step": 161915 }, { "epoch": 1.79, "learning_rate": 2.0118404346413912e-05, "loss": 0.6283, "step": 161920 }, { "epoch": 1.79, "learning_rate": 2.0117481619275403e-05, "loss": 0.6542, "step": 161925 }, { "epoch": 1.79, "learning_rate": 2.0116558892136887e-05, "loss": 0.6567, "step": 161930 }, { "epoch": 1.79, "learning_rate": 2.011563616499838e-05, "loss": 0.6219, "step": 161935 }, { "epoch": 1.79, "learning_rate": 2.0114713437859863e-05, "loss": 0.6164, "step": 161940 }, { "epoch": 1.79, "learning_rate": 2.0113790710721354e-05, "loss": 0.6611, "step": 161945 }, { "epoch": 1.79, "learning_rate": 2.011286798358284e-05, "loss": 0.6237, "step": 161950 }, { "epoch": 1.79, "learning_rate": 2.0111945256444327e-05, "loss": 0.6425, "step": 161955 }, { "epoch": 1.79, "learning_rate": 2.0111022529305815e-05, "loss": 0.6331, "step": 161960 }, { "epoch": 1.79, "learning_rate": 2.0110099802167302e-05, "loss": 0.6104, "step": 161965 }, { "epoch": 1.79, "learning_rate": 2.010917707502879e-05, "loss": 0.6017, "step": 161970 }, { "epoch": 1.79, "learning_rate": 2.0108254347890275e-05, "loss": 0.6165, "step": 161975 }, { "epoch": 1.79, "learning_rate": 2.0107331620751766e-05, "loss": 0.5987, "step": 161980 }, { "epoch": 1.79, "learning_rate": 2.010640889361325e-05, "loss": 0.5707, "step": 161985 }, { "epoch": 1.79, "learning_rate": 2.010548616647474e-05, "loss": 0.6232, "step": 161990 }, { "epoch": 1.79, "learning_rate": 2.0104563439336226e-05, "loss": 0.6619, "step": 161995 }, { "epoch": 1.79, "learning_rate": 2.0103640712197717e-05, "loss": 0.6532, "step": 162000 }, { "epoch": 1.79, "eval_loss": 0.5887871384620667, "eval_runtime": 69.0916, "eval_samples_per_second": 28.947, "eval_steps_per_second": 14.474, "step": 162000 }, { "epoch": 1.79, "learning_rate": 2.0102717985059202e-05, "loss": 0.6427, "step": 162005 }, { "epoch": 1.79, "learning_rate": 2.0101795257920693e-05, "loss": 0.6519, "step": 162010 }, { "epoch": 1.79, "learning_rate": 2.0100872530782178e-05, "loss": 0.6545, "step": 162015 }, { "epoch": 1.79, "learning_rate": 2.0099949803643665e-05, "loss": 0.5979, "step": 162020 }, { "epoch": 1.79, "learning_rate": 2.0099027076505153e-05, "loss": 0.6122, "step": 162025 }, { "epoch": 1.79, "learning_rate": 2.009810434936664e-05, "loss": 0.632, "step": 162030 }, { "epoch": 1.79, "learning_rate": 2.009718162222813e-05, "loss": 0.5853, "step": 162035 }, { "epoch": 1.79, "learning_rate": 2.0096258895089617e-05, "loss": 0.5874, "step": 162040 }, { "epoch": 1.79, "learning_rate": 2.0095336167951105e-05, "loss": 0.609, "step": 162045 }, { "epoch": 1.79, "learning_rate": 2.009441344081259e-05, "loss": 0.5714, "step": 162050 }, { "epoch": 1.79, "learning_rate": 2.009349071367408e-05, "loss": 0.6496, "step": 162055 }, { "epoch": 1.79, "learning_rate": 2.0092567986535565e-05, "loss": 0.6737, "step": 162060 }, { "epoch": 1.79, "learning_rate": 2.0091645259397056e-05, "loss": 0.6135, "step": 162065 }, { "epoch": 1.79, "learning_rate": 2.009072253225854e-05, "loss": 0.6065, "step": 162070 }, { "epoch": 1.79, "learning_rate": 2.008979980512003e-05, "loss": 0.6224, "step": 162075 }, { "epoch": 1.79, "learning_rate": 2.0088877077981516e-05, "loss": 0.5995, "step": 162080 }, { "epoch": 1.79, "learning_rate": 2.0087954350843004e-05, "loss": 0.5978, "step": 162085 }, { "epoch": 1.79, "learning_rate": 2.0087031623704492e-05, "loss": 0.6414, "step": 162090 }, { "epoch": 1.79, "learning_rate": 2.008610889656598e-05, "loss": 0.5792, "step": 162095 }, { "epoch": 1.79, "learning_rate": 2.0085186169427468e-05, "loss": 0.6345, "step": 162100 }, { "epoch": 1.79, "learning_rate": 2.0084263442288955e-05, "loss": 0.6156, "step": 162105 }, { "epoch": 1.79, "learning_rate": 2.0083340715150443e-05, "loss": 0.631, "step": 162110 }, { "epoch": 1.8, "learning_rate": 2.008241798801193e-05, "loss": 0.5949, "step": 162115 }, { "epoch": 1.8, "learning_rate": 2.008149526087342e-05, "loss": 0.619, "step": 162120 }, { "epoch": 1.8, "learning_rate": 2.0080572533734903e-05, "loss": 0.646, "step": 162125 }, { "epoch": 1.8, "learning_rate": 2.007964980659639e-05, "loss": 0.6578, "step": 162130 }, { "epoch": 1.8, "learning_rate": 2.007872707945788e-05, "loss": 0.632, "step": 162135 }, { "epoch": 1.8, "learning_rate": 2.0077804352319367e-05, "loss": 0.656, "step": 162140 }, { "epoch": 1.8, "learning_rate": 2.0076881625180855e-05, "loss": 0.6261, "step": 162145 }, { "epoch": 1.8, "learning_rate": 2.0075958898042343e-05, "loss": 0.6214, "step": 162150 }, { "epoch": 1.8, "learning_rate": 2.007503617090383e-05, "loss": 0.6445, "step": 162155 }, { "epoch": 1.8, "learning_rate": 2.007411344376532e-05, "loss": 0.6311, "step": 162160 }, { "epoch": 1.8, "learning_rate": 2.0073190716626806e-05, "loss": 0.6673, "step": 162165 }, { "epoch": 1.8, "learning_rate": 2.0072267989488294e-05, "loss": 0.6023, "step": 162170 }, { "epoch": 1.8, "learning_rate": 2.0071345262349782e-05, "loss": 0.6917, "step": 162175 }, { "epoch": 1.8, "learning_rate": 2.007042253521127e-05, "loss": 0.6546, "step": 162180 }, { "epoch": 1.8, "learning_rate": 2.0069499808072754e-05, "loss": 0.6035, "step": 162185 }, { "epoch": 1.8, "learning_rate": 2.0068577080934245e-05, "loss": 0.6454, "step": 162190 }, { "epoch": 1.8, "learning_rate": 2.006765435379573e-05, "loss": 0.6563, "step": 162195 }, { "epoch": 1.8, "learning_rate": 2.006673162665722e-05, "loss": 0.574, "step": 162200 }, { "epoch": 1.8, "learning_rate": 2.0065808899518706e-05, "loss": 0.6285, "step": 162205 }, { "epoch": 1.8, "learning_rate": 2.0064886172380193e-05, "loss": 0.6166, "step": 162210 }, { "epoch": 1.8, "learning_rate": 2.006396344524168e-05, "loss": 0.6096, "step": 162215 }, { "epoch": 1.8, "learning_rate": 2.006304071810317e-05, "loss": 0.6049, "step": 162220 }, { "epoch": 1.8, "learning_rate": 2.0062117990964657e-05, "loss": 0.6246, "step": 162225 }, { "epoch": 1.8, "learning_rate": 2.0061195263826145e-05, "loss": 0.5519, "step": 162230 }, { "epoch": 1.8, "learning_rate": 2.0060272536687633e-05, "loss": 0.6438, "step": 162235 }, { "epoch": 1.8, "learning_rate": 2.005934980954912e-05, "loss": 0.5721, "step": 162240 }, { "epoch": 1.8, "learning_rate": 2.005842708241061e-05, "loss": 0.633, "step": 162245 }, { "epoch": 1.8, "learning_rate": 2.0057504355272093e-05, "loss": 0.625, "step": 162250 }, { "epoch": 1.8, "learning_rate": 2.0056581628133584e-05, "loss": 0.6017, "step": 162255 }, { "epoch": 1.8, "learning_rate": 2.005565890099507e-05, "loss": 0.6337, "step": 162260 }, { "epoch": 1.8, "learning_rate": 2.005473617385656e-05, "loss": 0.5945, "step": 162265 }, { "epoch": 1.8, "learning_rate": 2.0053813446718044e-05, "loss": 0.5812, "step": 162270 }, { "epoch": 1.8, "learning_rate": 2.0052890719579535e-05, "loss": 0.5575, "step": 162275 }, { "epoch": 1.8, "learning_rate": 2.005196799244102e-05, "loss": 0.5989, "step": 162280 }, { "epoch": 1.8, "learning_rate": 2.0051045265302508e-05, "loss": 0.6864, "step": 162285 }, { "epoch": 1.8, "learning_rate": 2.0050122538163996e-05, "loss": 0.6504, "step": 162290 }, { "epoch": 1.8, "learning_rate": 2.0049199811025484e-05, "loss": 0.5988, "step": 162295 }, { "epoch": 1.8, "learning_rate": 2.004827708388697e-05, "loss": 0.6403, "step": 162300 }, { "epoch": 1.8, "learning_rate": 2.0047354356748456e-05, "loss": 0.6519, "step": 162305 }, { "epoch": 1.8, "learning_rate": 2.0046431629609947e-05, "loss": 0.5937, "step": 162310 }, { "epoch": 1.8, "learning_rate": 2.004550890247143e-05, "loss": 0.6039, "step": 162315 }, { "epoch": 1.8, "learning_rate": 2.0044586175332923e-05, "loss": 0.6391, "step": 162320 }, { "epoch": 1.8, "learning_rate": 2.0043663448194407e-05, "loss": 0.5966, "step": 162325 }, { "epoch": 1.8, "learning_rate": 2.00427407210559e-05, "loss": 0.6566, "step": 162330 }, { "epoch": 1.8, "learning_rate": 2.0041817993917383e-05, "loss": 0.64, "step": 162335 }, { "epoch": 1.8, "learning_rate": 2.004089526677887e-05, "loss": 0.6614, "step": 162340 }, { "epoch": 1.8, "learning_rate": 2.003997253964036e-05, "loss": 0.5844, "step": 162345 }, { "epoch": 1.8, "learning_rate": 2.0039049812501846e-05, "loss": 0.6374, "step": 162350 }, { "epoch": 1.8, "learning_rate": 2.0038127085363334e-05, "loss": 0.6398, "step": 162355 }, { "epoch": 1.8, "learning_rate": 2.003720435822482e-05, "loss": 0.6255, "step": 162360 }, { "epoch": 1.8, "learning_rate": 2.003628163108631e-05, "loss": 0.6187, "step": 162365 }, { "epoch": 1.8, "learning_rate": 2.0035358903947794e-05, "loss": 0.6582, "step": 162370 }, { "epoch": 1.8, "learning_rate": 2.0034436176809286e-05, "loss": 0.6181, "step": 162375 }, { "epoch": 1.8, "learning_rate": 2.003351344967077e-05, "loss": 0.6458, "step": 162380 }, { "epoch": 1.8, "learning_rate": 2.003259072253226e-05, "loss": 0.5962, "step": 162385 }, { "epoch": 1.8, "learning_rate": 2.0031667995393746e-05, "loss": 0.6011, "step": 162390 }, { "epoch": 1.8, "learning_rate": 2.0030745268255237e-05, "loss": 0.6222, "step": 162395 }, { "epoch": 1.8, "learning_rate": 2.002982254111672e-05, "loss": 0.6475, "step": 162400 }, { "epoch": 1.8, "learning_rate": 2.002889981397821e-05, "loss": 0.563, "step": 162405 }, { "epoch": 1.8, "learning_rate": 2.0027977086839697e-05, "loss": 0.6999, "step": 162410 }, { "epoch": 1.8, "learning_rate": 2.0027054359701185e-05, "loss": 0.5847, "step": 162415 }, { "epoch": 1.8, "learning_rate": 2.0026131632562673e-05, "loss": 0.6128, "step": 162420 }, { "epoch": 1.8, "learning_rate": 2.002520890542416e-05, "loss": 0.6756, "step": 162425 }, { "epoch": 1.8, "learning_rate": 2.002428617828565e-05, "loss": 0.5777, "step": 162430 }, { "epoch": 1.8, "learning_rate": 2.0023363451147133e-05, "loss": 0.6484, "step": 162435 }, { "epoch": 1.8, "learning_rate": 2.0022440724008624e-05, "loss": 0.609, "step": 162440 }, { "epoch": 1.8, "learning_rate": 2.002151799687011e-05, "loss": 0.6371, "step": 162445 }, { "epoch": 1.8, "learning_rate": 2.00205952697316e-05, "loss": 0.5991, "step": 162450 }, { "epoch": 1.8, "learning_rate": 2.0019672542593084e-05, "loss": 0.6352, "step": 162455 }, { "epoch": 1.8, "learning_rate": 2.0018749815454572e-05, "loss": 0.5991, "step": 162460 }, { "epoch": 1.8, "learning_rate": 2.001782708831606e-05, "loss": 0.6245, "step": 162465 }, { "epoch": 1.8, "learning_rate": 2.0016904361177548e-05, "loss": 0.7033, "step": 162470 }, { "epoch": 1.8, "learning_rate": 2.0015981634039036e-05, "loss": 0.623, "step": 162475 }, { "epoch": 1.8, "learning_rate": 2.0015058906900524e-05, "loss": 0.5938, "step": 162480 }, { "epoch": 1.8, "learning_rate": 2.001413617976201e-05, "loss": 0.6241, "step": 162485 }, { "epoch": 1.8, "learning_rate": 2.00132134526235e-05, "loss": 0.6184, "step": 162490 }, { "epoch": 1.8, "learning_rate": 2.0012290725484987e-05, "loss": 0.5987, "step": 162495 }, { "epoch": 1.8, "learning_rate": 2.0011367998346475e-05, "loss": 0.5505, "step": 162500 }, { "epoch": 1.8, "learning_rate": 2.0010445271207963e-05, "loss": 0.6299, "step": 162505 }, { "epoch": 1.8, "learning_rate": 2.0009522544069447e-05, "loss": 0.6345, "step": 162510 }, { "epoch": 1.8, "learning_rate": 2.0008599816930935e-05, "loss": 0.5933, "step": 162515 }, { "epoch": 1.8, "learning_rate": 2.0007677089792423e-05, "loss": 0.5972, "step": 162520 }, { "epoch": 1.8, "learning_rate": 2.000675436265391e-05, "loss": 0.6277, "step": 162525 }, { "epoch": 1.8, "learning_rate": 2.00058316355154e-05, "loss": 0.6318, "step": 162530 }, { "epoch": 1.8, "learning_rate": 2.0004908908376887e-05, "loss": 0.6583, "step": 162535 }, { "epoch": 1.8, "learning_rate": 2.0003986181238375e-05, "loss": 0.6802, "step": 162540 }, { "epoch": 1.8, "learning_rate": 2.0003063454099862e-05, "loss": 0.6345, "step": 162545 }, { "epoch": 1.8, "learning_rate": 2.000214072696135e-05, "loss": 0.6151, "step": 162550 }, { "epoch": 1.8, "learning_rate": 2.0001217999822838e-05, "loss": 0.6285, "step": 162555 }, { "epoch": 1.8, "learning_rate": 2.0000295272684326e-05, "loss": 0.6215, "step": 162560 }, { "epoch": 1.8, "learning_rate": 1.9999372545545814e-05, "loss": 0.6198, "step": 162565 }, { "epoch": 1.8, "learning_rate": 1.9998449818407298e-05, "loss": 0.6186, "step": 162570 }, { "epoch": 1.8, "learning_rate": 1.999752709126879e-05, "loss": 0.6118, "step": 162575 }, { "epoch": 1.8, "learning_rate": 1.9996604364130274e-05, "loss": 0.59, "step": 162580 }, { "epoch": 1.8, "learning_rate": 1.9995681636991762e-05, "loss": 0.6689, "step": 162585 }, { "epoch": 1.8, "learning_rate": 1.999475890985325e-05, "loss": 0.6351, "step": 162590 }, { "epoch": 1.8, "learning_rate": 1.9993836182714737e-05, "loss": 0.6275, "step": 162595 }, { "epoch": 1.8, "learning_rate": 1.9992913455576225e-05, "loss": 0.6255, "step": 162600 }, { "epoch": 1.8, "learning_rate": 1.9991990728437713e-05, "loss": 0.6354, "step": 162605 }, { "epoch": 1.8, "learning_rate": 1.99910680012992e-05, "loss": 0.6216, "step": 162610 }, { "epoch": 1.8, "learning_rate": 1.999014527416069e-05, "loss": 0.6029, "step": 162615 }, { "epoch": 1.8, "learning_rate": 1.9989222547022177e-05, "loss": 0.6095, "step": 162620 }, { "epoch": 1.8, "learning_rate": 1.9988299819883665e-05, "loss": 0.644, "step": 162625 }, { "epoch": 1.8, "learning_rate": 1.9987377092745152e-05, "loss": 0.6282, "step": 162630 }, { "epoch": 1.8, "learning_rate": 1.9986454365606637e-05, "loss": 0.6459, "step": 162635 }, { "epoch": 1.8, "learning_rate": 1.9985531638468128e-05, "loss": 0.6278, "step": 162640 }, { "epoch": 1.8, "learning_rate": 1.9984608911329613e-05, "loss": 0.6136, "step": 162645 }, { "epoch": 1.8, "learning_rate": 1.9983686184191104e-05, "loss": 0.6057, "step": 162650 }, { "epoch": 1.8, "learning_rate": 1.9982763457052588e-05, "loss": 0.6037, "step": 162655 }, { "epoch": 1.8, "learning_rate": 1.9981840729914076e-05, "loss": 0.6024, "step": 162660 }, { "epoch": 1.8, "learning_rate": 1.9980918002775564e-05, "loss": 0.6315, "step": 162665 }, { "epoch": 1.8, "learning_rate": 1.9979995275637052e-05, "loss": 0.6378, "step": 162670 }, { "epoch": 1.8, "learning_rate": 1.997907254849854e-05, "loss": 0.6075, "step": 162675 }, { "epoch": 1.8, "learning_rate": 1.9978149821360028e-05, "loss": 0.6169, "step": 162680 }, { "epoch": 1.8, "learning_rate": 1.9977227094221515e-05, "loss": 0.6798, "step": 162685 }, { "epoch": 1.8, "learning_rate": 1.9976304367083e-05, "loss": 0.6576, "step": 162690 }, { "epoch": 1.8, "learning_rate": 1.997538163994449e-05, "loss": 0.6532, "step": 162695 }, { "epoch": 1.8, "learning_rate": 1.9974458912805976e-05, "loss": 0.6757, "step": 162700 }, { "epoch": 1.8, "learning_rate": 1.9973536185667467e-05, "loss": 0.6438, "step": 162705 }, { "epoch": 1.8, "learning_rate": 1.997261345852895e-05, "loss": 0.6477, "step": 162710 }, { "epoch": 1.8, "learning_rate": 1.9971690731390442e-05, "loss": 0.6634, "step": 162715 }, { "epoch": 1.8, "learning_rate": 1.9970768004251927e-05, "loss": 0.6782, "step": 162720 }, { "epoch": 1.8, "learning_rate": 1.9969845277113415e-05, "loss": 0.6126, "step": 162725 }, { "epoch": 1.8, "learning_rate": 1.9968922549974903e-05, "loss": 0.6463, "step": 162730 }, { "epoch": 1.8, "learning_rate": 1.996799982283639e-05, "loss": 0.5956, "step": 162735 }, { "epoch": 1.8, "learning_rate": 1.996707709569788e-05, "loss": 0.6468, "step": 162740 }, { "epoch": 1.8, "learning_rate": 1.9966154368559363e-05, "loss": 0.6659, "step": 162745 }, { "epoch": 1.8, "learning_rate": 1.9965231641420854e-05, "loss": 0.6191, "step": 162750 }, { "epoch": 1.8, "learning_rate": 1.996430891428234e-05, "loss": 0.612, "step": 162755 }, { "epoch": 1.8, "learning_rate": 1.996338618714383e-05, "loss": 0.5966, "step": 162760 }, { "epoch": 1.8, "learning_rate": 1.9962463460005314e-05, "loss": 0.6609, "step": 162765 }, { "epoch": 1.8, "learning_rate": 1.9961540732866805e-05, "loss": 0.6706, "step": 162770 }, { "epoch": 1.8, "learning_rate": 1.996061800572829e-05, "loss": 0.6133, "step": 162775 }, { "epoch": 1.8, "learning_rate": 1.995969527858978e-05, "loss": 0.5771, "step": 162780 }, { "epoch": 1.8, "learning_rate": 1.9958772551451266e-05, "loss": 0.6037, "step": 162785 }, { "epoch": 1.8, "learning_rate": 1.9957849824312753e-05, "loss": 0.6637, "step": 162790 }, { "epoch": 1.8, "learning_rate": 1.995692709717424e-05, "loss": 0.5931, "step": 162795 }, { "epoch": 1.8, "learning_rate": 1.995600437003573e-05, "loss": 0.5761, "step": 162800 }, { "epoch": 1.8, "learning_rate": 1.9955081642897217e-05, "loss": 0.5805, "step": 162805 }, { "epoch": 1.8, "learning_rate": 1.99541589157587e-05, "loss": 0.6251, "step": 162810 }, { "epoch": 1.8, "learning_rate": 1.9953236188620193e-05, "loss": 0.6616, "step": 162815 }, { "epoch": 1.8, "learning_rate": 1.9952313461481677e-05, "loss": 0.6233, "step": 162820 }, { "epoch": 1.8, "learning_rate": 1.995139073434317e-05, "loss": 0.6246, "step": 162825 }, { "epoch": 1.8, "learning_rate": 1.9950468007204653e-05, "loss": 0.6062, "step": 162830 }, { "epoch": 1.8, "learning_rate": 1.9949545280066144e-05, "loss": 0.6497, "step": 162835 }, { "epoch": 1.8, "learning_rate": 1.994862255292763e-05, "loss": 0.628, "step": 162840 }, { "epoch": 1.8, "learning_rate": 1.9947699825789116e-05, "loss": 0.6429, "step": 162845 }, { "epoch": 1.8, "learning_rate": 1.9946777098650604e-05, "loss": 0.606, "step": 162850 }, { "epoch": 1.8, "learning_rate": 1.9945854371512092e-05, "loss": 0.5717, "step": 162855 }, { "epoch": 1.8, "learning_rate": 1.994493164437358e-05, "loss": 0.6188, "step": 162860 }, { "epoch": 1.8, "learning_rate": 1.9944008917235068e-05, "loss": 0.5756, "step": 162865 }, { "epoch": 1.8, "learning_rate": 1.9943086190096556e-05, "loss": 0.6271, "step": 162870 }, { "epoch": 1.8, "learning_rate": 1.9942163462958043e-05, "loss": 0.5867, "step": 162875 }, { "epoch": 1.8, "learning_rate": 1.994124073581953e-05, "loss": 0.6453, "step": 162880 }, { "epoch": 1.8, "learning_rate": 1.9940318008681016e-05, "loss": 0.6735, "step": 162885 }, { "epoch": 1.8, "learning_rate": 1.9939395281542507e-05, "loss": 0.646, "step": 162890 }, { "epoch": 1.8, "learning_rate": 1.993847255440399e-05, "loss": 0.6062, "step": 162895 }, { "epoch": 1.8, "learning_rate": 1.993754982726548e-05, "loss": 0.6683, "step": 162900 }, { "epoch": 1.8, "learning_rate": 1.9936627100126967e-05, "loss": 0.6228, "step": 162905 }, { "epoch": 1.8, "learning_rate": 1.9935704372988455e-05, "loss": 0.628, "step": 162910 }, { "epoch": 1.8, "learning_rate": 1.9934781645849943e-05, "loss": 0.6489, "step": 162915 }, { "epoch": 1.8, "learning_rate": 1.993385891871143e-05, "loss": 0.6023, "step": 162920 }, { "epoch": 1.8, "learning_rate": 1.993293619157292e-05, "loss": 0.6242, "step": 162925 }, { "epoch": 1.8, "learning_rate": 1.9932013464434406e-05, "loss": 0.6183, "step": 162930 }, { "epoch": 1.8, "learning_rate": 1.9931090737295894e-05, "loss": 0.6168, "step": 162935 }, { "epoch": 1.8, "learning_rate": 1.9930168010157382e-05, "loss": 0.6467, "step": 162940 }, { "epoch": 1.8, "learning_rate": 1.992924528301887e-05, "loss": 0.6139, "step": 162945 }, { "epoch": 1.8, "learning_rate": 1.9928322555880358e-05, "loss": 0.6161, "step": 162950 }, { "epoch": 1.8, "learning_rate": 1.9927399828741846e-05, "loss": 0.5852, "step": 162955 }, { "epoch": 1.8, "learning_rate": 1.992647710160333e-05, "loss": 0.6446, "step": 162960 }, { "epoch": 1.8, "learning_rate": 1.9925554374464818e-05, "loss": 0.6422, "step": 162965 }, { "epoch": 1.8, "learning_rate": 1.9924631647326306e-05, "loss": 0.6785, "step": 162970 }, { "epoch": 1.8, "learning_rate": 1.9923708920187794e-05, "loss": 0.6343, "step": 162975 }, { "epoch": 1.8, "learning_rate": 1.992278619304928e-05, "loss": 0.6439, "step": 162980 }, { "epoch": 1.8, "learning_rate": 1.992186346591077e-05, "loss": 0.6073, "step": 162985 }, { "epoch": 1.8, "learning_rate": 1.9920940738772257e-05, "loss": 0.6335, "step": 162990 }, { "epoch": 1.8, "learning_rate": 1.9920018011633745e-05, "loss": 0.5883, "step": 162995 }, { "epoch": 1.8, "learning_rate": 1.9919095284495233e-05, "loss": 0.6679, "step": 163000 }, { "epoch": 1.8, "eval_loss": 0.6038133502006531, "eval_runtime": 69.1416, "eval_samples_per_second": 28.926, "eval_steps_per_second": 14.463, "step": 163000 }, { "epoch": 1.8, "learning_rate": 1.991817255735672e-05, "loss": 0.6762, "step": 163005 }, { "epoch": 1.8, "learning_rate": 1.991724983021821e-05, "loss": 0.5994, "step": 163010 }, { "epoch": 1.81, "learning_rate": 1.9916327103079696e-05, "loss": 0.6237, "step": 163015 }, { "epoch": 1.81, "learning_rate": 1.991540437594118e-05, "loss": 0.6, "step": 163020 }, { "epoch": 1.81, "learning_rate": 1.9914481648802672e-05, "loss": 0.624, "step": 163025 }, { "epoch": 1.81, "learning_rate": 1.9913558921664157e-05, "loss": 0.6015, "step": 163030 }, { "epoch": 1.81, "learning_rate": 1.9912636194525648e-05, "loss": 0.6233, "step": 163035 }, { "epoch": 1.81, "learning_rate": 1.9911713467387132e-05, "loss": 0.632, "step": 163040 }, { "epoch": 1.81, "learning_rate": 1.991079074024862e-05, "loss": 0.5935, "step": 163045 }, { "epoch": 1.81, "learning_rate": 1.9909868013110108e-05, "loss": 0.6127, "step": 163050 }, { "epoch": 1.81, "learning_rate": 1.9908945285971596e-05, "loss": 0.6198, "step": 163055 }, { "epoch": 1.81, "learning_rate": 1.9908022558833084e-05, "loss": 0.6368, "step": 163060 }, { "epoch": 1.81, "learning_rate": 1.990709983169457e-05, "loss": 0.6937, "step": 163065 }, { "epoch": 1.81, "learning_rate": 1.990617710455606e-05, "loss": 0.5973, "step": 163070 }, { "epoch": 1.81, "learning_rate": 1.9905254377417544e-05, "loss": 0.6323, "step": 163075 }, { "epoch": 1.81, "learning_rate": 1.9904331650279035e-05, "loss": 0.5711, "step": 163080 }, { "epoch": 1.81, "learning_rate": 1.990340892314052e-05, "loss": 0.6501, "step": 163085 }, { "epoch": 1.81, "learning_rate": 1.990248619600201e-05, "loss": 0.6437, "step": 163090 }, { "epoch": 1.81, "learning_rate": 1.9901563468863495e-05, "loss": 0.6117, "step": 163095 }, { "epoch": 1.81, "learning_rate": 1.9900640741724986e-05, "loss": 0.6006, "step": 163100 }, { "epoch": 1.81, "learning_rate": 1.989971801458647e-05, "loss": 0.6089, "step": 163105 }, { "epoch": 1.81, "learning_rate": 1.9898795287447962e-05, "loss": 0.5906, "step": 163110 }, { "epoch": 1.81, "learning_rate": 1.9897872560309447e-05, "loss": 0.6229, "step": 163115 }, { "epoch": 1.81, "learning_rate": 1.9896949833170934e-05, "loss": 0.6058, "step": 163120 }, { "epoch": 1.81, "learning_rate": 1.9896027106032422e-05, "loss": 0.5702, "step": 163125 }, { "epoch": 1.81, "learning_rate": 1.9895104378893907e-05, "loss": 0.6225, "step": 163130 }, { "epoch": 1.81, "learning_rate": 1.9894181651755398e-05, "loss": 0.5622, "step": 163135 }, { "epoch": 1.81, "learning_rate": 1.9893258924616882e-05, "loss": 0.5335, "step": 163140 }, { "epoch": 1.81, "learning_rate": 1.9892336197478374e-05, "loss": 0.5678, "step": 163145 }, { "epoch": 1.81, "learning_rate": 1.9891413470339858e-05, "loss": 0.6323, "step": 163150 }, { "epoch": 1.81, "learning_rate": 1.989049074320135e-05, "loss": 0.5839, "step": 163155 }, { "epoch": 1.81, "learning_rate": 1.9889568016062834e-05, "loss": 0.609, "step": 163160 }, { "epoch": 1.81, "learning_rate": 1.9888645288924325e-05, "loss": 0.6641, "step": 163165 }, { "epoch": 1.81, "learning_rate": 1.988772256178581e-05, "loss": 0.5439, "step": 163170 }, { "epoch": 1.81, "learning_rate": 1.9886799834647297e-05, "loss": 0.6154, "step": 163175 }, { "epoch": 1.81, "learning_rate": 1.9885877107508785e-05, "loss": 0.678, "step": 163180 }, { "epoch": 1.81, "learning_rate": 1.9884954380370273e-05, "loss": 0.6187, "step": 163185 }, { "epoch": 1.81, "learning_rate": 1.988403165323176e-05, "loss": 0.603, "step": 163190 }, { "epoch": 1.81, "learning_rate": 1.9883108926093245e-05, "loss": 0.6183, "step": 163195 }, { "epoch": 1.81, "learning_rate": 1.9882186198954737e-05, "loss": 0.6768, "step": 163200 }, { "epoch": 1.81, "learning_rate": 1.988126347181622e-05, "loss": 0.5779, "step": 163205 }, { "epoch": 1.81, "learning_rate": 1.9880340744677712e-05, "loss": 0.6098, "step": 163210 }, { "epoch": 1.81, "learning_rate": 1.9879418017539197e-05, "loss": 0.6788, "step": 163215 }, { "epoch": 1.81, "learning_rate": 1.9878495290400688e-05, "loss": 0.5797, "step": 163220 }, { "epoch": 1.81, "learning_rate": 1.9877572563262173e-05, "loss": 0.6464, "step": 163225 }, { "epoch": 1.81, "learning_rate": 1.987664983612366e-05, "loss": 0.649, "step": 163230 }, { "epoch": 1.81, "learning_rate": 1.9875727108985148e-05, "loss": 0.5367, "step": 163235 }, { "epoch": 1.81, "learning_rate": 1.9874804381846636e-05, "loss": 0.6161, "step": 163240 }, { "epoch": 1.81, "learning_rate": 1.9873881654708124e-05, "loss": 0.6524, "step": 163245 }, { "epoch": 1.81, "learning_rate": 1.9872958927569612e-05, "loss": 0.5892, "step": 163250 }, { "epoch": 1.81, "learning_rate": 1.98720362004311e-05, "loss": 0.6591, "step": 163255 }, { "epoch": 1.81, "learning_rate": 1.9871113473292587e-05, "loss": 0.6424, "step": 163260 }, { "epoch": 1.81, "learning_rate": 1.9870190746154075e-05, "loss": 0.6671, "step": 163265 }, { "epoch": 1.81, "learning_rate": 1.986926801901556e-05, "loss": 0.6549, "step": 163270 }, { "epoch": 1.81, "learning_rate": 1.986834529187705e-05, "loss": 0.6452, "step": 163275 }, { "epoch": 1.81, "learning_rate": 1.9867422564738535e-05, "loss": 0.6105, "step": 163280 }, { "epoch": 1.81, "learning_rate": 1.9866499837600023e-05, "loss": 0.6322, "step": 163285 }, { "epoch": 1.81, "learning_rate": 1.986557711046151e-05, "loss": 0.6478, "step": 163290 }, { "epoch": 1.81, "learning_rate": 1.9864654383323e-05, "loss": 0.6153, "step": 163295 }, { "epoch": 1.81, "learning_rate": 1.9863731656184487e-05, "loss": 0.6261, "step": 163300 }, { "epoch": 1.81, "learning_rate": 1.9862808929045975e-05, "loss": 0.6336, "step": 163305 }, { "epoch": 1.81, "learning_rate": 1.9861886201907463e-05, "loss": 0.6377, "step": 163310 }, { "epoch": 1.81, "learning_rate": 1.986096347476895e-05, "loss": 0.6147, "step": 163315 }, { "epoch": 1.81, "learning_rate": 1.9860040747630438e-05, "loss": 0.6779, "step": 163320 }, { "epoch": 1.81, "learning_rate": 1.9859118020491926e-05, "loss": 0.5961, "step": 163325 }, { "epoch": 1.81, "learning_rate": 1.9858195293353414e-05, "loss": 0.6217, "step": 163330 }, { "epoch": 1.81, "learning_rate": 1.9857272566214902e-05, "loss": 0.5817, "step": 163335 }, { "epoch": 1.81, "learning_rate": 1.985634983907639e-05, "loss": 0.5913, "step": 163340 }, { "epoch": 1.81, "learning_rate": 1.9855427111937874e-05, "loss": 0.594, "step": 163345 }, { "epoch": 1.81, "learning_rate": 1.9854504384799362e-05, "loss": 0.5827, "step": 163350 }, { "epoch": 1.81, "learning_rate": 1.985358165766085e-05, "loss": 0.6458, "step": 163355 }, { "epoch": 1.81, "learning_rate": 1.9852658930522338e-05, "loss": 0.6122, "step": 163360 }, { "epoch": 1.81, "learning_rate": 1.9851736203383826e-05, "loss": 0.621, "step": 163365 }, { "epoch": 1.81, "learning_rate": 1.9850813476245313e-05, "loss": 0.6135, "step": 163370 }, { "epoch": 1.81, "learning_rate": 1.98498907491068e-05, "loss": 0.6188, "step": 163375 }, { "epoch": 1.81, "learning_rate": 1.984896802196829e-05, "loss": 0.5686, "step": 163380 }, { "epoch": 1.81, "learning_rate": 1.9848045294829777e-05, "loss": 0.7017, "step": 163385 }, { "epoch": 1.81, "learning_rate": 1.9847122567691265e-05, "loss": 0.6096, "step": 163390 }, { "epoch": 1.81, "learning_rate": 1.9846199840552753e-05, "loss": 0.642, "step": 163395 }, { "epoch": 1.81, "learning_rate": 1.984527711341424e-05, "loss": 0.5992, "step": 163400 }, { "epoch": 1.81, "learning_rate": 1.9844354386275725e-05, "loss": 0.6106, "step": 163405 }, { "epoch": 1.81, "learning_rate": 1.9843431659137216e-05, "loss": 0.5942, "step": 163410 }, { "epoch": 1.81, "learning_rate": 1.98425089319987e-05, "loss": 0.6462, "step": 163415 }, { "epoch": 1.81, "learning_rate": 1.984158620486019e-05, "loss": 0.636, "step": 163420 }, { "epoch": 1.81, "learning_rate": 1.9840663477721676e-05, "loss": 0.5946, "step": 163425 }, { "epoch": 1.81, "learning_rate": 1.9839740750583164e-05, "loss": 0.6029, "step": 163430 }, { "epoch": 1.81, "learning_rate": 1.9838818023444652e-05, "loss": 0.5883, "step": 163435 }, { "epoch": 1.81, "learning_rate": 1.983789529630614e-05, "loss": 0.6293, "step": 163440 }, { "epoch": 1.81, "learning_rate": 1.9836972569167628e-05, "loss": 0.6483, "step": 163445 }, { "epoch": 1.81, "learning_rate": 1.9836049842029116e-05, "loss": 0.6715, "step": 163450 }, { "epoch": 1.81, "learning_rate": 1.9835127114890603e-05, "loss": 0.6319, "step": 163455 }, { "epoch": 1.81, "learning_rate": 1.9834204387752088e-05, "loss": 0.6473, "step": 163460 }, { "epoch": 1.81, "learning_rate": 1.983328166061358e-05, "loss": 0.6202, "step": 163465 }, { "epoch": 1.81, "learning_rate": 1.9832358933475064e-05, "loss": 0.6108, "step": 163470 }, { "epoch": 1.81, "learning_rate": 1.9831436206336555e-05, "loss": 0.6583, "step": 163475 }, { "epoch": 1.81, "learning_rate": 1.983051347919804e-05, "loss": 0.6518, "step": 163480 }, { "epoch": 1.81, "learning_rate": 1.982959075205953e-05, "loss": 0.6165, "step": 163485 }, { "epoch": 1.81, "learning_rate": 1.9828668024921015e-05, "loss": 0.6036, "step": 163490 }, { "epoch": 1.81, "learning_rate": 1.9827745297782503e-05, "loss": 0.6068, "step": 163495 }, { "epoch": 1.81, "learning_rate": 1.982682257064399e-05, "loss": 0.6071, "step": 163500 }, { "epoch": 1.81, "learning_rate": 1.982589984350548e-05, "loss": 0.5858, "step": 163505 }, { "epoch": 1.81, "learning_rate": 1.9824977116366966e-05, "loss": 0.6089, "step": 163510 }, { "epoch": 1.81, "learning_rate": 1.982405438922845e-05, "loss": 0.6812, "step": 163515 }, { "epoch": 1.81, "learning_rate": 1.9823131662089942e-05, "loss": 0.5805, "step": 163520 }, { "epoch": 1.81, "learning_rate": 1.9822208934951427e-05, "loss": 0.6219, "step": 163525 }, { "epoch": 1.81, "learning_rate": 1.9821286207812918e-05, "loss": 0.6113, "step": 163530 }, { "epoch": 1.81, "learning_rate": 1.9820363480674402e-05, "loss": 0.6022, "step": 163535 }, { "epoch": 1.81, "learning_rate": 1.9819440753535893e-05, "loss": 0.6133, "step": 163540 }, { "epoch": 1.81, "learning_rate": 1.9818518026397378e-05, "loss": 0.6144, "step": 163545 }, { "epoch": 1.81, "learning_rate": 1.981759529925887e-05, "loss": 0.6166, "step": 163550 }, { "epoch": 1.81, "learning_rate": 1.9816672572120354e-05, "loss": 0.6315, "step": 163555 }, { "epoch": 1.81, "learning_rate": 1.981574984498184e-05, "loss": 0.6023, "step": 163560 }, { "epoch": 1.81, "learning_rate": 1.981482711784333e-05, "loss": 0.6208, "step": 163565 }, { "epoch": 1.81, "learning_rate": 1.9813904390704817e-05, "loss": 0.6126, "step": 163570 }, { "epoch": 1.81, "learning_rate": 1.9812981663566305e-05, "loss": 0.6439, "step": 163575 }, { "epoch": 1.81, "learning_rate": 1.981205893642779e-05, "loss": 0.6238, "step": 163580 }, { "epoch": 1.81, "learning_rate": 1.981113620928928e-05, "loss": 0.6201, "step": 163585 }, { "epoch": 1.81, "learning_rate": 1.9810213482150765e-05, "loss": 0.6441, "step": 163590 }, { "epoch": 1.81, "learning_rate": 1.9809290755012256e-05, "loss": 0.6637, "step": 163595 }, { "epoch": 1.81, "learning_rate": 1.980836802787374e-05, "loss": 0.579, "step": 163600 }, { "epoch": 1.81, "learning_rate": 1.9807445300735232e-05, "loss": 0.5974, "step": 163605 }, { "epoch": 1.81, "learning_rate": 1.9806522573596717e-05, "loss": 0.589, "step": 163610 }, { "epoch": 1.81, "learning_rate": 1.9805599846458204e-05, "loss": 0.6245, "step": 163615 }, { "epoch": 1.81, "learning_rate": 1.9804677119319692e-05, "loss": 0.5674, "step": 163620 }, { "epoch": 1.81, "learning_rate": 1.980375439218118e-05, "loss": 0.6379, "step": 163625 }, { "epoch": 1.81, "learning_rate": 1.9802831665042668e-05, "loss": 0.6415, "step": 163630 }, { "epoch": 1.81, "learning_rate": 1.9801908937904156e-05, "loss": 0.6288, "step": 163635 }, { "epoch": 1.81, "learning_rate": 1.9800986210765644e-05, "loss": 0.5596, "step": 163640 }, { "epoch": 1.81, "learning_rate": 1.9800063483627128e-05, "loss": 0.6563, "step": 163645 }, { "epoch": 1.81, "learning_rate": 1.979914075648862e-05, "loss": 0.5978, "step": 163650 }, { "epoch": 1.81, "learning_rate": 1.9798218029350104e-05, "loss": 0.629, "step": 163655 }, { "epoch": 1.81, "learning_rate": 1.9797295302211595e-05, "loss": 0.5652, "step": 163660 }, { "epoch": 1.81, "learning_rate": 1.979637257507308e-05, "loss": 0.6335, "step": 163665 }, { "epoch": 1.81, "learning_rate": 1.9795449847934567e-05, "loss": 0.6391, "step": 163670 }, { "epoch": 1.81, "learning_rate": 1.9794527120796055e-05, "loss": 0.6133, "step": 163675 }, { "epoch": 1.81, "learning_rate": 1.9793604393657543e-05, "loss": 0.655, "step": 163680 }, { "epoch": 1.81, "learning_rate": 1.979268166651903e-05, "loss": 0.5643, "step": 163685 }, { "epoch": 1.81, "learning_rate": 1.979175893938052e-05, "loss": 0.6137, "step": 163690 }, { "epoch": 1.81, "learning_rate": 1.9790836212242007e-05, "loss": 0.6331, "step": 163695 }, { "epoch": 1.81, "learning_rate": 1.9789913485103494e-05, "loss": 0.6197, "step": 163700 }, { "epoch": 1.81, "learning_rate": 1.9788990757964982e-05, "loss": 0.6303, "step": 163705 }, { "epoch": 1.81, "learning_rate": 1.978806803082647e-05, "loss": 0.6186, "step": 163710 }, { "epoch": 1.81, "learning_rate": 1.9787145303687958e-05, "loss": 0.6343, "step": 163715 }, { "epoch": 1.81, "learning_rate": 1.9786222576549442e-05, "loss": 0.6611, "step": 163720 }, { "epoch": 1.81, "learning_rate": 1.9785299849410934e-05, "loss": 0.629, "step": 163725 }, { "epoch": 1.81, "learning_rate": 1.9784377122272418e-05, "loss": 0.5744, "step": 163730 }, { "epoch": 1.81, "learning_rate": 1.9783454395133906e-05, "loss": 0.6666, "step": 163735 }, { "epoch": 1.81, "learning_rate": 1.9782531667995394e-05, "loss": 0.5856, "step": 163740 }, { "epoch": 1.81, "learning_rate": 1.978160894085688e-05, "loss": 0.5983, "step": 163745 }, { "epoch": 1.81, "learning_rate": 1.978068621371837e-05, "loss": 0.6032, "step": 163750 }, { "epoch": 1.81, "learning_rate": 1.9779763486579857e-05, "loss": 0.6012, "step": 163755 }, { "epoch": 1.81, "learning_rate": 1.9778840759441345e-05, "loss": 0.5984, "step": 163760 }, { "epoch": 1.81, "learning_rate": 1.9777918032302833e-05, "loss": 0.6246, "step": 163765 }, { "epoch": 1.81, "learning_rate": 1.977699530516432e-05, "loss": 0.6265, "step": 163770 }, { "epoch": 1.81, "learning_rate": 1.977607257802581e-05, "loss": 0.6226, "step": 163775 }, { "epoch": 1.81, "learning_rate": 1.9775149850887297e-05, "loss": 0.6116, "step": 163780 }, { "epoch": 1.81, "learning_rate": 1.9774227123748784e-05, "loss": 0.6628, "step": 163785 }, { "epoch": 1.81, "learning_rate": 1.977330439661027e-05, "loss": 0.671, "step": 163790 }, { "epoch": 1.81, "learning_rate": 1.9772381669471757e-05, "loss": 0.6722, "step": 163795 }, { "epoch": 1.81, "learning_rate": 1.9771458942333245e-05, "loss": 0.6047, "step": 163800 }, { "epoch": 1.81, "learning_rate": 1.9770536215194732e-05, "loss": 0.6437, "step": 163805 }, { "epoch": 1.81, "learning_rate": 1.976961348805622e-05, "loss": 0.6314, "step": 163810 }, { "epoch": 1.81, "learning_rate": 1.9768690760917708e-05, "loss": 0.611, "step": 163815 }, { "epoch": 1.81, "learning_rate": 1.9767768033779196e-05, "loss": 0.6138, "step": 163820 }, { "epoch": 1.81, "learning_rate": 1.9766845306640684e-05, "loss": 0.6456, "step": 163825 }, { "epoch": 1.81, "learning_rate": 1.9765922579502172e-05, "loss": 0.6515, "step": 163830 }, { "epoch": 1.81, "learning_rate": 1.976499985236366e-05, "loss": 0.6687, "step": 163835 }, { "epoch": 1.81, "learning_rate": 1.9764077125225147e-05, "loss": 0.5784, "step": 163840 }, { "epoch": 1.81, "learning_rate": 1.9763154398086632e-05, "loss": 0.6248, "step": 163845 }, { "epoch": 1.81, "learning_rate": 1.9762231670948123e-05, "loss": 0.5967, "step": 163850 }, { "epoch": 1.81, "learning_rate": 1.9761308943809608e-05, "loss": 0.6521, "step": 163855 }, { "epoch": 1.81, "learning_rate": 1.97603862166711e-05, "loss": 0.5816, "step": 163860 }, { "epoch": 1.81, "learning_rate": 1.9759463489532583e-05, "loss": 0.6172, "step": 163865 }, { "epoch": 1.81, "learning_rate": 1.9758540762394075e-05, "loss": 0.6309, "step": 163870 }, { "epoch": 1.81, "learning_rate": 1.975761803525556e-05, "loss": 0.6468, "step": 163875 }, { "epoch": 1.81, "learning_rate": 1.9756695308117047e-05, "loss": 0.586, "step": 163880 }, { "epoch": 1.81, "learning_rate": 1.9755772580978535e-05, "loss": 0.5803, "step": 163885 }, { "epoch": 1.81, "learning_rate": 1.9754849853840023e-05, "loss": 0.6818, "step": 163890 }, { "epoch": 1.81, "learning_rate": 1.975392712670151e-05, "loss": 0.6489, "step": 163895 }, { "epoch": 1.81, "learning_rate": 1.9753004399562998e-05, "loss": 0.6825, "step": 163900 }, { "epoch": 1.81, "learning_rate": 1.9752081672424486e-05, "loss": 0.6193, "step": 163905 }, { "epoch": 1.81, "learning_rate": 1.975115894528597e-05, "loss": 0.6331, "step": 163910 }, { "epoch": 1.81, "learning_rate": 1.9750236218147462e-05, "loss": 0.6288, "step": 163915 }, { "epoch": 1.82, "learning_rate": 1.9749313491008946e-05, "loss": 0.6432, "step": 163920 }, { "epoch": 1.82, "learning_rate": 1.9748390763870437e-05, "loss": 0.636, "step": 163925 }, { "epoch": 1.82, "learning_rate": 1.9747468036731922e-05, "loss": 0.6184, "step": 163930 }, { "epoch": 1.82, "learning_rate": 1.9746545309593413e-05, "loss": 0.6238, "step": 163935 }, { "epoch": 1.82, "learning_rate": 1.9745622582454898e-05, "loss": 0.6048, "step": 163940 }, { "epoch": 1.82, "learning_rate": 1.9744699855316385e-05, "loss": 0.6282, "step": 163945 }, { "epoch": 1.82, "learning_rate": 1.9743777128177873e-05, "loss": 0.6304, "step": 163950 }, { "epoch": 1.82, "learning_rate": 1.974285440103936e-05, "loss": 0.6559, "step": 163955 }, { "epoch": 1.82, "learning_rate": 1.974193167390085e-05, "loss": 0.5944, "step": 163960 }, { "epoch": 1.82, "learning_rate": 1.9741008946762333e-05, "loss": 0.5851, "step": 163965 }, { "epoch": 1.82, "learning_rate": 1.9740086219623825e-05, "loss": 0.6174, "step": 163970 }, { "epoch": 1.82, "learning_rate": 1.973916349248531e-05, "loss": 0.6557, "step": 163975 }, { "epoch": 1.82, "learning_rate": 1.97382407653468e-05, "loss": 0.6248, "step": 163980 }, { "epoch": 1.82, "learning_rate": 1.9737318038208285e-05, "loss": 0.6251, "step": 163985 }, { "epoch": 1.82, "learning_rate": 1.9736395311069776e-05, "loss": 0.6557, "step": 163990 }, { "epoch": 1.82, "learning_rate": 1.973547258393126e-05, "loss": 0.5621, "step": 163995 }, { "epoch": 1.82, "learning_rate": 1.973454985679275e-05, "loss": 0.62, "step": 164000 }, { "epoch": 1.82, "eval_loss": 0.5984373688697815, "eval_runtime": 69.1516, "eval_samples_per_second": 28.922, "eval_steps_per_second": 14.461, "step": 164000 }, { "epoch": 1.82, "learning_rate": 1.9733627129654236e-05, "loss": 0.5849, "step": 164005 }, { "epoch": 1.82, "learning_rate": 1.9732704402515724e-05, "loss": 0.6382, "step": 164010 }, { "epoch": 1.82, "learning_rate": 1.9731781675377212e-05, "loss": 0.6153, "step": 164015 }, { "epoch": 1.82, "learning_rate": 1.97308589482387e-05, "loss": 0.5883, "step": 164020 }, { "epoch": 1.82, "learning_rate": 1.9729936221100188e-05, "loss": 0.6293, "step": 164025 }, { "epoch": 1.82, "learning_rate": 1.9729013493961672e-05, "loss": 0.6408, "step": 164030 }, { "epoch": 1.82, "learning_rate": 1.9728090766823163e-05, "loss": 0.5931, "step": 164035 }, { "epoch": 1.82, "learning_rate": 1.9727168039684648e-05, "loss": 0.6134, "step": 164040 }, { "epoch": 1.82, "learning_rate": 1.972624531254614e-05, "loss": 0.6305, "step": 164045 }, { "epoch": 1.82, "learning_rate": 1.9725322585407624e-05, "loss": 0.6568, "step": 164050 }, { "epoch": 1.82, "learning_rate": 1.9724399858269115e-05, "loss": 0.6313, "step": 164055 }, { "epoch": 1.82, "learning_rate": 1.97234771311306e-05, "loss": 0.6172, "step": 164060 }, { "epoch": 1.82, "learning_rate": 1.9722554403992087e-05, "loss": 0.6608, "step": 164065 }, { "epoch": 1.82, "learning_rate": 1.9721631676853575e-05, "loss": 0.5705, "step": 164070 }, { "epoch": 1.82, "learning_rate": 1.9720708949715063e-05, "loss": 0.5723, "step": 164075 }, { "epoch": 1.82, "learning_rate": 1.971978622257655e-05, "loss": 0.6481, "step": 164080 }, { "epoch": 1.82, "learning_rate": 1.971886349543804e-05, "loss": 0.6897, "step": 164085 }, { "epoch": 1.82, "learning_rate": 1.9717940768299526e-05, "loss": 0.6431, "step": 164090 }, { "epoch": 1.82, "learning_rate": 1.9717018041161014e-05, "loss": 0.6353, "step": 164095 }, { "epoch": 1.82, "learning_rate": 1.9716095314022502e-05, "loss": 0.6016, "step": 164100 }, { "epoch": 1.82, "learning_rate": 1.9715172586883986e-05, "loss": 0.6241, "step": 164105 }, { "epoch": 1.82, "learning_rate": 1.9714249859745478e-05, "loss": 0.5568, "step": 164110 }, { "epoch": 1.82, "learning_rate": 1.9713327132606962e-05, "loss": 0.5843, "step": 164115 }, { "epoch": 1.82, "learning_rate": 1.971240440546845e-05, "loss": 0.6275, "step": 164120 }, { "epoch": 1.82, "learning_rate": 1.9711481678329938e-05, "loss": 0.6388, "step": 164125 }, { "epoch": 1.82, "learning_rate": 1.9710558951191426e-05, "loss": 0.6184, "step": 164130 }, { "epoch": 1.82, "learning_rate": 1.9709636224052914e-05, "loss": 0.6712, "step": 164135 }, { "epoch": 1.82, "learning_rate": 1.97087134969144e-05, "loss": 0.662, "step": 164140 }, { "epoch": 1.82, "learning_rate": 1.970779076977589e-05, "loss": 0.6485, "step": 164145 }, { "epoch": 1.82, "learning_rate": 1.9706868042637377e-05, "loss": 0.6109, "step": 164150 }, { "epoch": 1.82, "learning_rate": 1.9705945315498865e-05, "loss": 0.7092, "step": 164155 }, { "epoch": 1.82, "learning_rate": 1.9705022588360353e-05, "loss": 0.5592, "step": 164160 }, { "epoch": 1.82, "learning_rate": 1.970409986122184e-05, "loss": 0.6153, "step": 164165 }, { "epoch": 1.82, "learning_rate": 1.970317713408333e-05, "loss": 0.669, "step": 164170 }, { "epoch": 1.82, "learning_rate": 1.9702254406944813e-05, "loss": 0.653, "step": 164175 }, { "epoch": 1.82, "learning_rate": 1.97013316798063e-05, "loss": 0.5898, "step": 164180 }, { "epoch": 1.82, "learning_rate": 1.970040895266779e-05, "loss": 0.6731, "step": 164185 }, { "epoch": 1.82, "learning_rate": 1.9699486225529277e-05, "loss": 0.5653, "step": 164190 }, { "epoch": 1.82, "learning_rate": 1.9698563498390764e-05, "loss": 0.5954, "step": 164195 }, { "epoch": 1.82, "learning_rate": 1.9697640771252252e-05, "loss": 0.5772, "step": 164200 }, { "epoch": 1.82, "learning_rate": 1.969671804411374e-05, "loss": 0.6135, "step": 164205 }, { "epoch": 1.82, "learning_rate": 1.9695795316975228e-05, "loss": 0.6195, "step": 164210 }, { "epoch": 1.82, "learning_rate": 1.9694872589836716e-05, "loss": 0.6275, "step": 164215 }, { "epoch": 1.82, "learning_rate": 1.9693949862698204e-05, "loss": 0.6165, "step": 164220 }, { "epoch": 1.82, "learning_rate": 1.969302713555969e-05, "loss": 0.6872, "step": 164225 }, { "epoch": 1.82, "learning_rate": 1.9692104408421176e-05, "loss": 0.6229, "step": 164230 }, { "epoch": 1.82, "learning_rate": 1.9691181681282667e-05, "loss": 0.6884, "step": 164235 }, { "epoch": 1.82, "learning_rate": 1.969025895414415e-05, "loss": 0.5721, "step": 164240 }, { "epoch": 1.82, "learning_rate": 1.9689336227005643e-05, "loss": 0.6583, "step": 164245 }, { "epoch": 1.82, "learning_rate": 1.9688413499867127e-05, "loss": 0.6324, "step": 164250 }, { "epoch": 1.82, "learning_rate": 1.9687490772728615e-05, "loss": 0.6113, "step": 164255 }, { "epoch": 1.82, "learning_rate": 1.9686568045590103e-05, "loss": 0.6281, "step": 164260 }, { "epoch": 1.82, "learning_rate": 1.968564531845159e-05, "loss": 0.6208, "step": 164265 }, { "epoch": 1.82, "learning_rate": 1.968472259131308e-05, "loss": 0.616, "step": 164270 }, { "epoch": 1.82, "learning_rate": 1.9683799864174567e-05, "loss": 0.5824, "step": 164275 }, { "epoch": 1.82, "learning_rate": 1.9682877137036054e-05, "loss": 0.6202, "step": 164280 }, { "epoch": 1.82, "learning_rate": 1.9681954409897542e-05, "loss": 0.6296, "step": 164285 }, { "epoch": 1.82, "learning_rate": 1.968103168275903e-05, "loss": 0.6518, "step": 164290 }, { "epoch": 1.82, "learning_rate": 1.9680108955620515e-05, "loss": 0.6116, "step": 164295 }, { "epoch": 1.82, "learning_rate": 1.9679186228482006e-05, "loss": 0.6475, "step": 164300 }, { "epoch": 1.82, "learning_rate": 1.967826350134349e-05, "loss": 0.6285, "step": 164305 }, { "epoch": 1.82, "learning_rate": 1.967734077420498e-05, "loss": 0.6489, "step": 164310 }, { "epoch": 1.82, "learning_rate": 1.9676418047066466e-05, "loss": 0.6535, "step": 164315 }, { "epoch": 1.82, "learning_rate": 1.9675495319927957e-05, "loss": 0.6335, "step": 164320 }, { "epoch": 1.82, "learning_rate": 1.967457259278944e-05, "loss": 0.6304, "step": 164325 }, { "epoch": 1.82, "learning_rate": 1.967364986565093e-05, "loss": 0.611, "step": 164330 }, { "epoch": 1.82, "learning_rate": 1.9672727138512417e-05, "loss": 0.6542, "step": 164335 }, { "epoch": 1.82, "learning_rate": 1.9671804411373905e-05, "loss": 0.6486, "step": 164340 }, { "epoch": 1.82, "learning_rate": 1.9670881684235393e-05, "loss": 0.6346, "step": 164345 }, { "epoch": 1.82, "learning_rate": 1.9669958957096878e-05, "loss": 0.6225, "step": 164350 }, { "epoch": 1.82, "learning_rate": 1.966903622995837e-05, "loss": 0.5978, "step": 164355 }, { "epoch": 1.82, "learning_rate": 1.9668113502819853e-05, "loss": 0.6267, "step": 164360 }, { "epoch": 1.82, "learning_rate": 1.9667190775681344e-05, "loss": 0.6621, "step": 164365 }, { "epoch": 1.82, "learning_rate": 1.966626804854283e-05, "loss": 0.6168, "step": 164370 }, { "epoch": 1.82, "learning_rate": 1.966534532140432e-05, "loss": 0.6406, "step": 164375 }, { "epoch": 1.82, "learning_rate": 1.9664422594265805e-05, "loss": 0.6545, "step": 164380 }, { "epoch": 1.82, "learning_rate": 1.9663499867127292e-05, "loss": 0.6403, "step": 164385 }, { "epoch": 1.82, "learning_rate": 1.966257713998878e-05, "loss": 0.5973, "step": 164390 }, { "epoch": 1.82, "learning_rate": 1.9661654412850268e-05, "loss": 0.5841, "step": 164395 }, { "epoch": 1.82, "learning_rate": 1.9660731685711756e-05, "loss": 0.635, "step": 164400 }, { "epoch": 1.82, "learning_rate": 1.965980895857324e-05, "loss": 0.6475, "step": 164405 }, { "epoch": 1.82, "learning_rate": 1.965888623143473e-05, "loss": 0.6459, "step": 164410 }, { "epoch": 1.82, "learning_rate": 1.9657963504296216e-05, "loss": 0.5931, "step": 164415 }, { "epoch": 1.82, "learning_rate": 1.9657040777157707e-05, "loss": 0.6639, "step": 164420 }, { "epoch": 1.82, "learning_rate": 1.9656118050019192e-05, "loss": 0.6131, "step": 164425 }, { "epoch": 1.82, "learning_rate": 1.9655195322880683e-05, "loss": 0.5824, "step": 164430 }, { "epoch": 1.82, "learning_rate": 1.9654272595742168e-05, "loss": 0.6373, "step": 164435 }, { "epoch": 1.82, "learning_rate": 1.965334986860366e-05, "loss": 0.6674, "step": 164440 }, { "epoch": 1.82, "learning_rate": 1.9652427141465143e-05, "loss": 0.6711, "step": 164445 }, { "epoch": 1.82, "learning_rate": 1.965150441432663e-05, "loss": 0.5782, "step": 164450 }, { "epoch": 1.82, "learning_rate": 1.965058168718812e-05, "loss": 0.6133, "step": 164455 }, { "epoch": 1.82, "learning_rate": 1.9649658960049607e-05, "loss": 0.6208, "step": 164460 }, { "epoch": 1.82, "learning_rate": 1.9648736232911095e-05, "loss": 0.649, "step": 164465 }, { "epoch": 1.82, "learning_rate": 1.9647813505772582e-05, "loss": 0.6154, "step": 164470 }, { "epoch": 1.82, "learning_rate": 1.964689077863407e-05, "loss": 0.6639, "step": 164475 }, { "epoch": 1.82, "learning_rate": 1.9645968051495555e-05, "loss": 0.6137, "step": 164480 }, { "epoch": 1.82, "learning_rate": 1.9645045324357046e-05, "loss": 0.5718, "step": 164485 }, { "epoch": 1.82, "learning_rate": 1.964412259721853e-05, "loss": 0.5901, "step": 164490 }, { "epoch": 1.82, "learning_rate": 1.9643199870080022e-05, "loss": 0.6499, "step": 164495 }, { "epoch": 1.82, "learning_rate": 1.9642277142941506e-05, "loss": 0.6425, "step": 164500 }, { "epoch": 1.82, "learning_rate": 1.9641354415802994e-05, "loss": 0.6165, "step": 164505 }, { "epoch": 1.82, "learning_rate": 1.9640431688664482e-05, "loss": 0.6724, "step": 164510 }, { "epoch": 1.82, "learning_rate": 1.963950896152597e-05, "loss": 0.5954, "step": 164515 }, { "epoch": 1.82, "learning_rate": 1.9638586234387458e-05, "loss": 0.6433, "step": 164520 }, { "epoch": 1.82, "learning_rate": 1.9637663507248945e-05, "loss": 0.6399, "step": 164525 }, { "epoch": 1.82, "learning_rate": 1.9636740780110433e-05, "loss": 0.6807, "step": 164530 }, { "epoch": 1.82, "learning_rate": 1.963581805297192e-05, "loss": 0.6317, "step": 164535 }, { "epoch": 1.82, "learning_rate": 1.963489532583341e-05, "loss": 0.5919, "step": 164540 }, { "epoch": 1.82, "learning_rate": 1.9633972598694897e-05, "loss": 0.6406, "step": 164545 }, { "epoch": 1.82, "learning_rate": 1.9633049871556385e-05, "loss": 0.647, "step": 164550 }, { "epoch": 1.82, "learning_rate": 1.963212714441787e-05, "loss": 0.5704, "step": 164555 }, { "epoch": 1.82, "learning_rate": 1.9631204417279357e-05, "loss": 0.6422, "step": 164560 }, { "epoch": 1.82, "learning_rate": 1.9630281690140845e-05, "loss": 0.6495, "step": 164565 }, { "epoch": 1.82, "learning_rate": 1.9629358963002333e-05, "loss": 0.6496, "step": 164570 }, { "epoch": 1.82, "learning_rate": 1.962843623586382e-05, "loss": 0.6788, "step": 164575 }, { "epoch": 1.82, "learning_rate": 1.962751350872531e-05, "loss": 0.6166, "step": 164580 }, { "epoch": 1.82, "learning_rate": 1.9626590781586796e-05, "loss": 0.6182, "step": 164585 }, { "epoch": 1.82, "learning_rate": 1.9625668054448284e-05, "loss": 0.5886, "step": 164590 }, { "epoch": 1.82, "learning_rate": 1.9624745327309772e-05, "loss": 0.6547, "step": 164595 }, { "epoch": 1.82, "learning_rate": 1.962382260017126e-05, "loss": 0.5673, "step": 164600 }, { "epoch": 1.82, "learning_rate": 1.9622899873032748e-05, "loss": 0.6015, "step": 164605 }, { "epoch": 1.82, "learning_rate": 1.9621977145894235e-05, "loss": 0.6345, "step": 164610 }, { "epoch": 1.82, "learning_rate": 1.962105441875572e-05, "loss": 0.658, "step": 164615 }, { "epoch": 1.82, "learning_rate": 1.962013169161721e-05, "loss": 0.619, "step": 164620 }, { "epoch": 1.82, "learning_rate": 1.9619208964478696e-05, "loss": 0.64, "step": 164625 }, { "epoch": 1.82, "learning_rate": 1.9618286237340183e-05, "loss": 0.6312, "step": 164630 }, { "epoch": 1.82, "learning_rate": 1.961736351020167e-05, "loss": 0.5816, "step": 164635 }, { "epoch": 1.82, "learning_rate": 1.961644078306316e-05, "loss": 0.6245, "step": 164640 }, { "epoch": 1.82, "learning_rate": 1.9615518055924647e-05, "loss": 0.5737, "step": 164645 }, { "epoch": 1.82, "learning_rate": 1.9614595328786135e-05, "loss": 0.6318, "step": 164650 }, { "epoch": 1.82, "learning_rate": 1.9613672601647623e-05, "loss": 0.5979, "step": 164655 }, { "epoch": 1.82, "learning_rate": 1.961274987450911e-05, "loss": 0.6091, "step": 164660 }, { "epoch": 1.82, "learning_rate": 1.96118271473706e-05, "loss": 0.608, "step": 164665 }, { "epoch": 1.82, "learning_rate": 1.9610904420232086e-05, "loss": 0.6532, "step": 164670 }, { "epoch": 1.82, "learning_rate": 1.9609981693093574e-05, "loss": 0.5821, "step": 164675 }, { "epoch": 1.82, "learning_rate": 1.960905896595506e-05, "loss": 0.6128, "step": 164680 }, { "epoch": 1.82, "learning_rate": 1.960813623881655e-05, "loss": 0.6133, "step": 164685 }, { "epoch": 1.82, "learning_rate": 1.9607213511678034e-05, "loss": 0.6302, "step": 164690 }, { "epoch": 1.82, "learning_rate": 1.9606290784539526e-05, "loss": 0.5868, "step": 164695 }, { "epoch": 1.82, "learning_rate": 1.960536805740101e-05, "loss": 0.6462, "step": 164700 }, { "epoch": 1.82, "learning_rate": 1.96044453302625e-05, "loss": 0.5508, "step": 164705 }, { "epoch": 1.82, "learning_rate": 1.9603522603123986e-05, "loss": 0.6029, "step": 164710 }, { "epoch": 1.82, "learning_rate": 1.9602599875985474e-05, "loss": 0.5992, "step": 164715 }, { "epoch": 1.82, "learning_rate": 1.960167714884696e-05, "loss": 0.6114, "step": 164720 }, { "epoch": 1.82, "learning_rate": 1.960075442170845e-05, "loss": 0.6238, "step": 164725 }, { "epoch": 1.82, "learning_rate": 1.9599831694569937e-05, "loss": 0.6296, "step": 164730 }, { "epoch": 1.82, "learning_rate": 1.959890896743142e-05, "loss": 0.6247, "step": 164735 }, { "epoch": 1.82, "learning_rate": 1.9597986240292913e-05, "loss": 0.6429, "step": 164740 }, { "epoch": 1.82, "learning_rate": 1.9597063513154397e-05, "loss": 0.6385, "step": 164745 }, { "epoch": 1.82, "learning_rate": 1.959614078601589e-05, "loss": 0.6228, "step": 164750 }, { "epoch": 1.82, "learning_rate": 1.9595218058877373e-05, "loss": 0.6112, "step": 164755 }, { "epoch": 1.82, "learning_rate": 1.9594295331738864e-05, "loss": 0.6133, "step": 164760 }, { "epoch": 1.82, "learning_rate": 1.959337260460035e-05, "loss": 0.681, "step": 164765 }, { "epoch": 1.82, "learning_rate": 1.9592449877461836e-05, "loss": 0.5669, "step": 164770 }, { "epoch": 1.82, "learning_rate": 1.9591527150323324e-05, "loss": 0.6115, "step": 164775 }, { "epoch": 1.82, "learning_rate": 1.9590604423184812e-05, "loss": 0.6339, "step": 164780 }, { "epoch": 1.82, "learning_rate": 1.95896816960463e-05, "loss": 0.5868, "step": 164785 }, { "epoch": 1.82, "learning_rate": 1.9588758968907784e-05, "loss": 0.5507, "step": 164790 }, { "epoch": 1.82, "learning_rate": 1.9587836241769276e-05, "loss": 0.6513, "step": 164795 }, { "epoch": 1.82, "learning_rate": 1.958691351463076e-05, "loss": 0.6028, "step": 164800 }, { "epoch": 1.82, "learning_rate": 1.958599078749225e-05, "loss": 0.5962, "step": 164805 }, { "epoch": 1.82, "learning_rate": 1.9585068060353736e-05, "loss": 0.6072, "step": 164810 }, { "epoch": 1.82, "learning_rate": 1.9584145333215227e-05, "loss": 0.6745, "step": 164815 }, { "epoch": 1.83, "learning_rate": 1.958322260607671e-05, "loss": 0.6012, "step": 164820 }, { "epoch": 1.83, "learning_rate": 1.9582299878938203e-05, "loss": 0.6059, "step": 164825 }, { "epoch": 1.83, "learning_rate": 1.9581377151799687e-05, "loss": 0.5779, "step": 164830 }, { "epoch": 1.83, "learning_rate": 1.9580454424661175e-05, "loss": 0.5948, "step": 164835 }, { "epoch": 1.83, "learning_rate": 1.9579531697522663e-05, "loss": 0.5737, "step": 164840 }, { "epoch": 1.83, "learning_rate": 1.957860897038415e-05, "loss": 0.5802, "step": 164845 }, { "epoch": 1.83, "learning_rate": 1.957768624324564e-05, "loss": 0.6637, "step": 164850 }, { "epoch": 1.83, "learning_rate": 1.9576763516107127e-05, "loss": 0.6286, "step": 164855 }, { "epoch": 1.83, "learning_rate": 1.9575840788968614e-05, "loss": 0.6768, "step": 164860 }, { "epoch": 1.83, "learning_rate": 1.95749180618301e-05, "loss": 0.6448, "step": 164865 }, { "epoch": 1.83, "learning_rate": 1.957399533469159e-05, "loss": 0.6228, "step": 164870 }, { "epoch": 1.83, "learning_rate": 1.9573072607553075e-05, "loss": 0.6005, "step": 164875 }, { "epoch": 1.83, "learning_rate": 1.9572149880414566e-05, "loss": 0.6036, "step": 164880 }, { "epoch": 1.83, "learning_rate": 1.957122715327605e-05, "loss": 0.6299, "step": 164885 }, { "epoch": 1.83, "learning_rate": 1.9570304426137538e-05, "loss": 0.6091, "step": 164890 }, { "epoch": 1.83, "learning_rate": 1.9569381698999026e-05, "loss": 0.6401, "step": 164895 }, { "epoch": 1.83, "learning_rate": 1.9568458971860514e-05, "loss": 0.6299, "step": 164900 }, { "epoch": 1.83, "learning_rate": 1.9567536244722e-05, "loss": 0.6165, "step": 164905 }, { "epoch": 1.83, "learning_rate": 1.956661351758349e-05, "loss": 0.6577, "step": 164910 }, { "epoch": 1.83, "learning_rate": 1.9565690790444977e-05, "loss": 0.5788, "step": 164915 }, { "epoch": 1.83, "learning_rate": 1.9564768063306465e-05, "loss": 0.6484, "step": 164920 }, { "epoch": 1.83, "learning_rate": 1.9563845336167953e-05, "loss": 0.6609, "step": 164925 }, { "epoch": 1.83, "learning_rate": 1.956292260902944e-05, "loss": 0.5751, "step": 164930 }, { "epoch": 1.83, "learning_rate": 1.956199988189093e-05, "loss": 0.6951, "step": 164935 }, { "epoch": 1.83, "learning_rate": 1.9561077154752413e-05, "loss": 0.62, "step": 164940 }, { "epoch": 1.83, "learning_rate": 1.95601544276139e-05, "loss": 0.5705, "step": 164945 }, { "epoch": 1.83, "learning_rate": 1.955923170047539e-05, "loss": 0.6625, "step": 164950 }, { "epoch": 1.83, "learning_rate": 1.9558308973336877e-05, "loss": 0.6195, "step": 164955 }, { "epoch": 1.83, "learning_rate": 1.9557386246198365e-05, "loss": 0.6678, "step": 164960 }, { "epoch": 1.83, "learning_rate": 1.9556463519059852e-05, "loss": 0.5853, "step": 164965 }, { "epoch": 1.83, "learning_rate": 1.955554079192134e-05, "loss": 0.5856, "step": 164970 }, { "epoch": 1.83, "learning_rate": 1.9554618064782828e-05, "loss": 0.5855, "step": 164975 }, { "epoch": 1.83, "learning_rate": 1.9553695337644316e-05, "loss": 0.621, "step": 164980 }, { "epoch": 1.83, "learning_rate": 1.9552772610505804e-05, "loss": 0.5944, "step": 164985 }, { "epoch": 1.83, "learning_rate": 1.955184988336729e-05, "loss": 0.6112, "step": 164990 }, { "epoch": 1.83, "learning_rate": 1.955092715622878e-05, "loss": 0.6165, "step": 164995 }, { "epoch": 1.83, "learning_rate": 1.9550004429090264e-05, "loss": 0.5541, "step": 165000 }, { "epoch": 1.83, "eval_loss": 0.6003308892250061, "eval_runtime": 69.1413, "eval_samples_per_second": 28.926, "eval_steps_per_second": 14.463, "step": 165000 }, { "epoch": 1.83, "learning_rate": 1.9549081701951755e-05, "loss": 0.614, "step": 165005 }, { "epoch": 1.83, "learning_rate": 1.954815897481324e-05, "loss": 0.6826, "step": 165010 }, { "epoch": 1.83, "learning_rate": 1.9547236247674727e-05, "loss": 0.582, "step": 165015 }, { "epoch": 1.83, "learning_rate": 1.9546313520536215e-05, "loss": 0.6318, "step": 165020 }, { "epoch": 1.83, "learning_rate": 1.9545390793397703e-05, "loss": 0.603, "step": 165025 }, { "epoch": 1.83, "learning_rate": 1.954446806625919e-05, "loss": 0.5828, "step": 165030 }, { "epoch": 1.83, "learning_rate": 1.954354533912068e-05, "loss": 0.6149, "step": 165035 }, { "epoch": 1.83, "learning_rate": 1.9542622611982167e-05, "loss": 0.6454, "step": 165040 }, { "epoch": 1.83, "learning_rate": 1.9541699884843655e-05, "loss": 0.6216, "step": 165045 }, { "epoch": 1.83, "learning_rate": 1.9540777157705142e-05, "loss": 0.6831, "step": 165050 }, { "epoch": 1.83, "learning_rate": 1.953985443056663e-05, "loss": 0.6466, "step": 165055 }, { "epoch": 1.83, "learning_rate": 1.9538931703428118e-05, "loss": 0.6205, "step": 165060 }, { "epoch": 1.83, "learning_rate": 1.9538008976289603e-05, "loss": 0.5897, "step": 165065 }, { "epoch": 1.83, "learning_rate": 1.9537086249151094e-05, "loss": 0.6283, "step": 165070 }, { "epoch": 1.83, "learning_rate": 1.9536163522012578e-05, "loss": 0.6388, "step": 165075 }, { "epoch": 1.83, "learning_rate": 1.953524079487407e-05, "loss": 0.5877, "step": 165080 }, { "epoch": 1.83, "learning_rate": 1.9534318067735554e-05, "loss": 0.5761, "step": 165085 }, { "epoch": 1.83, "learning_rate": 1.9533395340597042e-05, "loss": 0.6297, "step": 165090 }, { "epoch": 1.83, "learning_rate": 1.953247261345853e-05, "loss": 0.6671, "step": 165095 }, { "epoch": 1.83, "learning_rate": 1.9531549886320018e-05, "loss": 0.6068, "step": 165100 }, { "epoch": 1.83, "learning_rate": 1.9530627159181505e-05, "loss": 0.6348, "step": 165105 }, { "epoch": 1.83, "learning_rate": 1.9529704432042993e-05, "loss": 0.6039, "step": 165110 }, { "epoch": 1.83, "learning_rate": 1.952878170490448e-05, "loss": 0.5763, "step": 165115 }, { "epoch": 1.83, "learning_rate": 1.9527858977765966e-05, "loss": 0.5765, "step": 165120 }, { "epoch": 1.83, "learning_rate": 1.9526936250627457e-05, "loss": 0.6392, "step": 165125 }, { "epoch": 1.83, "learning_rate": 1.952601352348894e-05, "loss": 0.6418, "step": 165130 }, { "epoch": 1.83, "learning_rate": 1.9525090796350432e-05, "loss": 0.64, "step": 165135 }, { "epoch": 1.83, "learning_rate": 1.9524168069211917e-05, "loss": 0.6222, "step": 165140 }, { "epoch": 1.83, "learning_rate": 1.9523245342073408e-05, "loss": 0.6103, "step": 165145 }, { "epoch": 1.83, "learning_rate": 1.9522322614934893e-05, "loss": 0.6058, "step": 165150 }, { "epoch": 1.83, "learning_rate": 1.9521399887796384e-05, "loss": 0.5601, "step": 165155 }, { "epoch": 1.83, "learning_rate": 1.952047716065787e-05, "loss": 0.5592, "step": 165160 }, { "epoch": 1.83, "learning_rate": 1.9519554433519356e-05, "loss": 0.6392, "step": 165165 }, { "epoch": 1.83, "learning_rate": 1.9518631706380844e-05, "loss": 0.6462, "step": 165170 }, { "epoch": 1.83, "learning_rate": 1.951770897924233e-05, "loss": 0.6831, "step": 165175 }, { "epoch": 1.83, "learning_rate": 1.951678625210382e-05, "loss": 0.6218, "step": 165180 }, { "epoch": 1.83, "learning_rate": 1.9515863524965304e-05, "loss": 0.6137, "step": 165185 }, { "epoch": 1.83, "learning_rate": 1.9514940797826795e-05, "loss": 0.5824, "step": 165190 }, { "epoch": 1.83, "learning_rate": 1.951401807068828e-05, "loss": 0.6259, "step": 165195 }, { "epoch": 1.83, "learning_rate": 1.951309534354977e-05, "loss": 0.6187, "step": 165200 }, { "epoch": 1.83, "learning_rate": 1.9512172616411256e-05, "loss": 0.6761, "step": 165205 }, { "epoch": 1.83, "learning_rate": 1.9511249889272747e-05, "loss": 0.5842, "step": 165210 }, { "epoch": 1.83, "learning_rate": 1.951032716213423e-05, "loss": 0.6198, "step": 165215 }, { "epoch": 1.83, "learning_rate": 1.950940443499572e-05, "loss": 0.6046, "step": 165220 }, { "epoch": 1.83, "learning_rate": 1.9508481707857207e-05, "loss": 0.5833, "step": 165225 }, { "epoch": 1.83, "learning_rate": 1.9507558980718695e-05, "loss": 0.6656, "step": 165230 }, { "epoch": 1.83, "learning_rate": 1.9506636253580183e-05, "loss": 0.6456, "step": 165235 }, { "epoch": 1.83, "learning_rate": 1.9505713526441667e-05, "loss": 0.6576, "step": 165240 }, { "epoch": 1.83, "learning_rate": 1.950479079930316e-05, "loss": 0.6124, "step": 165245 }, { "epoch": 1.83, "learning_rate": 1.9503868072164643e-05, "loss": 0.6221, "step": 165250 }, { "epoch": 1.83, "learning_rate": 1.9502945345026134e-05, "loss": 0.5996, "step": 165255 }, { "epoch": 1.83, "learning_rate": 1.950202261788762e-05, "loss": 0.5879, "step": 165260 }, { "epoch": 1.83, "learning_rate": 1.950109989074911e-05, "loss": 0.5982, "step": 165265 }, { "epoch": 1.83, "learning_rate": 1.9500177163610594e-05, "loss": 0.5961, "step": 165270 }, { "epoch": 1.83, "learning_rate": 1.9499254436472082e-05, "loss": 0.61, "step": 165275 }, { "epoch": 1.83, "learning_rate": 1.949833170933357e-05, "loss": 0.6593, "step": 165280 }, { "epoch": 1.83, "learning_rate": 1.9497408982195058e-05, "loss": 0.6064, "step": 165285 }, { "epoch": 1.83, "learning_rate": 1.9496486255056546e-05, "loss": 0.5767, "step": 165290 }, { "epoch": 1.83, "learning_rate": 1.9495563527918033e-05, "loss": 0.6046, "step": 165295 }, { "epoch": 1.83, "learning_rate": 1.949464080077952e-05, "loss": 0.6318, "step": 165300 }, { "epoch": 1.83, "learning_rate": 1.949371807364101e-05, "loss": 0.5707, "step": 165305 }, { "epoch": 1.83, "learning_rate": 1.9492795346502497e-05, "loss": 0.6064, "step": 165310 }, { "epoch": 1.83, "learning_rate": 1.949187261936398e-05, "loss": 0.623, "step": 165315 }, { "epoch": 1.83, "learning_rate": 1.9490949892225473e-05, "loss": 0.6365, "step": 165320 }, { "epoch": 1.83, "learning_rate": 1.9490027165086957e-05, "loss": 0.6664, "step": 165325 }, { "epoch": 1.83, "learning_rate": 1.9489104437948445e-05, "loss": 0.6063, "step": 165330 }, { "epoch": 1.83, "learning_rate": 1.9488181710809933e-05, "loss": 0.6158, "step": 165335 }, { "epoch": 1.83, "learning_rate": 1.948725898367142e-05, "loss": 0.6824, "step": 165340 }, { "epoch": 1.83, "learning_rate": 1.948633625653291e-05, "loss": 0.6234, "step": 165345 }, { "epoch": 1.83, "learning_rate": 1.9485413529394396e-05, "loss": 0.6452, "step": 165350 }, { "epoch": 1.83, "learning_rate": 1.9484490802255884e-05, "loss": 0.6024, "step": 165355 }, { "epoch": 1.83, "learning_rate": 1.9483568075117372e-05, "loss": 0.5961, "step": 165360 }, { "epoch": 1.83, "learning_rate": 1.948264534797886e-05, "loss": 0.6457, "step": 165365 }, { "epoch": 1.83, "learning_rate": 1.9481722620840348e-05, "loss": 0.6488, "step": 165370 }, { "epoch": 1.83, "learning_rate": 1.9480799893701836e-05, "loss": 0.6971, "step": 165375 }, { "epoch": 1.83, "learning_rate": 1.9479877166563324e-05, "loss": 0.6281, "step": 165380 }, { "epoch": 1.83, "learning_rate": 1.947895443942481e-05, "loss": 0.6037, "step": 165385 }, { "epoch": 1.83, "learning_rate": 1.9478031712286296e-05, "loss": 0.5999, "step": 165390 }, { "epoch": 1.83, "learning_rate": 1.9477108985147784e-05, "loss": 0.5591, "step": 165395 }, { "epoch": 1.83, "learning_rate": 1.947618625800927e-05, "loss": 0.6133, "step": 165400 }, { "epoch": 1.83, "learning_rate": 1.947526353087076e-05, "loss": 0.6133, "step": 165405 }, { "epoch": 1.83, "learning_rate": 1.9474340803732247e-05, "loss": 0.6197, "step": 165410 }, { "epoch": 1.83, "learning_rate": 1.9473418076593735e-05, "loss": 0.6339, "step": 165415 }, { "epoch": 1.83, "learning_rate": 1.9472495349455223e-05, "loss": 0.6392, "step": 165420 }, { "epoch": 1.83, "learning_rate": 1.947157262231671e-05, "loss": 0.5729, "step": 165425 }, { "epoch": 1.83, "learning_rate": 1.94706498951782e-05, "loss": 0.637, "step": 165430 }, { "epoch": 1.83, "learning_rate": 1.9469727168039686e-05, "loss": 0.6111, "step": 165435 }, { "epoch": 1.83, "learning_rate": 1.9468804440901174e-05, "loss": 0.6206, "step": 165440 }, { "epoch": 1.83, "learning_rate": 1.9467881713762662e-05, "loss": 0.6417, "step": 165445 }, { "epoch": 1.83, "learning_rate": 1.9466958986624147e-05, "loss": 0.6183, "step": 165450 }, { "epoch": 1.83, "learning_rate": 1.9466036259485638e-05, "loss": 0.6104, "step": 165455 }, { "epoch": 1.83, "learning_rate": 1.9465113532347122e-05, "loss": 0.5624, "step": 165460 }, { "epoch": 1.83, "learning_rate": 1.946419080520861e-05, "loss": 0.5971, "step": 165465 }, { "epoch": 1.83, "learning_rate": 1.9463268078070098e-05, "loss": 0.6168, "step": 165470 }, { "epoch": 1.83, "learning_rate": 1.9462345350931586e-05, "loss": 0.6732, "step": 165475 }, { "epoch": 1.83, "learning_rate": 1.9461422623793074e-05, "loss": 0.6339, "step": 165480 }, { "epoch": 1.83, "learning_rate": 1.946049989665456e-05, "loss": 0.5794, "step": 165485 }, { "epoch": 1.83, "learning_rate": 1.945957716951605e-05, "loss": 0.6556, "step": 165490 }, { "epoch": 1.83, "learning_rate": 1.9458654442377537e-05, "loss": 0.5937, "step": 165495 }, { "epoch": 1.83, "learning_rate": 1.9457731715239025e-05, "loss": 0.6035, "step": 165500 }, { "epoch": 1.83, "learning_rate": 1.945680898810051e-05, "loss": 0.639, "step": 165505 }, { "epoch": 1.83, "learning_rate": 1.9455886260962e-05, "loss": 0.6098, "step": 165510 }, { "epoch": 1.83, "learning_rate": 1.9454963533823485e-05, "loss": 0.6208, "step": 165515 }, { "epoch": 1.83, "learning_rate": 1.9454040806684976e-05, "loss": 0.6333, "step": 165520 }, { "epoch": 1.83, "learning_rate": 1.945311807954646e-05, "loss": 0.6382, "step": 165525 }, { "epoch": 1.83, "learning_rate": 1.9452195352407952e-05, "loss": 0.6175, "step": 165530 }, { "epoch": 1.83, "learning_rate": 1.9451272625269437e-05, "loss": 0.606, "step": 165535 }, { "epoch": 1.83, "learning_rate": 1.9450349898130924e-05, "loss": 0.6036, "step": 165540 }, { "epoch": 1.83, "learning_rate": 1.9449427170992412e-05, "loss": 0.6369, "step": 165545 }, { "epoch": 1.83, "learning_rate": 1.94485044438539e-05, "loss": 0.5986, "step": 165550 }, { "epoch": 1.83, "learning_rate": 1.9447581716715388e-05, "loss": 0.6771, "step": 165555 }, { "epoch": 1.83, "learning_rate": 1.9446658989576873e-05, "loss": 0.6054, "step": 165560 }, { "epoch": 1.83, "learning_rate": 1.9445736262438364e-05, "loss": 0.6425, "step": 165565 }, { "epoch": 1.83, "learning_rate": 1.9444813535299848e-05, "loss": 0.5918, "step": 165570 }, { "epoch": 1.83, "learning_rate": 1.944389080816134e-05, "loss": 0.6107, "step": 165575 }, { "epoch": 1.83, "learning_rate": 1.9442968081022824e-05, "loss": 0.6055, "step": 165580 }, { "epoch": 1.83, "learning_rate": 1.9442045353884315e-05, "loss": 0.6245, "step": 165585 }, { "epoch": 1.83, "learning_rate": 1.94411226267458e-05, "loss": 0.6166, "step": 165590 }, { "epoch": 1.83, "learning_rate": 1.944019989960729e-05, "loss": 0.6256, "step": 165595 }, { "epoch": 1.83, "learning_rate": 1.9439277172468775e-05, "loss": 0.6006, "step": 165600 }, { "epoch": 1.83, "learning_rate": 1.9438354445330263e-05, "loss": 0.6084, "step": 165605 }, { "epoch": 1.83, "learning_rate": 1.943743171819175e-05, "loss": 0.5955, "step": 165610 }, { "epoch": 1.83, "learning_rate": 1.943650899105324e-05, "loss": 0.5906, "step": 165615 }, { "epoch": 1.83, "learning_rate": 1.9435586263914727e-05, "loss": 0.6512, "step": 165620 }, { "epoch": 1.83, "learning_rate": 1.943466353677621e-05, "loss": 0.6855, "step": 165625 }, { "epoch": 1.83, "learning_rate": 1.9433740809637702e-05, "loss": 0.5825, "step": 165630 }, { "epoch": 1.83, "learning_rate": 1.9432818082499187e-05, "loss": 0.6243, "step": 165635 }, { "epoch": 1.83, "learning_rate": 1.9431895355360678e-05, "loss": 0.5849, "step": 165640 }, { "epoch": 1.83, "learning_rate": 1.9430972628222163e-05, "loss": 0.6046, "step": 165645 }, { "epoch": 1.83, "learning_rate": 1.9430049901083654e-05, "loss": 0.5967, "step": 165650 }, { "epoch": 1.83, "learning_rate": 1.9429127173945138e-05, "loss": 0.6331, "step": 165655 }, { "epoch": 1.83, "learning_rate": 1.9428204446806626e-05, "loss": 0.5767, "step": 165660 }, { "epoch": 1.83, "learning_rate": 1.9427281719668114e-05, "loss": 0.5691, "step": 165665 }, { "epoch": 1.83, "learning_rate": 1.9426358992529602e-05, "loss": 0.594, "step": 165670 }, { "epoch": 1.83, "learning_rate": 1.942543626539109e-05, "loss": 0.5875, "step": 165675 }, { "epoch": 1.83, "learning_rate": 1.9424513538252577e-05, "loss": 0.6596, "step": 165680 }, { "epoch": 1.83, "learning_rate": 1.9423590811114065e-05, "loss": 0.6701, "step": 165685 }, { "epoch": 1.83, "learning_rate": 1.9422668083975553e-05, "loss": 0.6076, "step": 165690 }, { "epoch": 1.83, "learning_rate": 1.942174535683704e-05, "loss": 0.631, "step": 165695 }, { "epoch": 1.83, "learning_rate": 1.9420822629698525e-05, "loss": 0.6809, "step": 165700 }, { "epoch": 1.83, "learning_rate": 1.9419899902560017e-05, "loss": 0.6572, "step": 165705 }, { "epoch": 1.83, "learning_rate": 1.94189771754215e-05, "loss": 0.6548, "step": 165710 }, { "epoch": 1.83, "learning_rate": 1.941805444828299e-05, "loss": 0.6304, "step": 165715 }, { "epoch": 1.83, "learning_rate": 1.9417131721144477e-05, "loss": 0.5918, "step": 165720 }, { "epoch": 1.84, "learning_rate": 1.9416208994005965e-05, "loss": 0.591, "step": 165725 }, { "epoch": 1.84, "learning_rate": 1.9415286266867453e-05, "loss": 0.5814, "step": 165730 }, { "epoch": 1.84, "learning_rate": 1.941436353972894e-05, "loss": 0.5709, "step": 165735 }, { "epoch": 1.84, "learning_rate": 1.9413440812590428e-05, "loss": 0.5979, "step": 165740 }, { "epoch": 1.84, "learning_rate": 1.9412518085451916e-05, "loss": 0.6311, "step": 165745 }, { "epoch": 1.84, "learning_rate": 1.9411595358313404e-05, "loss": 0.6048, "step": 165750 }, { "epoch": 1.84, "learning_rate": 1.9410672631174892e-05, "loss": 0.5834, "step": 165755 }, { "epoch": 1.84, "learning_rate": 1.940974990403638e-05, "loss": 0.5971, "step": 165760 }, { "epoch": 1.84, "learning_rate": 1.9408827176897868e-05, "loss": 0.6032, "step": 165765 }, { "epoch": 1.84, "learning_rate": 1.9407904449759355e-05, "loss": 0.6567, "step": 165770 }, { "epoch": 1.84, "learning_rate": 1.940698172262084e-05, "loss": 0.6196, "step": 165775 }, { "epoch": 1.84, "learning_rate": 1.9406058995482328e-05, "loss": 0.5894, "step": 165780 }, { "epoch": 1.84, "learning_rate": 1.9405136268343816e-05, "loss": 0.5844, "step": 165785 }, { "epoch": 1.84, "learning_rate": 1.9404213541205303e-05, "loss": 0.6158, "step": 165790 }, { "epoch": 1.84, "learning_rate": 1.940329081406679e-05, "loss": 0.6333, "step": 165795 }, { "epoch": 1.84, "learning_rate": 1.940236808692828e-05, "loss": 0.5751, "step": 165800 }, { "epoch": 1.84, "learning_rate": 1.9401445359789767e-05, "loss": 0.6021, "step": 165805 }, { "epoch": 1.84, "learning_rate": 1.9400522632651255e-05, "loss": 0.648, "step": 165810 }, { "epoch": 1.84, "learning_rate": 1.9399599905512743e-05, "loss": 0.6069, "step": 165815 }, { "epoch": 1.84, "learning_rate": 1.939867717837423e-05, "loss": 0.6282, "step": 165820 }, { "epoch": 1.84, "learning_rate": 1.939775445123572e-05, "loss": 0.5731, "step": 165825 }, { "epoch": 1.84, "learning_rate": 1.9396831724097206e-05, "loss": 0.6197, "step": 165830 }, { "epoch": 1.84, "learning_rate": 1.939590899695869e-05, "loss": 0.6157, "step": 165835 }, { "epoch": 1.84, "learning_rate": 1.9394986269820182e-05, "loss": 0.6096, "step": 165840 }, { "epoch": 1.84, "learning_rate": 1.9394063542681666e-05, "loss": 0.6219, "step": 165845 }, { "epoch": 1.84, "learning_rate": 1.9393140815543154e-05, "loss": 0.6271, "step": 165850 }, { "epoch": 1.84, "learning_rate": 1.9392218088404642e-05, "loss": 0.66, "step": 165855 }, { "epoch": 1.84, "learning_rate": 1.939129536126613e-05, "loss": 0.5627, "step": 165860 }, { "epoch": 1.84, "learning_rate": 1.9390372634127618e-05, "loss": 0.6112, "step": 165865 }, { "epoch": 1.84, "learning_rate": 1.9389449906989106e-05, "loss": 0.5974, "step": 165870 }, { "epoch": 1.84, "learning_rate": 1.9388527179850593e-05, "loss": 0.6277, "step": 165875 }, { "epoch": 1.84, "learning_rate": 1.938760445271208e-05, "loss": 0.6247, "step": 165880 }, { "epoch": 1.84, "learning_rate": 1.938668172557357e-05, "loss": 0.5912, "step": 165885 }, { "epoch": 1.84, "learning_rate": 1.9385758998435054e-05, "loss": 0.654, "step": 165890 }, { "epoch": 1.84, "learning_rate": 1.9384836271296545e-05, "loss": 0.6484, "step": 165895 }, { "epoch": 1.84, "learning_rate": 1.938391354415803e-05, "loss": 0.6048, "step": 165900 }, { "epoch": 1.84, "learning_rate": 1.938299081701952e-05, "loss": 0.6237, "step": 165905 }, { "epoch": 1.84, "learning_rate": 1.9382068089881005e-05, "loss": 0.6012, "step": 165910 }, { "epoch": 1.84, "learning_rate": 1.9381145362742496e-05, "loss": 0.6723, "step": 165915 }, { "epoch": 1.84, "learning_rate": 1.938022263560398e-05, "loss": 0.609, "step": 165920 }, { "epoch": 1.84, "learning_rate": 1.937929990846547e-05, "loss": 0.5969, "step": 165925 }, { "epoch": 1.84, "learning_rate": 1.9378377181326956e-05, "loss": 0.6257, "step": 165930 }, { "epoch": 1.84, "learning_rate": 1.9377454454188444e-05, "loss": 0.6248, "step": 165935 }, { "epoch": 1.84, "learning_rate": 1.9376531727049932e-05, "loss": 0.6064, "step": 165940 }, { "epoch": 1.84, "learning_rate": 1.9375608999911417e-05, "loss": 0.6722, "step": 165945 }, { "epoch": 1.84, "learning_rate": 1.9374686272772908e-05, "loss": 0.6097, "step": 165950 }, { "epoch": 1.84, "learning_rate": 1.9373763545634392e-05, "loss": 0.6747, "step": 165955 }, { "epoch": 1.84, "learning_rate": 1.9372840818495883e-05, "loss": 0.6227, "step": 165960 }, { "epoch": 1.84, "learning_rate": 1.9371918091357368e-05, "loss": 0.5444, "step": 165965 }, { "epoch": 1.84, "learning_rate": 1.937099536421886e-05, "loss": 0.6558, "step": 165970 }, { "epoch": 1.84, "learning_rate": 1.9370072637080344e-05, "loss": 0.5811, "step": 165975 }, { "epoch": 1.84, "learning_rate": 1.9369149909941835e-05, "loss": 0.5953, "step": 165980 }, { "epoch": 1.84, "learning_rate": 1.936822718280332e-05, "loss": 0.6573, "step": 165985 }, { "epoch": 1.84, "learning_rate": 1.9367304455664807e-05, "loss": 0.6537, "step": 165990 }, { "epoch": 1.84, "learning_rate": 1.9366381728526295e-05, "loss": 0.6584, "step": 165995 }, { "epoch": 1.84, "learning_rate": 1.9365459001387783e-05, "loss": 0.6192, "step": 166000 }, { "epoch": 1.84, "eval_loss": 0.5786400437355042, "eval_runtime": 69.0445, "eval_samples_per_second": 28.967, "eval_steps_per_second": 14.483, "step": 166000 }, { "epoch": 1.84, "learning_rate": 1.936453627424927e-05, "loss": 0.6193, "step": 166005 }, { "epoch": 1.84, "learning_rate": 1.9363613547110755e-05, "loss": 0.6122, "step": 166010 }, { "epoch": 1.84, "learning_rate": 1.9362690819972246e-05, "loss": 0.5755, "step": 166015 }, { "epoch": 1.84, "learning_rate": 1.936176809283373e-05, "loss": 0.5836, "step": 166020 }, { "epoch": 1.84, "learning_rate": 1.9360845365695222e-05, "loss": 0.5696, "step": 166025 }, { "epoch": 1.84, "learning_rate": 1.9359922638556707e-05, "loss": 0.587, "step": 166030 }, { "epoch": 1.84, "learning_rate": 1.9358999911418198e-05, "loss": 0.6604, "step": 166035 }, { "epoch": 1.84, "learning_rate": 1.9358077184279682e-05, "loss": 0.5937, "step": 166040 }, { "epoch": 1.84, "learning_rate": 1.935715445714117e-05, "loss": 0.6574, "step": 166045 }, { "epoch": 1.84, "learning_rate": 1.9356231730002658e-05, "loss": 0.5946, "step": 166050 }, { "epoch": 1.84, "learning_rate": 1.9355309002864146e-05, "loss": 0.6327, "step": 166055 }, { "epoch": 1.84, "learning_rate": 1.9354386275725634e-05, "loss": 0.6031, "step": 166060 }, { "epoch": 1.84, "learning_rate": 1.935346354858712e-05, "loss": 0.6138, "step": 166065 }, { "epoch": 1.84, "learning_rate": 1.935254082144861e-05, "loss": 0.5808, "step": 166070 }, { "epoch": 1.84, "learning_rate": 1.9351618094310094e-05, "loss": 0.6218, "step": 166075 }, { "epoch": 1.84, "learning_rate": 1.9350695367171585e-05, "loss": 0.6269, "step": 166080 }, { "epoch": 1.84, "learning_rate": 1.934977264003307e-05, "loss": 0.6218, "step": 166085 }, { "epoch": 1.84, "learning_rate": 1.934884991289456e-05, "loss": 0.6023, "step": 166090 }, { "epoch": 1.84, "learning_rate": 1.9347927185756045e-05, "loss": 0.5312, "step": 166095 }, { "epoch": 1.84, "learning_rate": 1.9347004458617533e-05, "loss": 0.5589, "step": 166100 }, { "epoch": 1.84, "learning_rate": 1.934608173147902e-05, "loss": 0.6174, "step": 166105 }, { "epoch": 1.84, "learning_rate": 1.934515900434051e-05, "loss": 0.5631, "step": 166110 }, { "epoch": 1.84, "learning_rate": 1.9344236277201997e-05, "loss": 0.6726, "step": 166115 }, { "epoch": 1.84, "learning_rate": 1.9343313550063484e-05, "loss": 0.6271, "step": 166120 }, { "epoch": 1.84, "learning_rate": 1.9342390822924972e-05, "loss": 0.6318, "step": 166125 }, { "epoch": 1.84, "learning_rate": 1.934146809578646e-05, "loss": 0.6453, "step": 166130 }, { "epoch": 1.84, "learning_rate": 1.9340545368647948e-05, "loss": 0.618, "step": 166135 }, { "epoch": 1.84, "learning_rate": 1.9339622641509436e-05, "loss": 0.6153, "step": 166140 }, { "epoch": 1.84, "learning_rate": 1.9338699914370924e-05, "loss": 0.6951, "step": 166145 }, { "epoch": 1.84, "learning_rate": 1.9337777187232408e-05, "loss": 0.5845, "step": 166150 }, { "epoch": 1.84, "learning_rate": 1.93368544600939e-05, "loss": 0.6244, "step": 166155 }, { "epoch": 1.84, "learning_rate": 1.9335931732955384e-05, "loss": 0.6222, "step": 166160 }, { "epoch": 1.84, "learning_rate": 1.9335009005816872e-05, "loss": 0.6741, "step": 166165 }, { "epoch": 1.84, "learning_rate": 1.933408627867836e-05, "loss": 0.6001, "step": 166170 }, { "epoch": 1.84, "learning_rate": 1.9333163551539847e-05, "loss": 0.6326, "step": 166175 }, { "epoch": 1.84, "learning_rate": 1.9332240824401335e-05, "loss": 0.6165, "step": 166180 }, { "epoch": 1.84, "learning_rate": 1.9331318097262823e-05, "loss": 0.6124, "step": 166185 }, { "epoch": 1.84, "learning_rate": 1.933039537012431e-05, "loss": 0.6403, "step": 166190 }, { "epoch": 1.84, "learning_rate": 1.93294726429858e-05, "loss": 0.663, "step": 166195 }, { "epoch": 1.84, "learning_rate": 1.9328549915847287e-05, "loss": 0.6203, "step": 166200 }, { "epoch": 1.84, "learning_rate": 1.9327627188708774e-05, "loss": 0.5621, "step": 166205 }, { "epoch": 1.84, "learning_rate": 1.9326704461570262e-05, "loss": 0.6374, "step": 166210 }, { "epoch": 1.84, "learning_rate": 1.932578173443175e-05, "loss": 0.6072, "step": 166215 }, { "epoch": 1.84, "learning_rate": 1.9324859007293235e-05, "loss": 0.6333, "step": 166220 }, { "epoch": 1.84, "learning_rate": 1.9323936280154722e-05, "loss": 0.6048, "step": 166225 }, { "epoch": 1.84, "learning_rate": 1.932301355301621e-05, "loss": 0.6139, "step": 166230 }, { "epoch": 1.84, "learning_rate": 1.9322090825877698e-05, "loss": 0.5643, "step": 166235 }, { "epoch": 1.84, "learning_rate": 1.9321168098739186e-05, "loss": 0.5746, "step": 166240 }, { "epoch": 1.84, "learning_rate": 1.9320245371600674e-05, "loss": 0.59, "step": 166245 }, { "epoch": 1.84, "learning_rate": 1.9319322644462162e-05, "loss": 0.6404, "step": 166250 }, { "epoch": 1.84, "learning_rate": 1.931839991732365e-05, "loss": 0.639, "step": 166255 }, { "epoch": 1.84, "learning_rate": 1.9317477190185137e-05, "loss": 0.657, "step": 166260 }, { "epoch": 1.84, "learning_rate": 1.9316554463046625e-05, "loss": 0.6063, "step": 166265 }, { "epoch": 1.84, "learning_rate": 1.9315631735908113e-05, "loss": 0.6257, "step": 166270 }, { "epoch": 1.84, "learning_rate": 1.9314709008769598e-05, "loss": 0.6165, "step": 166275 }, { "epoch": 1.84, "learning_rate": 1.931378628163109e-05, "loss": 0.6115, "step": 166280 }, { "epoch": 1.84, "learning_rate": 1.9312863554492573e-05, "loss": 0.5784, "step": 166285 }, { "epoch": 1.84, "learning_rate": 1.9311940827354065e-05, "loss": 0.6672, "step": 166290 }, { "epoch": 1.84, "learning_rate": 1.931101810021555e-05, "loss": 0.5769, "step": 166295 }, { "epoch": 1.84, "learning_rate": 1.9310095373077037e-05, "loss": 0.6561, "step": 166300 }, { "epoch": 1.84, "learning_rate": 1.9309172645938525e-05, "loss": 0.6294, "step": 166305 }, { "epoch": 1.84, "learning_rate": 1.9308249918800013e-05, "loss": 0.6059, "step": 166310 }, { "epoch": 1.84, "learning_rate": 1.93073271916615e-05, "loss": 0.6184, "step": 166315 }, { "epoch": 1.84, "learning_rate": 1.9306404464522988e-05, "loss": 0.6523, "step": 166320 }, { "epoch": 1.84, "learning_rate": 1.9305481737384476e-05, "loss": 0.5726, "step": 166325 }, { "epoch": 1.84, "learning_rate": 1.9304559010245964e-05, "loss": 0.6108, "step": 166330 }, { "epoch": 1.84, "learning_rate": 1.9303636283107452e-05, "loss": 0.6733, "step": 166335 }, { "epoch": 1.84, "learning_rate": 1.9302713555968936e-05, "loss": 0.6414, "step": 166340 }, { "epoch": 1.84, "learning_rate": 1.9301790828830427e-05, "loss": 0.6512, "step": 166345 }, { "epoch": 1.84, "learning_rate": 1.9300868101691912e-05, "loss": 0.6441, "step": 166350 }, { "epoch": 1.84, "learning_rate": 1.9299945374553403e-05, "loss": 0.6391, "step": 166355 }, { "epoch": 1.84, "learning_rate": 1.9299022647414888e-05, "loss": 0.5459, "step": 166360 }, { "epoch": 1.84, "learning_rate": 1.929809992027638e-05, "loss": 0.5967, "step": 166365 }, { "epoch": 1.84, "learning_rate": 1.9297177193137863e-05, "loss": 0.6551, "step": 166370 }, { "epoch": 1.84, "learning_rate": 1.929625446599935e-05, "loss": 0.649, "step": 166375 }, { "epoch": 1.84, "learning_rate": 1.929533173886084e-05, "loss": 0.6298, "step": 166380 }, { "epoch": 1.84, "learning_rate": 1.9294409011722327e-05, "loss": 0.5523, "step": 166385 }, { "epoch": 1.84, "learning_rate": 1.9293486284583815e-05, "loss": 0.5896, "step": 166390 }, { "epoch": 1.84, "learning_rate": 1.92925635574453e-05, "loss": 0.5973, "step": 166395 }, { "epoch": 1.84, "learning_rate": 1.929164083030679e-05, "loss": 0.6519, "step": 166400 }, { "epoch": 1.84, "learning_rate": 1.9290718103168275e-05, "loss": 0.5797, "step": 166405 }, { "epoch": 1.84, "learning_rate": 1.9289795376029766e-05, "loss": 0.6196, "step": 166410 }, { "epoch": 1.84, "learning_rate": 1.928887264889125e-05, "loss": 0.6392, "step": 166415 }, { "epoch": 1.84, "learning_rate": 1.9287949921752742e-05, "loss": 0.6083, "step": 166420 }, { "epoch": 1.84, "learning_rate": 1.9287027194614226e-05, "loss": 0.5611, "step": 166425 }, { "epoch": 1.84, "learning_rate": 1.9286104467475714e-05, "loss": 0.5689, "step": 166430 }, { "epoch": 1.84, "learning_rate": 1.9285181740337202e-05, "loss": 0.6004, "step": 166435 }, { "epoch": 1.84, "learning_rate": 1.928425901319869e-05, "loss": 0.6571, "step": 166440 }, { "epoch": 1.84, "learning_rate": 1.9283336286060178e-05, "loss": 0.6024, "step": 166445 }, { "epoch": 1.84, "learning_rate": 1.9282413558921666e-05, "loss": 0.5687, "step": 166450 }, { "epoch": 1.84, "learning_rate": 1.9281490831783153e-05, "loss": 0.596, "step": 166455 }, { "epoch": 1.84, "learning_rate": 1.9280568104644638e-05, "loss": 0.5361, "step": 166460 }, { "epoch": 1.84, "learning_rate": 1.927964537750613e-05, "loss": 0.6362, "step": 166465 }, { "epoch": 1.84, "learning_rate": 1.9278722650367614e-05, "loss": 0.6095, "step": 166470 }, { "epoch": 1.84, "learning_rate": 1.9277799923229105e-05, "loss": 0.5886, "step": 166475 }, { "epoch": 1.84, "learning_rate": 1.927687719609059e-05, "loss": 0.5611, "step": 166480 }, { "epoch": 1.84, "learning_rate": 1.927595446895208e-05, "loss": 0.5774, "step": 166485 }, { "epoch": 1.84, "learning_rate": 1.9275031741813565e-05, "loss": 0.6393, "step": 166490 }, { "epoch": 1.84, "learning_rate": 1.9274109014675053e-05, "loss": 0.5998, "step": 166495 }, { "epoch": 1.84, "learning_rate": 1.927318628753654e-05, "loss": 0.6466, "step": 166500 }, { "epoch": 1.84, "learning_rate": 1.927226356039803e-05, "loss": 0.6269, "step": 166505 }, { "epoch": 1.84, "learning_rate": 1.9271340833259516e-05, "loss": 0.6233, "step": 166510 }, { "epoch": 1.84, "learning_rate": 1.9270418106121004e-05, "loss": 0.6193, "step": 166515 }, { "epoch": 1.84, "learning_rate": 1.9269495378982492e-05, "loss": 0.6315, "step": 166520 }, { "epoch": 1.84, "learning_rate": 1.926857265184398e-05, "loss": 0.6488, "step": 166525 }, { "epoch": 1.84, "learning_rate": 1.9267649924705468e-05, "loss": 0.6257, "step": 166530 }, { "epoch": 1.84, "learning_rate": 1.9266727197566952e-05, "loss": 0.6619, "step": 166535 }, { "epoch": 1.84, "learning_rate": 1.9265804470428443e-05, "loss": 0.5816, "step": 166540 }, { "epoch": 1.84, "learning_rate": 1.9264881743289928e-05, "loss": 0.644, "step": 166545 }, { "epoch": 1.84, "learning_rate": 1.9263959016151416e-05, "loss": 0.6536, "step": 166550 }, { "epoch": 1.84, "learning_rate": 1.9263036289012904e-05, "loss": 0.649, "step": 166555 }, { "epoch": 1.84, "learning_rate": 1.926211356187439e-05, "loss": 0.5868, "step": 166560 }, { "epoch": 1.84, "learning_rate": 1.926119083473588e-05, "loss": 0.6734, "step": 166565 }, { "epoch": 1.84, "learning_rate": 1.9260268107597367e-05, "loss": 0.6107, "step": 166570 }, { "epoch": 1.84, "learning_rate": 1.9259345380458855e-05, "loss": 0.6391, "step": 166575 }, { "epoch": 1.84, "learning_rate": 1.9258422653320343e-05, "loss": 0.6821, "step": 166580 }, { "epoch": 1.84, "learning_rate": 1.925749992618183e-05, "loss": 0.6289, "step": 166585 }, { "epoch": 1.84, "learning_rate": 1.925657719904332e-05, "loss": 0.5463, "step": 166590 }, { "epoch": 1.84, "learning_rate": 1.9255654471904806e-05, "loss": 0.6258, "step": 166595 }, { "epoch": 1.84, "learning_rate": 1.9254731744766294e-05, "loss": 0.6019, "step": 166600 }, { "epoch": 1.84, "learning_rate": 1.925380901762778e-05, "loss": 0.5919, "step": 166605 }, { "epoch": 1.84, "learning_rate": 1.9252886290489267e-05, "loss": 0.5611, "step": 166610 }, { "epoch": 1.84, "learning_rate": 1.9251963563350754e-05, "loss": 0.5866, "step": 166615 }, { "epoch": 1.84, "learning_rate": 1.9251040836212242e-05, "loss": 0.5744, "step": 166620 }, { "epoch": 1.84, "learning_rate": 1.925011810907373e-05, "loss": 0.6193, "step": 166625 }, { "epoch": 1.85, "learning_rate": 1.9249195381935218e-05, "loss": 0.6354, "step": 166630 }, { "epoch": 1.85, "learning_rate": 1.9248272654796706e-05, "loss": 0.5985, "step": 166635 }, { "epoch": 1.85, "learning_rate": 1.9247349927658194e-05, "loss": 0.655, "step": 166640 }, { "epoch": 1.85, "learning_rate": 1.924642720051968e-05, "loss": 0.6813, "step": 166645 }, { "epoch": 1.85, "learning_rate": 1.924550447338117e-05, "loss": 0.5856, "step": 166650 }, { "epoch": 1.85, "learning_rate": 1.9244581746242657e-05, "loss": 0.6668, "step": 166655 }, { "epoch": 1.85, "learning_rate": 1.924365901910414e-05, "loss": 0.6145, "step": 166660 }, { "epoch": 1.85, "learning_rate": 1.9242736291965633e-05, "loss": 0.5989, "step": 166665 }, { "epoch": 1.85, "learning_rate": 1.9241813564827117e-05, "loss": 0.6525, "step": 166670 }, { "epoch": 1.85, "learning_rate": 1.924089083768861e-05, "loss": 0.6, "step": 166675 }, { "epoch": 1.85, "learning_rate": 1.9239968110550093e-05, "loss": 0.5828, "step": 166680 }, { "epoch": 1.85, "learning_rate": 1.923904538341158e-05, "loss": 0.645, "step": 166685 }, { "epoch": 1.85, "learning_rate": 1.923812265627307e-05, "loss": 0.6146, "step": 166690 }, { "epoch": 1.85, "learning_rate": 1.9237199929134557e-05, "loss": 0.6255, "step": 166695 }, { "epoch": 1.85, "learning_rate": 1.9236277201996044e-05, "loss": 0.607, "step": 166700 }, { "epoch": 1.85, "learning_rate": 1.9235354474857532e-05, "loss": 0.6854, "step": 166705 }, { "epoch": 1.85, "learning_rate": 1.923443174771902e-05, "loss": 0.606, "step": 166710 }, { "epoch": 1.85, "learning_rate": 1.9233509020580508e-05, "loss": 0.6813, "step": 166715 }, { "epoch": 1.85, "learning_rate": 1.9232586293441996e-05, "loss": 0.6117, "step": 166720 }, { "epoch": 1.85, "learning_rate": 1.923166356630348e-05, "loss": 0.637, "step": 166725 }, { "epoch": 1.85, "learning_rate": 1.923074083916497e-05, "loss": 0.5694, "step": 166730 }, { "epoch": 1.85, "learning_rate": 1.9229818112026456e-05, "loss": 0.6533, "step": 166735 }, { "epoch": 1.85, "learning_rate": 1.9228895384887947e-05, "loss": 0.6357, "step": 166740 }, { "epoch": 1.85, "learning_rate": 1.922797265774943e-05, "loss": 0.6223, "step": 166745 }, { "epoch": 1.85, "learning_rate": 1.9227049930610923e-05, "loss": 0.6011, "step": 166750 }, { "epoch": 1.85, "learning_rate": 1.9226127203472407e-05, "loss": 0.5704, "step": 166755 }, { "epoch": 1.85, "learning_rate": 1.9225204476333895e-05, "loss": 0.5921, "step": 166760 }, { "epoch": 1.85, "learning_rate": 1.9224281749195383e-05, "loss": 0.593, "step": 166765 }, { "epoch": 1.85, "learning_rate": 1.922335902205687e-05, "loss": 0.686, "step": 166770 }, { "epoch": 1.85, "learning_rate": 1.922243629491836e-05, "loss": 0.6547, "step": 166775 }, { "epoch": 1.85, "learning_rate": 1.9221513567779843e-05, "loss": 0.5997, "step": 166780 }, { "epoch": 1.85, "learning_rate": 1.9220590840641334e-05, "loss": 0.6102, "step": 166785 }, { "epoch": 1.85, "learning_rate": 1.921966811350282e-05, "loss": 0.5899, "step": 166790 }, { "epoch": 1.85, "learning_rate": 1.921874538636431e-05, "loss": 0.6355, "step": 166795 }, { "epoch": 1.85, "learning_rate": 1.9217822659225795e-05, "loss": 0.6417, "step": 166800 }, { "epoch": 1.85, "learning_rate": 1.9216899932087286e-05, "loss": 0.6288, "step": 166805 }, { "epoch": 1.85, "learning_rate": 1.921597720494877e-05, "loss": 0.5375, "step": 166810 }, { "epoch": 1.85, "learning_rate": 1.9215054477810258e-05, "loss": 0.5829, "step": 166815 }, { "epoch": 1.85, "learning_rate": 1.9214131750671746e-05, "loss": 0.6425, "step": 166820 }, { "epoch": 1.85, "learning_rate": 1.9213209023533234e-05, "loss": 0.6606, "step": 166825 }, { "epoch": 1.85, "learning_rate": 1.9212286296394722e-05, "loss": 0.6099, "step": 166830 }, { "epoch": 1.85, "learning_rate": 1.9211363569256206e-05, "loss": 0.6097, "step": 166835 }, { "epoch": 1.85, "learning_rate": 1.9210440842117697e-05, "loss": 0.6363, "step": 166840 }, { "epoch": 1.85, "learning_rate": 1.9209518114979182e-05, "loss": 0.6152, "step": 166845 }, { "epoch": 1.85, "learning_rate": 1.9208595387840673e-05, "loss": 0.6135, "step": 166850 }, { "epoch": 1.85, "learning_rate": 1.9207672660702158e-05, "loss": 0.6, "step": 166855 }, { "epoch": 1.85, "learning_rate": 1.920674993356365e-05, "loss": 0.5697, "step": 166860 }, { "epoch": 1.85, "learning_rate": 1.9205827206425133e-05, "loss": 0.5759, "step": 166865 }, { "epoch": 1.85, "learning_rate": 1.9204904479286624e-05, "loss": 0.5927, "step": 166870 }, { "epoch": 1.85, "learning_rate": 1.920398175214811e-05, "loss": 0.6552, "step": 166875 }, { "epoch": 1.85, "learning_rate": 1.9203059025009597e-05, "loss": 0.6388, "step": 166880 }, { "epoch": 1.85, "learning_rate": 1.9202136297871085e-05, "loss": 0.5945, "step": 166885 }, { "epoch": 1.85, "learning_rate": 1.9201213570732572e-05, "loss": 0.59, "step": 166890 }, { "epoch": 1.85, "learning_rate": 1.920029084359406e-05, "loss": 0.5857, "step": 166895 }, { "epoch": 1.85, "learning_rate": 1.9199368116455548e-05, "loss": 0.6214, "step": 166900 }, { "epoch": 1.85, "learning_rate": 1.9198445389317036e-05, "loss": 0.6058, "step": 166905 }, { "epoch": 1.85, "learning_rate": 1.919752266217852e-05, "loss": 0.6358, "step": 166910 }, { "epoch": 1.85, "learning_rate": 1.9196599935040012e-05, "loss": 0.6329, "step": 166915 }, { "epoch": 1.85, "learning_rate": 1.9195677207901496e-05, "loss": 0.5879, "step": 166920 }, { "epoch": 1.85, "learning_rate": 1.9194754480762987e-05, "loss": 0.6247, "step": 166925 }, { "epoch": 1.85, "learning_rate": 1.9193831753624472e-05, "loss": 0.6916, "step": 166930 }, { "epoch": 1.85, "learning_rate": 1.919290902648596e-05, "loss": 0.6129, "step": 166935 }, { "epoch": 1.85, "learning_rate": 1.9191986299347448e-05, "loss": 0.5825, "step": 166940 }, { "epoch": 1.85, "learning_rate": 1.9191063572208935e-05, "loss": 0.6425, "step": 166945 }, { "epoch": 1.85, "learning_rate": 1.9190140845070423e-05, "loss": 0.5766, "step": 166950 }, { "epoch": 1.85, "learning_rate": 1.918921811793191e-05, "loss": 0.6365, "step": 166955 }, { "epoch": 1.85, "learning_rate": 1.91882953907934e-05, "loss": 0.6304, "step": 166960 }, { "epoch": 1.85, "learning_rate": 1.9187372663654887e-05, "loss": 0.606, "step": 166965 }, { "epoch": 1.85, "learning_rate": 1.9186449936516375e-05, "loss": 0.5635, "step": 166970 }, { "epoch": 1.85, "learning_rate": 1.9185527209377863e-05, "loss": 0.6722, "step": 166975 }, { "epoch": 1.85, "learning_rate": 1.918460448223935e-05, "loss": 0.5882, "step": 166980 }, { "epoch": 1.85, "learning_rate": 1.9183681755100835e-05, "loss": 0.6038, "step": 166985 }, { "epoch": 1.85, "learning_rate": 1.9182759027962323e-05, "loss": 0.5874, "step": 166990 }, { "epoch": 1.85, "learning_rate": 1.918183630082381e-05, "loss": 0.6824, "step": 166995 }, { "epoch": 1.85, "learning_rate": 1.91809135736853e-05, "loss": 0.6613, "step": 167000 }, { "epoch": 1.85, "eval_loss": 0.6063949465751648, "eval_runtime": 69.2129, "eval_samples_per_second": 28.896, "eval_steps_per_second": 14.448, "step": 167000 }, { "epoch": 1.85, "learning_rate": 1.9179990846546786e-05, "loss": 0.6088, "step": 167005 }, { "epoch": 1.85, "learning_rate": 1.9179068119408274e-05, "loss": 0.6217, "step": 167010 }, { "epoch": 1.85, "learning_rate": 1.9178145392269762e-05, "loss": 0.6315, "step": 167015 }, { "epoch": 1.85, "learning_rate": 1.917722266513125e-05, "loss": 0.6873, "step": 167020 }, { "epoch": 1.85, "learning_rate": 1.9176299937992738e-05, "loss": 0.6421, "step": 167025 }, { "epoch": 1.85, "learning_rate": 1.9175377210854225e-05, "loss": 0.6069, "step": 167030 }, { "epoch": 1.85, "learning_rate": 1.9174454483715713e-05, "loss": 0.63, "step": 167035 }, { "epoch": 1.85, "learning_rate": 1.91735317565772e-05, "loss": 0.5821, "step": 167040 }, { "epoch": 1.85, "learning_rate": 1.9172609029438686e-05, "loss": 0.6435, "step": 167045 }, { "epoch": 1.85, "learning_rate": 1.9171686302300177e-05, "loss": 0.6146, "step": 167050 }, { "epoch": 1.85, "learning_rate": 1.917076357516166e-05, "loss": 0.6352, "step": 167055 }, { "epoch": 1.85, "learning_rate": 1.916984084802315e-05, "loss": 0.5943, "step": 167060 }, { "epoch": 1.85, "learning_rate": 1.9168918120884637e-05, "loss": 0.698, "step": 167065 }, { "epoch": 1.85, "learning_rate": 1.9167995393746125e-05, "loss": 0.6126, "step": 167070 }, { "epoch": 1.85, "learning_rate": 1.9167072666607613e-05, "loss": 0.6168, "step": 167075 }, { "epoch": 1.85, "learning_rate": 1.91661499394691e-05, "loss": 0.6591, "step": 167080 }, { "epoch": 1.85, "learning_rate": 1.916522721233059e-05, "loss": 0.6904, "step": 167085 }, { "epoch": 1.85, "learning_rate": 1.9164304485192076e-05, "loss": 0.6663, "step": 167090 }, { "epoch": 1.85, "learning_rate": 1.9163381758053564e-05, "loss": 0.6195, "step": 167095 }, { "epoch": 1.85, "learning_rate": 1.9162459030915052e-05, "loss": 0.6086, "step": 167100 }, { "epoch": 1.85, "learning_rate": 1.916153630377654e-05, "loss": 0.6297, "step": 167105 }, { "epoch": 1.85, "learning_rate": 1.9160613576638024e-05, "loss": 0.6333, "step": 167110 }, { "epoch": 1.85, "learning_rate": 1.9159690849499516e-05, "loss": 0.5955, "step": 167115 }, { "epoch": 1.85, "learning_rate": 1.9158768122361e-05, "loss": 0.6492, "step": 167120 }, { "epoch": 1.85, "learning_rate": 1.915784539522249e-05, "loss": 0.5913, "step": 167125 }, { "epoch": 1.85, "learning_rate": 1.9156922668083976e-05, "loss": 0.5595, "step": 167130 }, { "epoch": 1.85, "learning_rate": 1.9155999940945464e-05, "loss": 0.5922, "step": 167135 }, { "epoch": 1.85, "learning_rate": 1.915507721380695e-05, "loss": 0.6205, "step": 167140 }, { "epoch": 1.85, "learning_rate": 1.915415448666844e-05, "loss": 0.6386, "step": 167145 }, { "epoch": 1.85, "learning_rate": 1.9153231759529927e-05, "loss": 0.6162, "step": 167150 }, { "epoch": 1.85, "learning_rate": 1.9152309032391415e-05, "loss": 0.589, "step": 167155 }, { "epoch": 1.85, "learning_rate": 1.9151386305252903e-05, "loss": 0.6067, "step": 167160 }, { "epoch": 1.85, "learning_rate": 1.9150463578114387e-05, "loss": 0.6676, "step": 167165 }, { "epoch": 1.85, "learning_rate": 1.914954085097588e-05, "loss": 0.6088, "step": 167170 }, { "epoch": 1.85, "learning_rate": 1.9148618123837363e-05, "loss": 0.5963, "step": 167175 }, { "epoch": 1.85, "learning_rate": 1.9147695396698854e-05, "loss": 0.5515, "step": 167180 }, { "epoch": 1.85, "learning_rate": 1.914677266956034e-05, "loss": 0.5937, "step": 167185 }, { "epoch": 1.85, "learning_rate": 1.914584994242183e-05, "loss": 0.6296, "step": 167190 }, { "epoch": 1.85, "learning_rate": 1.9144927215283314e-05, "loss": 0.5812, "step": 167195 }, { "epoch": 1.85, "learning_rate": 1.9144004488144802e-05, "loss": 0.6377, "step": 167200 }, { "epoch": 1.85, "learning_rate": 1.914308176100629e-05, "loss": 0.6129, "step": 167205 }, { "epoch": 1.85, "learning_rate": 1.9142159033867778e-05, "loss": 0.626, "step": 167210 }, { "epoch": 1.85, "learning_rate": 1.9141236306729266e-05, "loss": 0.6434, "step": 167215 }, { "epoch": 1.85, "learning_rate": 1.914031357959075e-05, "loss": 0.5971, "step": 167220 }, { "epoch": 1.85, "learning_rate": 1.913939085245224e-05, "loss": 0.6576, "step": 167225 }, { "epoch": 1.85, "learning_rate": 1.9138468125313726e-05, "loss": 0.6424, "step": 167230 }, { "epoch": 1.85, "learning_rate": 1.9137545398175217e-05, "loss": 0.6102, "step": 167235 }, { "epoch": 1.85, "learning_rate": 1.91366226710367e-05, "loss": 0.5983, "step": 167240 }, { "epoch": 1.85, "learning_rate": 1.9135699943898193e-05, "loss": 0.6276, "step": 167245 }, { "epoch": 1.85, "learning_rate": 1.9134777216759677e-05, "loss": 0.6269, "step": 167250 }, { "epoch": 1.85, "learning_rate": 1.913385448962117e-05, "loss": 0.6378, "step": 167255 }, { "epoch": 1.85, "learning_rate": 1.9132931762482653e-05, "loss": 0.5839, "step": 167260 }, { "epoch": 1.85, "learning_rate": 1.913200903534414e-05, "loss": 0.5939, "step": 167265 }, { "epoch": 1.85, "learning_rate": 1.913108630820563e-05, "loss": 0.5937, "step": 167270 }, { "epoch": 1.85, "learning_rate": 1.9130163581067117e-05, "loss": 0.6845, "step": 167275 }, { "epoch": 1.85, "learning_rate": 1.9129240853928604e-05, "loss": 0.6195, "step": 167280 }, { "epoch": 1.85, "learning_rate": 1.9128318126790092e-05, "loss": 0.6379, "step": 167285 }, { "epoch": 1.85, "learning_rate": 1.912739539965158e-05, "loss": 0.5736, "step": 167290 }, { "epoch": 1.85, "learning_rate": 1.9126472672513065e-05, "loss": 0.6164, "step": 167295 }, { "epoch": 1.85, "learning_rate": 1.9125549945374556e-05, "loss": 0.6101, "step": 167300 }, { "epoch": 1.85, "learning_rate": 1.912462721823604e-05, "loss": 0.6074, "step": 167305 }, { "epoch": 1.85, "learning_rate": 1.912370449109753e-05, "loss": 0.622, "step": 167310 }, { "epoch": 1.85, "learning_rate": 1.9122781763959016e-05, "loss": 0.6465, "step": 167315 }, { "epoch": 1.85, "learning_rate": 1.9121859036820504e-05, "loss": 0.6496, "step": 167320 }, { "epoch": 1.85, "learning_rate": 1.912093630968199e-05, "loss": 0.5821, "step": 167325 }, { "epoch": 1.85, "learning_rate": 1.912001358254348e-05, "loss": 0.637, "step": 167330 }, { "epoch": 1.85, "learning_rate": 1.9119090855404967e-05, "loss": 0.6466, "step": 167335 }, { "epoch": 1.85, "learning_rate": 1.9118168128266455e-05, "loss": 0.5282, "step": 167340 }, { "epoch": 1.85, "learning_rate": 1.9117245401127943e-05, "loss": 0.6483, "step": 167345 }, { "epoch": 1.85, "learning_rate": 1.911632267398943e-05, "loss": 0.6142, "step": 167350 }, { "epoch": 1.85, "learning_rate": 1.911539994685092e-05, "loss": 0.6318, "step": 167355 }, { "epoch": 1.85, "learning_rate": 1.9114477219712407e-05, "loss": 0.5665, "step": 167360 }, { "epoch": 1.85, "learning_rate": 1.9113554492573894e-05, "loss": 0.6384, "step": 167365 }, { "epoch": 1.85, "learning_rate": 1.911263176543538e-05, "loss": 0.5985, "step": 167370 }, { "epoch": 1.85, "learning_rate": 1.9111709038296867e-05, "loss": 0.5934, "step": 167375 }, { "epoch": 1.85, "learning_rate": 1.9110786311158355e-05, "loss": 0.6556, "step": 167380 }, { "epoch": 1.85, "learning_rate": 1.9109863584019842e-05, "loss": 0.5833, "step": 167385 }, { "epoch": 1.85, "learning_rate": 1.910894085688133e-05, "loss": 0.6405, "step": 167390 }, { "epoch": 1.85, "learning_rate": 1.9108018129742818e-05, "loss": 0.6405, "step": 167395 }, { "epoch": 1.85, "learning_rate": 1.9107095402604306e-05, "loss": 0.5969, "step": 167400 }, { "epoch": 1.85, "learning_rate": 1.9106172675465794e-05, "loss": 0.6486, "step": 167405 }, { "epoch": 1.85, "learning_rate": 1.910524994832728e-05, "loss": 0.6301, "step": 167410 }, { "epoch": 1.85, "learning_rate": 1.910432722118877e-05, "loss": 0.5833, "step": 167415 }, { "epoch": 1.85, "learning_rate": 1.9103404494050257e-05, "loss": 0.5994, "step": 167420 }, { "epoch": 1.85, "learning_rate": 1.9102481766911745e-05, "loss": 0.5988, "step": 167425 }, { "epoch": 1.85, "learning_rate": 1.9101559039773233e-05, "loss": 0.6321, "step": 167430 }, { "epoch": 1.85, "learning_rate": 1.910063631263472e-05, "loss": 0.6538, "step": 167435 }, { "epoch": 1.85, "learning_rate": 1.9099713585496205e-05, "loss": 0.6293, "step": 167440 }, { "epoch": 1.85, "learning_rate": 1.9098790858357693e-05, "loss": 0.6688, "step": 167445 }, { "epoch": 1.85, "learning_rate": 1.909786813121918e-05, "loss": 0.6438, "step": 167450 }, { "epoch": 1.85, "learning_rate": 1.909694540408067e-05, "loss": 0.6729, "step": 167455 }, { "epoch": 1.85, "learning_rate": 1.9096022676942157e-05, "loss": 0.5726, "step": 167460 }, { "epoch": 1.85, "learning_rate": 1.9095099949803645e-05, "loss": 0.6403, "step": 167465 }, { "epoch": 1.85, "learning_rate": 1.9094177222665132e-05, "loss": 0.611, "step": 167470 }, { "epoch": 1.85, "learning_rate": 1.909325449552662e-05, "loss": 0.6487, "step": 167475 }, { "epoch": 1.85, "learning_rate": 1.9092331768388108e-05, "loss": 0.591, "step": 167480 }, { "epoch": 1.85, "learning_rate": 1.9091409041249596e-05, "loss": 0.5825, "step": 167485 }, { "epoch": 1.85, "learning_rate": 1.9090486314111084e-05, "loss": 0.6979, "step": 167490 }, { "epoch": 1.85, "learning_rate": 1.9089563586972568e-05, "loss": 0.6193, "step": 167495 }, { "epoch": 1.85, "learning_rate": 1.908864085983406e-05, "loss": 0.5894, "step": 167500 }, { "epoch": 1.85, "learning_rate": 1.9087718132695544e-05, "loss": 0.608, "step": 167505 }, { "epoch": 1.85, "learning_rate": 1.9086795405557035e-05, "loss": 0.6023, "step": 167510 }, { "epoch": 1.85, "learning_rate": 1.908587267841852e-05, "loss": 0.6703, "step": 167515 }, { "epoch": 1.85, "learning_rate": 1.9084949951280008e-05, "loss": 0.6427, "step": 167520 }, { "epoch": 1.85, "learning_rate": 1.9084027224141495e-05, "loss": 0.6456, "step": 167525 }, { "epoch": 1.86, "learning_rate": 1.9083104497002983e-05, "loss": 0.6245, "step": 167530 }, { "epoch": 1.86, "learning_rate": 1.908218176986447e-05, "loss": 0.6429, "step": 167535 }, { "epoch": 1.86, "learning_rate": 1.908125904272596e-05, "loss": 0.6758, "step": 167540 }, { "epoch": 1.86, "learning_rate": 1.9080336315587447e-05, "loss": 0.6119, "step": 167545 }, { "epoch": 1.86, "learning_rate": 1.907941358844893e-05, "loss": 0.615, "step": 167550 }, { "epoch": 1.86, "learning_rate": 1.9078490861310422e-05, "loss": 0.6377, "step": 167555 }, { "epoch": 1.86, "learning_rate": 1.9077568134171907e-05, "loss": 0.6237, "step": 167560 }, { "epoch": 1.86, "learning_rate": 1.9076645407033398e-05, "loss": 0.6054, "step": 167565 }, { "epoch": 1.86, "learning_rate": 1.9075722679894883e-05, "loss": 0.5921, "step": 167570 }, { "epoch": 1.86, "learning_rate": 1.9074799952756374e-05, "loss": 0.6198, "step": 167575 }, { "epoch": 1.86, "learning_rate": 1.907387722561786e-05, "loss": 0.6196, "step": 167580 }, { "epoch": 1.86, "learning_rate": 1.907295449847935e-05, "loss": 0.5951, "step": 167585 }, { "epoch": 1.86, "learning_rate": 1.9072031771340834e-05, "loss": 0.6509, "step": 167590 }, { "epoch": 1.86, "learning_rate": 1.9071109044202322e-05, "loss": 0.6681, "step": 167595 }, { "epoch": 1.86, "learning_rate": 1.907018631706381e-05, "loss": 0.6456, "step": 167600 }, { "epoch": 1.86, "learning_rate": 1.9069263589925294e-05, "loss": 0.6451, "step": 167605 }, { "epoch": 1.86, "learning_rate": 1.9068340862786785e-05, "loss": 0.6292, "step": 167610 }, { "epoch": 1.86, "learning_rate": 1.906741813564827e-05, "loss": 0.6211, "step": 167615 }, { "epoch": 1.86, "learning_rate": 1.906649540850976e-05, "loss": 0.6494, "step": 167620 }, { "epoch": 1.86, "learning_rate": 1.9065572681371246e-05, "loss": 0.5767, "step": 167625 }, { "epoch": 1.86, "learning_rate": 1.9064649954232737e-05, "loss": 0.6234, "step": 167630 }, { "epoch": 1.86, "learning_rate": 1.906372722709422e-05, "loss": 0.6365, "step": 167635 }, { "epoch": 1.86, "learning_rate": 1.9062804499955713e-05, "loss": 0.6145, "step": 167640 }, { "epoch": 1.86, "learning_rate": 1.9061881772817197e-05, "loss": 0.5962, "step": 167645 }, { "epoch": 1.86, "learning_rate": 1.9060959045678685e-05, "loss": 0.638, "step": 167650 }, { "epoch": 1.86, "learning_rate": 1.9060036318540173e-05, "loss": 0.5865, "step": 167655 }, { "epoch": 1.86, "learning_rate": 1.905911359140166e-05, "loss": 0.6941, "step": 167660 }, { "epoch": 1.86, "learning_rate": 1.905819086426315e-05, "loss": 0.6115, "step": 167665 }, { "epoch": 1.86, "learning_rate": 1.9057268137124633e-05, "loss": 0.6479, "step": 167670 }, { "epoch": 1.86, "learning_rate": 1.9056345409986124e-05, "loss": 0.579, "step": 167675 }, { "epoch": 1.86, "learning_rate": 1.905542268284761e-05, "loss": 0.5927, "step": 167680 }, { "epoch": 1.86, "learning_rate": 1.90544999557091e-05, "loss": 0.588, "step": 167685 }, { "epoch": 1.86, "learning_rate": 1.9053577228570584e-05, "loss": 0.6149, "step": 167690 }, { "epoch": 1.86, "learning_rate": 1.9052654501432075e-05, "loss": 0.6573, "step": 167695 }, { "epoch": 1.86, "learning_rate": 1.905173177429356e-05, "loss": 0.6569, "step": 167700 }, { "epoch": 1.86, "learning_rate": 1.9050809047155048e-05, "loss": 0.6306, "step": 167705 }, { "epoch": 1.86, "learning_rate": 1.9049886320016536e-05, "loss": 0.6113, "step": 167710 }, { "epoch": 1.86, "learning_rate": 1.9048963592878023e-05, "loss": 0.6077, "step": 167715 }, { "epoch": 1.86, "learning_rate": 1.904804086573951e-05, "loss": 0.6557, "step": 167720 }, { "epoch": 1.86, "learning_rate": 1.9047118138601e-05, "loss": 0.6404, "step": 167725 }, { "epoch": 1.86, "learning_rate": 1.9046195411462487e-05, "loss": 0.6274, "step": 167730 }, { "epoch": 1.86, "learning_rate": 1.9045272684323975e-05, "loss": 0.6769, "step": 167735 }, { "epoch": 1.86, "learning_rate": 1.9044349957185463e-05, "loss": 0.6308, "step": 167740 }, { "epoch": 1.86, "learning_rate": 1.9043427230046947e-05, "loss": 0.5951, "step": 167745 }, { "epoch": 1.86, "learning_rate": 1.904250450290844e-05, "loss": 0.5937, "step": 167750 }, { "epoch": 1.86, "learning_rate": 1.9041581775769923e-05, "loss": 0.6013, "step": 167755 }, { "epoch": 1.86, "learning_rate": 1.904065904863141e-05, "loss": 0.6033, "step": 167760 }, { "epoch": 1.86, "learning_rate": 1.90397363214929e-05, "loss": 0.6535, "step": 167765 }, { "epoch": 1.86, "learning_rate": 1.9038813594354386e-05, "loss": 0.6175, "step": 167770 }, { "epoch": 1.86, "learning_rate": 1.9037890867215874e-05, "loss": 0.6543, "step": 167775 }, { "epoch": 1.86, "learning_rate": 1.9036968140077362e-05, "loss": 0.6524, "step": 167780 }, { "epoch": 1.86, "learning_rate": 1.903604541293885e-05, "loss": 0.5655, "step": 167785 }, { "epoch": 1.86, "learning_rate": 1.9035122685800338e-05, "loss": 0.617, "step": 167790 }, { "epoch": 1.86, "learning_rate": 1.9034199958661826e-05, "loss": 0.6515, "step": 167795 }, { "epoch": 1.86, "learning_rate": 1.9033277231523314e-05, "loss": 0.5566, "step": 167800 }, { "epoch": 1.86, "learning_rate": 1.90323545043848e-05, "loss": 0.5785, "step": 167805 }, { "epoch": 1.86, "learning_rate": 1.903143177724629e-05, "loss": 0.5719, "step": 167810 }, { "epoch": 1.86, "learning_rate": 1.9030509050107777e-05, "loss": 0.6283, "step": 167815 }, { "epoch": 1.86, "learning_rate": 1.902958632296926e-05, "loss": 0.612, "step": 167820 }, { "epoch": 1.86, "learning_rate": 1.902866359583075e-05, "loss": 0.6185, "step": 167825 }, { "epoch": 1.86, "learning_rate": 1.9027740868692237e-05, "loss": 0.6235, "step": 167830 }, { "epoch": 1.86, "learning_rate": 1.9026818141553725e-05, "loss": 0.5926, "step": 167835 }, { "epoch": 1.86, "learning_rate": 1.9025895414415213e-05, "loss": 0.6335, "step": 167840 }, { "epoch": 1.86, "learning_rate": 1.90249726872767e-05, "loss": 0.6484, "step": 167845 }, { "epoch": 1.86, "learning_rate": 1.902404996013819e-05, "loss": 0.608, "step": 167850 }, { "epoch": 1.86, "learning_rate": 1.9023127232999676e-05, "loss": 0.62, "step": 167855 }, { "epoch": 1.86, "learning_rate": 1.9022204505861164e-05, "loss": 0.577, "step": 167860 }, { "epoch": 1.86, "learning_rate": 1.9021281778722652e-05, "loss": 0.6374, "step": 167865 }, { "epoch": 1.86, "learning_rate": 1.902035905158414e-05, "loss": 0.642, "step": 167870 }, { "epoch": 1.86, "learning_rate": 1.9019436324445628e-05, "loss": 0.6072, "step": 167875 }, { "epoch": 1.86, "learning_rate": 1.9018513597307112e-05, "loss": 0.6413, "step": 167880 }, { "epoch": 1.86, "learning_rate": 1.9017590870168604e-05, "loss": 0.6274, "step": 167885 }, { "epoch": 1.86, "learning_rate": 1.9016668143030088e-05, "loss": 0.6506, "step": 167890 }, { "epoch": 1.86, "learning_rate": 1.9015745415891576e-05, "loss": 0.6702, "step": 167895 }, { "epoch": 1.86, "learning_rate": 1.9014822688753064e-05, "loss": 0.621, "step": 167900 }, { "epoch": 1.86, "learning_rate": 1.901389996161455e-05, "loss": 0.5895, "step": 167905 }, { "epoch": 1.86, "learning_rate": 1.901297723447604e-05, "loss": 0.6311, "step": 167910 }, { "epoch": 1.86, "learning_rate": 1.9012054507337527e-05, "loss": 0.5786, "step": 167915 }, { "epoch": 1.86, "learning_rate": 1.9011131780199015e-05, "loss": 0.6511, "step": 167920 }, { "epoch": 1.86, "learning_rate": 1.9010209053060503e-05, "loss": 0.6102, "step": 167925 }, { "epoch": 1.86, "learning_rate": 1.900928632592199e-05, "loss": 0.5918, "step": 167930 }, { "epoch": 1.86, "learning_rate": 1.9008363598783475e-05, "loss": 0.611, "step": 167935 }, { "epoch": 1.86, "learning_rate": 1.9007440871644967e-05, "loss": 0.6754, "step": 167940 }, { "epoch": 1.86, "learning_rate": 1.900651814450645e-05, "loss": 0.6739, "step": 167945 }, { "epoch": 1.86, "learning_rate": 1.9005595417367942e-05, "loss": 0.6661, "step": 167950 }, { "epoch": 1.86, "learning_rate": 1.9004672690229427e-05, "loss": 0.6203, "step": 167955 }, { "epoch": 1.86, "learning_rate": 1.9003749963090918e-05, "loss": 0.5934, "step": 167960 }, { "epoch": 1.86, "learning_rate": 1.9002827235952402e-05, "loss": 0.6341, "step": 167965 }, { "epoch": 1.86, "learning_rate": 1.900190450881389e-05, "loss": 0.6131, "step": 167970 }, { "epoch": 1.86, "learning_rate": 1.9000981781675378e-05, "loss": 0.6113, "step": 167975 }, { "epoch": 1.86, "learning_rate": 1.9000059054536866e-05, "loss": 0.6366, "step": 167980 }, { "epoch": 1.86, "learning_rate": 1.8999136327398354e-05, "loss": 0.5704, "step": 167985 }, { "epoch": 1.86, "learning_rate": 1.8998213600259838e-05, "loss": 0.6186, "step": 167990 }, { "epoch": 1.86, "learning_rate": 1.899729087312133e-05, "loss": 0.6417, "step": 167995 }, { "epoch": 1.86, "learning_rate": 1.8996368145982814e-05, "loss": 0.5923, "step": 168000 }, { "epoch": 1.86, "eval_loss": 0.6017884612083435, "eval_runtime": 69.1971, "eval_samples_per_second": 28.903, "eval_steps_per_second": 14.451, "step": 168000 }, { "epoch": 1.86, "learning_rate": 1.8995445418844305e-05, "loss": 0.659, "step": 168005 }, { "epoch": 1.86, "learning_rate": 1.899452269170579e-05, "loss": 0.6527, "step": 168010 }, { "epoch": 1.86, "learning_rate": 1.899359996456728e-05, "loss": 0.6663, "step": 168015 }, { "epoch": 1.86, "learning_rate": 1.8992677237428765e-05, "loss": 0.6266, "step": 168020 }, { "epoch": 1.86, "learning_rate": 1.8991754510290257e-05, "loss": 0.6097, "step": 168025 }, { "epoch": 1.86, "learning_rate": 1.899083178315174e-05, "loss": 0.6881, "step": 168030 }, { "epoch": 1.86, "learning_rate": 1.898990905601323e-05, "loss": 0.6128, "step": 168035 }, { "epoch": 1.86, "learning_rate": 1.8988986328874717e-05, "loss": 0.6217, "step": 168040 }, { "epoch": 1.86, "learning_rate": 1.8988063601736205e-05, "loss": 0.6335, "step": 168045 }, { "epoch": 1.86, "learning_rate": 1.8987140874597692e-05, "loss": 0.6396, "step": 168050 }, { "epoch": 1.86, "learning_rate": 1.8986218147459177e-05, "loss": 0.626, "step": 168055 }, { "epoch": 1.86, "learning_rate": 1.8985295420320668e-05, "loss": 0.5512, "step": 168060 }, { "epoch": 1.86, "learning_rate": 1.8984372693182153e-05, "loss": 0.6293, "step": 168065 }, { "epoch": 1.86, "learning_rate": 1.8983449966043644e-05, "loss": 0.6163, "step": 168070 }, { "epoch": 1.86, "learning_rate": 1.8982527238905128e-05, "loss": 0.5809, "step": 168075 }, { "epoch": 1.86, "learning_rate": 1.898160451176662e-05, "loss": 0.5606, "step": 168080 }, { "epoch": 1.86, "learning_rate": 1.8980681784628104e-05, "loss": 0.5656, "step": 168085 }, { "epoch": 1.86, "learning_rate": 1.8979759057489592e-05, "loss": 0.6655, "step": 168090 }, { "epoch": 1.86, "learning_rate": 1.897883633035108e-05, "loss": 0.5933, "step": 168095 }, { "epoch": 1.86, "learning_rate": 1.8977913603212567e-05, "loss": 0.6026, "step": 168100 }, { "epoch": 1.86, "learning_rate": 1.8976990876074055e-05, "loss": 0.6228, "step": 168105 }, { "epoch": 1.86, "learning_rate": 1.8976068148935543e-05, "loss": 0.5877, "step": 168110 }, { "epoch": 1.86, "learning_rate": 1.897514542179703e-05, "loss": 0.6235, "step": 168115 }, { "epoch": 1.86, "learning_rate": 1.897422269465852e-05, "loss": 0.5773, "step": 168120 }, { "epoch": 1.86, "learning_rate": 1.8973299967520007e-05, "loss": 0.6148, "step": 168125 }, { "epoch": 1.86, "learning_rate": 1.897237724038149e-05, "loss": 0.6522, "step": 168130 }, { "epoch": 1.86, "learning_rate": 1.8971454513242982e-05, "loss": 0.5593, "step": 168135 }, { "epoch": 1.86, "learning_rate": 1.8970531786104467e-05, "loss": 0.606, "step": 168140 }, { "epoch": 1.86, "learning_rate": 1.8969609058965955e-05, "loss": 0.601, "step": 168145 }, { "epoch": 1.86, "learning_rate": 1.8968686331827443e-05, "loss": 0.575, "step": 168150 }, { "epoch": 1.86, "learning_rate": 1.896776360468893e-05, "loss": 0.6231, "step": 168155 }, { "epoch": 1.86, "learning_rate": 1.8966840877550418e-05, "loss": 0.5953, "step": 168160 }, { "epoch": 1.86, "learning_rate": 1.8965918150411906e-05, "loss": 0.5949, "step": 168165 }, { "epoch": 1.86, "learning_rate": 1.8964995423273394e-05, "loss": 0.6339, "step": 168170 }, { "epoch": 1.86, "learning_rate": 1.8964072696134882e-05, "loss": 0.6213, "step": 168175 }, { "epoch": 1.86, "learning_rate": 1.896314996899637e-05, "loss": 0.5512, "step": 168180 }, { "epoch": 1.86, "learning_rate": 1.8962227241857858e-05, "loss": 0.6125, "step": 168185 }, { "epoch": 1.86, "learning_rate": 1.8961304514719345e-05, "loss": 0.6199, "step": 168190 }, { "epoch": 1.86, "learning_rate": 1.8960381787580833e-05, "loss": 0.6034, "step": 168195 }, { "epoch": 1.86, "learning_rate": 1.895945906044232e-05, "loss": 0.6642, "step": 168200 }, { "epoch": 1.86, "learning_rate": 1.8958536333303806e-05, "loss": 0.5851, "step": 168205 }, { "epoch": 1.86, "learning_rate": 1.8957613606165293e-05, "loss": 0.6251, "step": 168210 }, { "epoch": 1.86, "learning_rate": 1.895669087902678e-05, "loss": 0.6415, "step": 168215 }, { "epoch": 1.86, "learning_rate": 1.895576815188827e-05, "loss": 0.6081, "step": 168220 }, { "epoch": 1.86, "learning_rate": 1.8954845424749757e-05, "loss": 0.5947, "step": 168225 }, { "epoch": 1.86, "learning_rate": 1.8953922697611245e-05, "loss": 0.6307, "step": 168230 }, { "epoch": 1.86, "learning_rate": 1.8952999970472733e-05, "loss": 0.6229, "step": 168235 }, { "epoch": 1.86, "learning_rate": 1.895207724333422e-05, "loss": 0.6117, "step": 168240 }, { "epoch": 1.86, "learning_rate": 1.895115451619571e-05, "loss": 0.6197, "step": 168245 }, { "epoch": 1.86, "learning_rate": 1.8950231789057196e-05, "loss": 0.6281, "step": 168250 }, { "epoch": 1.86, "learning_rate": 1.8949309061918684e-05, "loss": 0.6659, "step": 168255 }, { "epoch": 1.86, "learning_rate": 1.8948386334780172e-05, "loss": 0.5968, "step": 168260 }, { "epoch": 1.86, "learning_rate": 1.8947463607641656e-05, "loss": 0.6457, "step": 168265 }, { "epoch": 1.86, "learning_rate": 1.8946540880503148e-05, "loss": 0.6352, "step": 168270 }, { "epoch": 1.86, "learning_rate": 1.8945618153364632e-05, "loss": 0.6366, "step": 168275 }, { "epoch": 1.86, "learning_rate": 1.894469542622612e-05, "loss": 0.5603, "step": 168280 }, { "epoch": 1.86, "learning_rate": 1.8943772699087608e-05, "loss": 0.5414, "step": 168285 }, { "epoch": 1.86, "learning_rate": 1.8942849971949096e-05, "loss": 0.6354, "step": 168290 }, { "epoch": 1.86, "learning_rate": 1.8941927244810583e-05, "loss": 0.5544, "step": 168295 }, { "epoch": 1.86, "learning_rate": 1.894100451767207e-05, "loss": 0.608, "step": 168300 }, { "epoch": 1.86, "learning_rate": 1.894008179053356e-05, "loss": 0.6484, "step": 168305 }, { "epoch": 1.86, "learning_rate": 1.8939159063395047e-05, "loss": 0.6033, "step": 168310 }, { "epoch": 1.86, "learning_rate": 1.8938236336256535e-05, "loss": 0.6169, "step": 168315 }, { "epoch": 1.86, "learning_rate": 1.893731360911802e-05, "loss": 0.6536, "step": 168320 }, { "epoch": 1.86, "learning_rate": 1.893639088197951e-05, "loss": 0.5995, "step": 168325 }, { "epoch": 1.86, "learning_rate": 1.8935468154840995e-05, "loss": 0.6392, "step": 168330 }, { "epoch": 1.86, "learning_rate": 1.8934545427702486e-05, "loss": 0.6154, "step": 168335 }, { "epoch": 1.86, "learning_rate": 1.893362270056397e-05, "loss": 0.6394, "step": 168340 }, { "epoch": 1.86, "learning_rate": 1.8932699973425462e-05, "loss": 0.6021, "step": 168345 }, { "epoch": 1.86, "learning_rate": 1.8931777246286946e-05, "loss": 0.6568, "step": 168350 }, { "epoch": 1.86, "learning_rate": 1.8930854519148434e-05, "loss": 0.5837, "step": 168355 }, { "epoch": 1.86, "learning_rate": 1.8929931792009922e-05, "loss": 0.6416, "step": 168360 }, { "epoch": 1.86, "learning_rate": 1.892900906487141e-05, "loss": 0.6345, "step": 168365 }, { "epoch": 1.86, "learning_rate": 1.8928086337732898e-05, "loss": 0.6266, "step": 168370 }, { "epoch": 1.86, "learning_rate": 1.8927163610594382e-05, "loss": 0.5842, "step": 168375 }, { "epoch": 1.86, "learning_rate": 1.8926240883455873e-05, "loss": 0.6368, "step": 168380 }, { "epoch": 1.86, "learning_rate": 1.8925318156317358e-05, "loss": 0.5814, "step": 168385 }, { "epoch": 1.86, "learning_rate": 1.892439542917885e-05, "loss": 0.6494, "step": 168390 }, { "epoch": 1.86, "learning_rate": 1.8923472702040334e-05, "loss": 0.5536, "step": 168395 }, { "epoch": 1.86, "learning_rate": 1.8922549974901825e-05, "loss": 0.6053, "step": 168400 }, { "epoch": 1.86, "learning_rate": 1.892162724776331e-05, "loss": 0.6358, "step": 168405 }, { "epoch": 1.86, "learning_rate": 1.89207045206248e-05, "loss": 0.5906, "step": 168410 }, { "epoch": 1.86, "learning_rate": 1.8919781793486285e-05, "loss": 0.6284, "step": 168415 }, { "epoch": 1.86, "learning_rate": 1.8918859066347773e-05, "loss": 0.5739, "step": 168420 }, { "epoch": 1.86, "learning_rate": 1.891793633920926e-05, "loss": 0.6039, "step": 168425 }, { "epoch": 1.86, "learning_rate": 1.891701361207075e-05, "loss": 0.6417, "step": 168430 }, { "epoch": 1.87, "learning_rate": 1.8916090884932236e-05, "loss": 0.644, "step": 168435 }, { "epoch": 1.87, "learning_rate": 1.891516815779372e-05, "loss": 0.6341, "step": 168440 }, { "epoch": 1.87, "learning_rate": 1.8914245430655212e-05, "loss": 0.6332, "step": 168445 }, { "epoch": 1.87, "learning_rate": 1.8913322703516697e-05, "loss": 0.6605, "step": 168450 }, { "epoch": 1.87, "learning_rate": 1.8912399976378188e-05, "loss": 0.6201, "step": 168455 }, { "epoch": 1.87, "learning_rate": 1.8911477249239672e-05, "loss": 0.6628, "step": 168460 }, { "epoch": 1.87, "learning_rate": 1.8910554522101164e-05, "loss": 0.6076, "step": 168465 }, { "epoch": 1.87, "learning_rate": 1.8909631794962648e-05, "loss": 0.5883, "step": 168470 }, { "epoch": 1.87, "learning_rate": 1.8908709067824136e-05, "loss": 0.6404, "step": 168475 }, { "epoch": 1.87, "learning_rate": 1.8907786340685624e-05, "loss": 0.581, "step": 168480 }, { "epoch": 1.87, "learning_rate": 1.890686361354711e-05, "loss": 0.651, "step": 168485 }, { "epoch": 1.87, "learning_rate": 1.89059408864086e-05, "loss": 0.6399, "step": 168490 }, { "epoch": 1.87, "learning_rate": 1.8905018159270087e-05, "loss": 0.6529, "step": 168495 }, { "epoch": 1.87, "learning_rate": 1.8904095432131575e-05, "loss": 0.6273, "step": 168500 }, { "epoch": 1.87, "learning_rate": 1.890317270499306e-05, "loss": 0.6435, "step": 168505 }, { "epoch": 1.87, "learning_rate": 1.890224997785455e-05, "loss": 0.6708, "step": 168510 }, { "epoch": 1.87, "learning_rate": 1.8901327250716035e-05, "loss": 0.6219, "step": 168515 }, { "epoch": 1.87, "learning_rate": 1.8900404523577526e-05, "loss": 0.6408, "step": 168520 }, { "epoch": 1.87, "learning_rate": 1.889948179643901e-05, "loss": 0.6249, "step": 168525 }, { "epoch": 1.87, "learning_rate": 1.8898559069300502e-05, "loss": 0.6139, "step": 168530 }, { "epoch": 1.87, "learning_rate": 1.8897636342161987e-05, "loss": 0.6528, "step": 168535 }, { "epoch": 1.87, "learning_rate": 1.8896713615023474e-05, "loss": 0.5771, "step": 168540 }, { "epoch": 1.87, "learning_rate": 1.8895790887884962e-05, "loss": 0.6127, "step": 168545 }, { "epoch": 1.87, "learning_rate": 1.889486816074645e-05, "loss": 0.6246, "step": 168550 }, { "epoch": 1.87, "learning_rate": 1.8893945433607938e-05, "loss": 0.641, "step": 168555 }, { "epoch": 1.87, "learning_rate": 1.8893022706469426e-05, "loss": 0.588, "step": 168560 }, { "epoch": 1.87, "learning_rate": 1.8892099979330914e-05, "loss": 0.6267, "step": 168565 }, { "epoch": 1.87, "learning_rate": 1.88911772521924e-05, "loss": 0.5787, "step": 168570 }, { "epoch": 1.87, "learning_rate": 1.889025452505389e-05, "loss": 0.643, "step": 168575 }, { "epoch": 1.87, "learning_rate": 1.8889331797915374e-05, "loss": 0.6202, "step": 168580 }, { "epoch": 1.87, "learning_rate": 1.8888409070776865e-05, "loss": 0.6658, "step": 168585 }, { "epoch": 1.87, "learning_rate": 1.888748634363835e-05, "loss": 0.6422, "step": 168590 }, { "epoch": 1.87, "learning_rate": 1.8886563616499837e-05, "loss": 0.6509, "step": 168595 }, { "epoch": 1.87, "learning_rate": 1.8885640889361325e-05, "loss": 0.6175, "step": 168600 }, { "epoch": 1.87, "learning_rate": 1.8884718162222813e-05, "loss": 0.6472, "step": 168605 }, { "epoch": 1.87, "learning_rate": 1.88837954350843e-05, "loss": 0.6567, "step": 168610 }, { "epoch": 1.87, "learning_rate": 1.888287270794579e-05, "loss": 0.6437, "step": 168615 }, { "epoch": 1.87, "learning_rate": 1.8881949980807277e-05, "loss": 0.6407, "step": 168620 }, { "epoch": 1.87, "learning_rate": 1.8881027253668765e-05, "loss": 0.6208, "step": 168625 }, { "epoch": 1.87, "learning_rate": 1.8880104526530252e-05, "loss": 0.5701, "step": 168630 }, { "epoch": 1.87, "learning_rate": 1.887918179939174e-05, "loss": 0.6247, "step": 168635 }, { "epoch": 1.87, "learning_rate": 1.8878259072253228e-05, "loss": 0.6194, "step": 168640 }, { "epoch": 1.87, "learning_rate": 1.8877336345114716e-05, "loss": 0.6339, "step": 168645 }, { "epoch": 1.87, "learning_rate": 1.88764136179762e-05, "loss": 0.6044, "step": 168650 }, { "epoch": 1.87, "learning_rate": 1.8875490890837688e-05, "loss": 0.6262, "step": 168655 }, { "epoch": 1.87, "learning_rate": 1.8874568163699176e-05, "loss": 0.618, "step": 168660 }, { "epoch": 1.87, "learning_rate": 1.8873645436560664e-05, "loss": 0.6148, "step": 168665 }, { "epoch": 1.87, "learning_rate": 1.8872722709422152e-05, "loss": 0.6305, "step": 168670 }, { "epoch": 1.87, "learning_rate": 1.887179998228364e-05, "loss": 0.6236, "step": 168675 }, { "epoch": 1.87, "learning_rate": 1.8870877255145127e-05, "loss": 0.5796, "step": 168680 }, { "epoch": 1.87, "learning_rate": 1.8869954528006615e-05, "loss": 0.6797, "step": 168685 }, { "epoch": 1.87, "learning_rate": 1.8869031800868103e-05, "loss": 0.6371, "step": 168690 }, { "epoch": 1.87, "learning_rate": 1.886810907372959e-05, "loss": 0.5717, "step": 168695 }, { "epoch": 1.87, "learning_rate": 1.886718634659108e-05, "loss": 0.6139, "step": 168700 }, { "epoch": 1.87, "learning_rate": 1.8866263619452563e-05, "loss": 0.6024, "step": 168705 }, { "epoch": 1.87, "learning_rate": 1.8865340892314055e-05, "loss": 0.6207, "step": 168710 }, { "epoch": 1.87, "learning_rate": 1.886441816517554e-05, "loss": 0.636, "step": 168715 }, { "epoch": 1.87, "learning_rate": 1.886349543803703e-05, "loss": 0.6264, "step": 168720 }, { "epoch": 1.87, "learning_rate": 1.8862572710898515e-05, "loss": 0.5023, "step": 168725 }, { "epoch": 1.87, "learning_rate": 1.8861649983760003e-05, "loss": 0.6454, "step": 168730 }, { "epoch": 1.87, "learning_rate": 1.886072725662149e-05, "loss": 0.5603, "step": 168735 }, { "epoch": 1.87, "learning_rate": 1.8859804529482978e-05, "loss": 0.5986, "step": 168740 }, { "epoch": 1.87, "learning_rate": 1.8858881802344466e-05, "loss": 0.6528, "step": 168745 }, { "epoch": 1.87, "learning_rate": 1.8857959075205954e-05, "loss": 0.5645, "step": 168750 }, { "epoch": 1.87, "learning_rate": 1.8857036348067442e-05, "loss": 0.7012, "step": 168755 }, { "epoch": 1.87, "learning_rate": 1.885611362092893e-05, "loss": 0.5845, "step": 168760 }, { "epoch": 1.87, "learning_rate": 1.8855190893790417e-05, "loss": 0.6409, "step": 168765 }, { "epoch": 1.87, "learning_rate": 1.8854268166651902e-05, "loss": 0.6049, "step": 168770 }, { "epoch": 1.87, "learning_rate": 1.8853345439513393e-05, "loss": 0.6458, "step": 168775 }, { "epoch": 1.87, "learning_rate": 1.8852422712374878e-05, "loss": 0.6505, "step": 168780 }, { "epoch": 1.87, "learning_rate": 1.885149998523637e-05, "loss": 0.6369, "step": 168785 }, { "epoch": 1.87, "learning_rate": 1.8850577258097853e-05, "loss": 0.587, "step": 168790 }, { "epoch": 1.87, "learning_rate": 1.8849654530959345e-05, "loss": 0.6306, "step": 168795 }, { "epoch": 1.87, "learning_rate": 1.884873180382083e-05, "loss": 0.6325, "step": 168800 }, { "epoch": 1.87, "learning_rate": 1.8847809076682317e-05, "loss": 0.5401, "step": 168805 }, { "epoch": 1.87, "learning_rate": 1.8846886349543805e-05, "loss": 0.5868, "step": 168810 }, { "epoch": 1.87, "learning_rate": 1.8845963622405293e-05, "loss": 0.5859, "step": 168815 }, { "epoch": 1.87, "learning_rate": 1.884504089526678e-05, "loss": 0.6149, "step": 168820 }, { "epoch": 1.87, "learning_rate": 1.8844118168128265e-05, "loss": 0.6404, "step": 168825 }, { "epoch": 1.87, "learning_rate": 1.8843195440989756e-05, "loss": 0.612, "step": 168830 }, { "epoch": 1.87, "learning_rate": 1.884227271385124e-05, "loss": 0.6067, "step": 168835 }, { "epoch": 1.87, "learning_rate": 1.8841349986712732e-05, "loss": 0.6276, "step": 168840 }, { "epoch": 1.87, "learning_rate": 1.8840427259574216e-05, "loss": 0.5493, "step": 168845 }, { "epoch": 1.87, "learning_rate": 1.8839504532435708e-05, "loss": 0.587, "step": 168850 }, { "epoch": 1.87, "learning_rate": 1.8838581805297192e-05, "loss": 0.5608, "step": 168855 }, { "epoch": 1.87, "learning_rate": 1.883765907815868e-05, "loss": 0.5749, "step": 168860 }, { "epoch": 1.87, "learning_rate": 1.8836736351020168e-05, "loss": 0.6017, "step": 168865 }, { "epoch": 1.87, "learning_rate": 1.8835813623881656e-05, "loss": 0.6733, "step": 168870 }, { "epoch": 1.87, "learning_rate": 1.8834890896743143e-05, "loss": 0.6019, "step": 168875 }, { "epoch": 1.87, "learning_rate": 1.8833968169604628e-05, "loss": 0.618, "step": 168880 }, { "epoch": 1.87, "learning_rate": 1.883304544246612e-05, "loss": 0.6609, "step": 168885 }, { "epoch": 1.87, "learning_rate": 1.8832122715327604e-05, "loss": 0.6766, "step": 168890 }, { "epoch": 1.87, "learning_rate": 1.8831199988189095e-05, "loss": 0.6771, "step": 168895 }, { "epoch": 1.87, "learning_rate": 1.883027726105058e-05, "loss": 0.6174, "step": 168900 }, { "epoch": 1.87, "learning_rate": 1.882935453391207e-05, "loss": 0.5919, "step": 168905 }, { "epoch": 1.87, "learning_rate": 1.8828431806773555e-05, "loss": 0.5907, "step": 168910 }, { "epoch": 1.87, "learning_rate": 1.8827509079635046e-05, "loss": 0.6405, "step": 168915 }, { "epoch": 1.87, "learning_rate": 1.882658635249653e-05, "loss": 0.6346, "step": 168920 }, { "epoch": 1.87, "learning_rate": 1.882566362535802e-05, "loss": 0.5959, "step": 168925 }, { "epoch": 1.87, "learning_rate": 1.8824740898219506e-05, "loss": 0.6084, "step": 168930 }, { "epoch": 1.87, "learning_rate": 1.8823818171080994e-05, "loss": 0.5881, "step": 168935 }, { "epoch": 1.87, "learning_rate": 1.8822895443942482e-05, "loss": 0.577, "step": 168940 }, { "epoch": 1.87, "learning_rate": 1.882197271680397e-05, "loss": 0.6247, "step": 168945 }, { "epoch": 1.87, "learning_rate": 1.8821049989665458e-05, "loss": 0.5906, "step": 168950 }, { "epoch": 1.87, "learning_rate": 1.8820127262526946e-05, "loss": 0.6034, "step": 168955 }, { "epoch": 1.87, "learning_rate": 1.8819204535388433e-05, "loss": 0.5627, "step": 168960 }, { "epoch": 1.87, "learning_rate": 1.8818281808249918e-05, "loss": 0.6115, "step": 168965 }, { "epoch": 1.87, "learning_rate": 1.881735908111141e-05, "loss": 0.6872, "step": 168970 }, { "epoch": 1.87, "learning_rate": 1.8816436353972894e-05, "loss": 0.5813, "step": 168975 }, { "epoch": 1.87, "learning_rate": 1.881551362683438e-05, "loss": 0.6478, "step": 168980 }, { "epoch": 1.87, "learning_rate": 1.881459089969587e-05, "loss": 0.6249, "step": 168985 }, { "epoch": 1.87, "learning_rate": 1.8813668172557357e-05, "loss": 0.6247, "step": 168990 }, { "epoch": 1.87, "learning_rate": 1.8812745445418845e-05, "loss": 0.6074, "step": 168995 }, { "epoch": 1.87, "learning_rate": 1.8811822718280333e-05, "loss": 0.5894, "step": 169000 }, { "epoch": 1.87, "eval_loss": 0.5912185311317444, "eval_runtime": 69.3994, "eval_samples_per_second": 28.819, "eval_steps_per_second": 14.409, "step": 169000 }, { "epoch": 1.87, "learning_rate": 1.881089999114182e-05, "loss": 0.6196, "step": 169005 }, { "epoch": 1.87, "learning_rate": 1.880997726400331e-05, "loss": 0.6005, "step": 169010 }, { "epoch": 1.87, "learning_rate": 1.8809054536864796e-05, "loss": 0.6462, "step": 169015 }, { "epoch": 1.87, "learning_rate": 1.8808131809726284e-05, "loss": 0.6605, "step": 169020 }, { "epoch": 1.87, "learning_rate": 1.8807209082587772e-05, "loss": 0.5926, "step": 169025 }, { "epoch": 1.87, "learning_rate": 1.880628635544926e-05, "loss": 0.6269, "step": 169030 }, { "epoch": 1.87, "learning_rate": 1.8805363628310744e-05, "loss": 0.5983, "step": 169035 }, { "epoch": 1.87, "learning_rate": 1.8804440901172232e-05, "loss": 0.6194, "step": 169040 }, { "epoch": 1.87, "learning_rate": 1.880351817403372e-05, "loss": 0.6232, "step": 169045 }, { "epoch": 1.87, "learning_rate": 1.8802595446895208e-05, "loss": 0.6356, "step": 169050 }, { "epoch": 1.87, "learning_rate": 1.8801672719756696e-05, "loss": 0.6273, "step": 169055 }, { "epoch": 1.87, "learning_rate": 1.8800749992618184e-05, "loss": 0.6016, "step": 169060 }, { "epoch": 1.87, "learning_rate": 1.879982726547967e-05, "loss": 0.6884, "step": 169065 }, { "epoch": 1.87, "learning_rate": 1.879890453834116e-05, "loss": 0.6047, "step": 169070 }, { "epoch": 1.87, "learning_rate": 1.8797981811202647e-05, "loss": 0.6061, "step": 169075 }, { "epoch": 1.87, "learning_rate": 1.8797059084064135e-05, "loss": 0.6244, "step": 169080 }, { "epoch": 1.87, "learning_rate": 1.8796136356925623e-05, "loss": 0.6025, "step": 169085 }, { "epoch": 1.87, "learning_rate": 1.8795213629787107e-05, "loss": 0.6076, "step": 169090 }, { "epoch": 1.87, "learning_rate": 1.87942909026486e-05, "loss": 0.6461, "step": 169095 }, { "epoch": 1.87, "learning_rate": 1.8793368175510083e-05, "loss": 0.581, "step": 169100 }, { "epoch": 1.87, "learning_rate": 1.8792445448371574e-05, "loss": 0.708, "step": 169105 }, { "epoch": 1.87, "learning_rate": 1.879152272123306e-05, "loss": 0.5782, "step": 169110 }, { "epoch": 1.87, "learning_rate": 1.8790599994094547e-05, "loss": 0.5839, "step": 169115 }, { "epoch": 1.87, "learning_rate": 1.8789677266956034e-05, "loss": 0.6544, "step": 169120 }, { "epoch": 1.87, "learning_rate": 1.8788754539817522e-05, "loss": 0.5949, "step": 169125 }, { "epoch": 1.87, "learning_rate": 1.878783181267901e-05, "loss": 0.598, "step": 169130 }, { "epoch": 1.87, "learning_rate": 1.8786909085540498e-05, "loss": 0.6428, "step": 169135 }, { "epoch": 1.87, "learning_rate": 1.8785986358401986e-05, "loss": 0.5818, "step": 169140 }, { "epoch": 1.87, "learning_rate": 1.8785063631263474e-05, "loss": 0.6079, "step": 169145 }, { "epoch": 1.87, "learning_rate": 1.878414090412496e-05, "loss": 0.592, "step": 169150 }, { "epoch": 1.87, "learning_rate": 1.8783218176986446e-05, "loss": 0.6141, "step": 169155 }, { "epoch": 1.87, "learning_rate": 1.8782295449847937e-05, "loss": 0.6096, "step": 169160 }, { "epoch": 1.87, "learning_rate": 1.878137272270942e-05, "loss": 0.6064, "step": 169165 }, { "epoch": 1.87, "learning_rate": 1.8780449995570913e-05, "loss": 0.6667, "step": 169170 }, { "epoch": 1.87, "learning_rate": 1.8779527268432397e-05, "loss": 0.6205, "step": 169175 }, { "epoch": 1.87, "learning_rate": 1.877860454129389e-05, "loss": 0.6181, "step": 169180 }, { "epoch": 1.87, "learning_rate": 1.8777681814155373e-05, "loss": 0.5908, "step": 169185 }, { "epoch": 1.87, "learning_rate": 1.877675908701686e-05, "loss": 0.6217, "step": 169190 }, { "epoch": 1.87, "learning_rate": 1.877583635987835e-05, "loss": 0.6361, "step": 169195 }, { "epoch": 1.87, "learning_rate": 1.8774913632739837e-05, "loss": 0.6721, "step": 169200 }, { "epoch": 1.87, "learning_rate": 1.8773990905601324e-05, "loss": 0.6571, "step": 169205 }, { "epoch": 1.87, "learning_rate": 1.877306817846281e-05, "loss": 0.6153, "step": 169210 }, { "epoch": 1.87, "learning_rate": 1.87721454513243e-05, "loss": 0.6349, "step": 169215 }, { "epoch": 1.87, "learning_rate": 1.8771222724185785e-05, "loss": 0.5961, "step": 169220 }, { "epoch": 1.87, "learning_rate": 1.8770299997047276e-05, "loss": 0.6471, "step": 169225 }, { "epoch": 1.87, "learning_rate": 1.876937726990876e-05, "loss": 0.6171, "step": 169230 }, { "epoch": 1.87, "learning_rate": 1.876845454277025e-05, "loss": 0.6209, "step": 169235 }, { "epoch": 1.87, "learning_rate": 1.8767531815631736e-05, "loss": 0.5953, "step": 169240 }, { "epoch": 1.87, "learning_rate": 1.8766609088493224e-05, "loss": 0.5873, "step": 169245 }, { "epoch": 1.87, "learning_rate": 1.8765686361354712e-05, "loss": 0.588, "step": 169250 }, { "epoch": 1.87, "learning_rate": 1.87647636342162e-05, "loss": 0.6178, "step": 169255 }, { "epoch": 1.87, "learning_rate": 1.8763840907077687e-05, "loss": 0.589, "step": 169260 }, { "epoch": 1.87, "learning_rate": 1.8762918179939172e-05, "loss": 0.5979, "step": 169265 }, { "epoch": 1.87, "learning_rate": 1.8761995452800663e-05, "loss": 0.5918, "step": 169270 }, { "epoch": 1.87, "learning_rate": 1.8761072725662148e-05, "loss": 0.5951, "step": 169275 }, { "epoch": 1.87, "learning_rate": 1.876014999852364e-05, "loss": 0.6395, "step": 169280 }, { "epoch": 1.87, "learning_rate": 1.8759227271385123e-05, "loss": 0.6246, "step": 169285 }, { "epoch": 1.87, "learning_rate": 1.8758304544246614e-05, "loss": 0.6069, "step": 169290 }, { "epoch": 1.87, "learning_rate": 1.87573818171081e-05, "loss": 0.593, "step": 169295 }, { "epoch": 1.87, "learning_rate": 1.875645908996959e-05, "loss": 0.5912, "step": 169300 }, { "epoch": 1.87, "learning_rate": 1.8755536362831075e-05, "loss": 0.6311, "step": 169305 }, { "epoch": 1.87, "learning_rate": 1.8754613635692563e-05, "loss": 0.5943, "step": 169310 }, { "epoch": 1.87, "learning_rate": 1.875369090855405e-05, "loss": 0.6369, "step": 169315 }, { "epoch": 1.87, "learning_rate": 1.8752768181415538e-05, "loss": 0.6299, "step": 169320 }, { "epoch": 1.87, "learning_rate": 1.8751845454277026e-05, "loss": 0.6297, "step": 169325 }, { "epoch": 1.87, "learning_rate": 1.8750922727138514e-05, "loss": 0.6706, "step": 169330 }, { "epoch": 1.87, "learning_rate": 1.8750000000000002e-05, "loss": 0.5757, "step": 169335 }, { "epoch": 1.88, "learning_rate": 1.8749077272861486e-05, "loss": 0.6335, "step": 169340 }, { "epoch": 1.88, "learning_rate": 1.8748154545722977e-05, "loss": 0.6644, "step": 169345 }, { "epoch": 1.88, "learning_rate": 1.8747231818584462e-05, "loss": 0.6144, "step": 169350 }, { "epoch": 1.88, "learning_rate": 1.8746309091445953e-05, "loss": 0.6517, "step": 169355 }, { "epoch": 1.88, "learning_rate": 1.8745386364307438e-05, "loss": 0.5833, "step": 169360 }, { "epoch": 1.88, "learning_rate": 1.8744463637168925e-05, "loss": 0.6385, "step": 169365 }, { "epoch": 1.88, "learning_rate": 1.8743540910030413e-05, "loss": 0.5745, "step": 169370 }, { "epoch": 1.88, "learning_rate": 1.87426181828919e-05, "loss": 0.5982, "step": 169375 }, { "epoch": 1.88, "learning_rate": 1.874169545575339e-05, "loss": 0.6203, "step": 169380 }, { "epoch": 1.88, "learning_rate": 1.8740772728614877e-05, "loss": 0.6057, "step": 169385 }, { "epoch": 1.88, "learning_rate": 1.8739850001476365e-05, "loss": 0.664, "step": 169390 }, { "epoch": 1.88, "learning_rate": 1.8738927274337853e-05, "loss": 0.6097, "step": 169395 }, { "epoch": 1.88, "learning_rate": 1.873800454719934e-05, "loss": 0.5966, "step": 169400 }, { "epoch": 1.88, "learning_rate": 1.8737081820060828e-05, "loss": 0.6414, "step": 169405 }, { "epoch": 1.88, "learning_rate": 1.8736159092922316e-05, "loss": 0.619, "step": 169410 }, { "epoch": 1.88, "learning_rate": 1.87352363657838e-05, "loss": 0.6348, "step": 169415 }, { "epoch": 1.88, "learning_rate": 1.873431363864529e-05, "loss": 0.6229, "step": 169420 }, { "epoch": 1.88, "learning_rate": 1.8733390911506776e-05, "loss": 0.652, "step": 169425 }, { "epoch": 1.88, "learning_rate": 1.8732468184368264e-05, "loss": 0.6664, "step": 169430 }, { "epoch": 1.88, "learning_rate": 1.8731545457229752e-05, "loss": 0.6265, "step": 169435 }, { "epoch": 1.88, "learning_rate": 1.873062273009124e-05, "loss": 0.5987, "step": 169440 }, { "epoch": 1.88, "learning_rate": 1.8729700002952728e-05, "loss": 0.6017, "step": 169445 }, { "epoch": 1.88, "learning_rate": 1.8728777275814215e-05, "loss": 0.6129, "step": 169450 }, { "epoch": 1.88, "learning_rate": 1.8727854548675703e-05, "loss": 0.6304, "step": 169455 }, { "epoch": 1.88, "learning_rate": 1.872693182153719e-05, "loss": 0.6635, "step": 169460 }, { "epoch": 1.88, "learning_rate": 1.872600909439868e-05, "loss": 0.6061, "step": 169465 }, { "epoch": 1.88, "learning_rate": 1.8725086367260167e-05, "loss": 0.6035, "step": 169470 }, { "epoch": 1.88, "learning_rate": 1.872416364012165e-05, "loss": 0.6155, "step": 169475 }, { "epoch": 1.88, "learning_rate": 1.8723240912983143e-05, "loss": 0.6816, "step": 169480 }, { "epoch": 1.88, "learning_rate": 1.8722318185844627e-05, "loss": 0.5866, "step": 169485 }, { "epoch": 1.88, "learning_rate": 1.8721395458706115e-05, "loss": 0.6414, "step": 169490 }, { "epoch": 1.88, "learning_rate": 1.8720472731567603e-05, "loss": 0.5981, "step": 169495 }, { "epoch": 1.88, "learning_rate": 1.871955000442909e-05, "loss": 0.627, "step": 169500 }, { "epoch": 1.88, "learning_rate": 1.871862727729058e-05, "loss": 0.6075, "step": 169505 }, { "epoch": 1.88, "learning_rate": 1.8717704550152066e-05, "loss": 0.6093, "step": 169510 }, { "epoch": 1.88, "learning_rate": 1.8716781823013554e-05, "loss": 0.5435, "step": 169515 }, { "epoch": 1.88, "learning_rate": 1.8715859095875042e-05, "loss": 0.591, "step": 169520 }, { "epoch": 1.88, "learning_rate": 1.871493636873653e-05, "loss": 0.6425, "step": 169525 }, { "epoch": 1.88, "learning_rate": 1.8714013641598018e-05, "loss": 0.5784, "step": 169530 }, { "epoch": 1.88, "learning_rate": 1.8713090914459506e-05, "loss": 0.5933, "step": 169535 }, { "epoch": 1.88, "learning_rate": 1.871216818732099e-05, "loss": 0.5922, "step": 169540 }, { "epoch": 1.88, "learning_rate": 1.871124546018248e-05, "loss": 0.6643, "step": 169545 }, { "epoch": 1.88, "learning_rate": 1.8710322733043966e-05, "loss": 0.6545, "step": 169550 }, { "epoch": 1.88, "learning_rate": 1.8709400005905457e-05, "loss": 0.6058, "step": 169555 }, { "epoch": 1.88, "learning_rate": 1.870847727876694e-05, "loss": 0.6039, "step": 169560 }, { "epoch": 1.88, "learning_rate": 1.870755455162843e-05, "loss": 0.6422, "step": 169565 }, { "epoch": 1.88, "learning_rate": 1.8706631824489917e-05, "loss": 0.642, "step": 169570 }, { "epoch": 1.88, "learning_rate": 1.8705709097351405e-05, "loss": 0.6188, "step": 169575 }, { "epoch": 1.88, "learning_rate": 1.8704786370212893e-05, "loss": 0.6129, "step": 169580 }, { "epoch": 1.88, "learning_rate": 1.870386364307438e-05, "loss": 0.5954, "step": 169585 }, { "epoch": 1.88, "learning_rate": 1.870294091593587e-05, "loss": 0.5889, "step": 169590 }, { "epoch": 1.88, "learning_rate": 1.8702018188797353e-05, "loss": 0.6468, "step": 169595 }, { "epoch": 1.88, "learning_rate": 1.8701095461658844e-05, "loss": 0.6648, "step": 169600 }, { "epoch": 1.88, "learning_rate": 1.870017273452033e-05, "loss": 0.6237, "step": 169605 }, { "epoch": 1.88, "learning_rate": 1.869925000738182e-05, "loss": 0.5883, "step": 169610 }, { "epoch": 1.88, "learning_rate": 1.8698327280243304e-05, "loss": 0.609, "step": 169615 }, { "epoch": 1.88, "learning_rate": 1.8697404553104796e-05, "loss": 0.6173, "step": 169620 }, { "epoch": 1.88, "learning_rate": 1.869648182596628e-05, "loss": 0.5986, "step": 169625 }, { "epoch": 1.88, "learning_rate": 1.8695559098827768e-05, "loss": 0.618, "step": 169630 }, { "epoch": 1.88, "learning_rate": 1.8694636371689256e-05, "loss": 0.5912, "step": 169635 }, { "epoch": 1.88, "learning_rate": 1.8693713644550744e-05, "loss": 0.585, "step": 169640 }, { "epoch": 1.88, "learning_rate": 1.869279091741223e-05, "loss": 0.6025, "step": 169645 }, { "epoch": 1.88, "learning_rate": 1.8691868190273716e-05, "loss": 0.6497, "step": 169650 }, { "epoch": 1.88, "learning_rate": 1.8690945463135207e-05, "loss": 0.6355, "step": 169655 }, { "epoch": 1.88, "learning_rate": 1.869002273599669e-05, "loss": 0.6013, "step": 169660 }, { "epoch": 1.88, "learning_rate": 1.8689100008858183e-05, "loss": 0.5974, "step": 169665 }, { "epoch": 1.88, "learning_rate": 1.8688177281719667e-05, "loss": 0.5729, "step": 169670 }, { "epoch": 1.88, "learning_rate": 1.868725455458116e-05, "loss": 0.6303, "step": 169675 }, { "epoch": 1.88, "learning_rate": 1.8686331827442643e-05, "loss": 0.638, "step": 169680 }, { "epoch": 1.88, "learning_rate": 1.8685409100304134e-05, "loss": 0.5927, "step": 169685 }, { "epoch": 1.88, "learning_rate": 1.868448637316562e-05, "loss": 0.5971, "step": 169690 }, { "epoch": 1.88, "learning_rate": 1.8683563646027107e-05, "loss": 0.5959, "step": 169695 }, { "epoch": 1.88, "learning_rate": 1.8682640918888594e-05, "loss": 0.5978, "step": 169700 }, { "epoch": 1.88, "learning_rate": 1.8681718191750082e-05, "loss": 0.6161, "step": 169705 }, { "epoch": 1.88, "learning_rate": 1.868079546461157e-05, "loss": 0.6149, "step": 169710 }, { "epoch": 1.88, "learning_rate": 1.8679872737473055e-05, "loss": 0.5764, "step": 169715 }, { "epoch": 1.88, "learning_rate": 1.8678950010334546e-05, "loss": 0.6194, "step": 169720 }, { "epoch": 1.88, "learning_rate": 1.867802728319603e-05, "loss": 0.6473, "step": 169725 }, { "epoch": 1.88, "learning_rate": 1.867710455605752e-05, "loss": 0.6364, "step": 169730 }, { "epoch": 1.88, "learning_rate": 1.8676181828919006e-05, "loss": 0.6169, "step": 169735 }, { "epoch": 1.88, "learning_rate": 1.8675259101780497e-05, "loss": 0.5742, "step": 169740 }, { "epoch": 1.88, "learning_rate": 1.867433637464198e-05, "loss": 0.5955, "step": 169745 }, { "epoch": 1.88, "learning_rate": 1.867341364750347e-05, "loss": 0.5721, "step": 169750 }, { "epoch": 1.88, "learning_rate": 1.8672490920364957e-05, "loss": 0.6611, "step": 169755 }, { "epoch": 1.88, "learning_rate": 1.8671568193226445e-05, "loss": 0.655, "step": 169760 }, { "epoch": 1.88, "learning_rate": 1.8670645466087933e-05, "loss": 0.6274, "step": 169765 }, { "epoch": 1.88, "learning_rate": 1.866972273894942e-05, "loss": 0.5964, "step": 169770 }, { "epoch": 1.88, "learning_rate": 1.866880001181091e-05, "loss": 0.6333, "step": 169775 }, { "epoch": 1.88, "learning_rate": 1.8667877284672397e-05, "loss": 0.6118, "step": 169780 }, { "epoch": 1.88, "learning_rate": 1.8666954557533884e-05, "loss": 0.678, "step": 169785 }, { "epoch": 1.88, "learning_rate": 1.8666031830395372e-05, "loss": 0.5654, "step": 169790 }, { "epoch": 1.88, "learning_rate": 1.866510910325686e-05, "loss": 0.6515, "step": 169795 }, { "epoch": 1.88, "learning_rate": 1.8664186376118345e-05, "loss": 0.5942, "step": 169800 }, { "epoch": 1.88, "learning_rate": 1.8663263648979832e-05, "loss": 0.6485, "step": 169805 }, { "epoch": 1.88, "learning_rate": 1.866234092184132e-05, "loss": 0.6479, "step": 169810 }, { "epoch": 1.88, "learning_rate": 1.8661418194702808e-05, "loss": 0.6143, "step": 169815 }, { "epoch": 1.88, "learning_rate": 1.8660495467564296e-05, "loss": 0.5992, "step": 169820 }, { "epoch": 1.88, "learning_rate": 1.8659572740425784e-05, "loss": 0.6098, "step": 169825 }, { "epoch": 1.88, "learning_rate": 1.865865001328727e-05, "loss": 0.5768, "step": 169830 }, { "epoch": 1.88, "learning_rate": 1.865772728614876e-05, "loss": 0.6664, "step": 169835 }, { "epoch": 1.88, "learning_rate": 1.8656804559010247e-05, "loss": 0.6192, "step": 169840 }, { "epoch": 1.88, "learning_rate": 1.8655881831871735e-05, "loss": 0.6322, "step": 169845 }, { "epoch": 1.88, "learning_rate": 1.8654959104733223e-05, "loss": 0.6229, "step": 169850 }, { "epoch": 1.88, "learning_rate": 1.865403637759471e-05, "loss": 0.6502, "step": 169855 }, { "epoch": 1.88, "learning_rate": 1.86531136504562e-05, "loss": 0.6525, "step": 169860 }, { "epoch": 1.88, "learning_rate": 1.8652190923317687e-05, "loss": 0.6055, "step": 169865 }, { "epoch": 1.88, "learning_rate": 1.865126819617917e-05, "loss": 0.5708, "step": 169870 }, { "epoch": 1.88, "learning_rate": 1.865034546904066e-05, "loss": 0.6306, "step": 169875 }, { "epoch": 1.88, "learning_rate": 1.8649422741902147e-05, "loss": 0.5797, "step": 169880 }, { "epoch": 1.88, "learning_rate": 1.8648500014763635e-05, "loss": 0.6351, "step": 169885 }, { "epoch": 1.88, "learning_rate": 1.8647577287625122e-05, "loss": 0.6312, "step": 169890 }, { "epoch": 1.88, "learning_rate": 1.864665456048661e-05, "loss": 0.5908, "step": 169895 }, { "epoch": 1.88, "learning_rate": 1.8645731833348098e-05, "loss": 0.6541, "step": 169900 }, { "epoch": 1.88, "learning_rate": 1.8644809106209586e-05, "loss": 0.6329, "step": 169905 }, { "epoch": 1.88, "learning_rate": 1.8643886379071074e-05, "loss": 0.6245, "step": 169910 }, { "epoch": 1.88, "learning_rate": 1.8642963651932562e-05, "loss": 0.6515, "step": 169915 }, { "epoch": 1.88, "learning_rate": 1.864204092479405e-05, "loss": 0.6245, "step": 169920 }, { "epoch": 1.88, "learning_rate": 1.8641118197655534e-05, "loss": 0.5545, "step": 169925 }, { "epoch": 1.88, "learning_rate": 1.8640195470517025e-05, "loss": 0.6393, "step": 169930 }, { "epoch": 1.88, "learning_rate": 1.863927274337851e-05, "loss": 0.6327, "step": 169935 }, { "epoch": 1.88, "learning_rate": 1.863835001624e-05, "loss": 0.623, "step": 169940 }, { "epoch": 1.88, "learning_rate": 1.8637427289101485e-05, "loss": 0.6087, "step": 169945 }, { "epoch": 1.88, "learning_rate": 1.8636504561962973e-05, "loss": 0.6362, "step": 169950 }, { "epoch": 1.88, "learning_rate": 1.863558183482446e-05, "loss": 0.6059, "step": 169955 }, { "epoch": 1.88, "learning_rate": 1.863465910768595e-05, "loss": 0.5889, "step": 169960 }, { "epoch": 1.88, "learning_rate": 1.8633736380547437e-05, "loss": 0.6053, "step": 169965 }, { "epoch": 1.88, "learning_rate": 1.8632813653408925e-05, "loss": 0.6155, "step": 169970 }, { "epoch": 1.88, "learning_rate": 1.8631890926270412e-05, "loss": 0.5804, "step": 169975 }, { "epoch": 1.88, "learning_rate": 1.8630968199131897e-05, "loss": 0.611, "step": 169980 }, { "epoch": 1.88, "learning_rate": 1.8630045471993388e-05, "loss": 0.5889, "step": 169985 }, { "epoch": 1.88, "learning_rate": 1.8629122744854873e-05, "loss": 0.5854, "step": 169990 }, { "epoch": 1.88, "learning_rate": 1.8628200017716364e-05, "loss": 0.6213, "step": 169995 }, { "epoch": 1.88, "learning_rate": 1.862727729057785e-05, "loss": 0.6462, "step": 170000 }, { "epoch": 1.88, "eval_loss": 0.5901840925216675, "eval_runtime": 69.5415, "eval_samples_per_second": 28.76, "eval_steps_per_second": 14.38, "step": 170000 }, { "epoch": 1.88, "learning_rate": 1.862635456343934e-05, "loss": 0.5858, "step": 170005 }, { "epoch": 1.88, "learning_rate": 1.8625431836300824e-05, "loss": 0.5674, "step": 170010 }, { "epoch": 1.88, "learning_rate": 1.8624509109162315e-05, "loss": 0.5914, "step": 170015 }, { "epoch": 1.88, "learning_rate": 1.86235863820238e-05, "loss": 0.6358, "step": 170020 }, { "epoch": 1.88, "learning_rate": 1.8622663654885288e-05, "loss": 0.587, "step": 170025 }, { "epoch": 1.88, "learning_rate": 1.8621740927746775e-05, "loss": 0.6508, "step": 170030 }, { "epoch": 1.88, "learning_rate": 1.862081820060826e-05, "loss": 0.6189, "step": 170035 }, { "epoch": 1.88, "learning_rate": 1.861989547346975e-05, "loss": 0.7285, "step": 170040 }, { "epoch": 1.88, "learning_rate": 1.8618972746331236e-05, "loss": 0.6581, "step": 170045 }, { "epoch": 1.88, "learning_rate": 1.8618050019192727e-05, "loss": 0.6201, "step": 170050 }, { "epoch": 1.88, "learning_rate": 1.861712729205421e-05, "loss": 0.5916, "step": 170055 }, { "epoch": 1.88, "learning_rate": 1.8616204564915703e-05, "loss": 0.6492, "step": 170060 }, { "epoch": 1.88, "learning_rate": 1.8615281837777187e-05, "loss": 0.5529, "step": 170065 }, { "epoch": 1.88, "learning_rate": 1.8614359110638678e-05, "loss": 0.619, "step": 170070 }, { "epoch": 1.88, "learning_rate": 1.8613436383500163e-05, "loss": 0.6561, "step": 170075 }, { "epoch": 1.88, "learning_rate": 1.861251365636165e-05, "loss": 0.6498, "step": 170080 }, { "epoch": 1.88, "learning_rate": 1.861159092922314e-05, "loss": 0.5708, "step": 170085 }, { "epoch": 1.88, "learning_rate": 1.8610668202084626e-05, "loss": 0.6341, "step": 170090 }, { "epoch": 1.88, "learning_rate": 1.8609745474946114e-05, "loss": 0.5906, "step": 170095 }, { "epoch": 1.88, "learning_rate": 1.86088227478076e-05, "loss": 0.6015, "step": 170100 }, { "epoch": 1.88, "learning_rate": 1.860790002066909e-05, "loss": 0.5879, "step": 170105 }, { "epoch": 1.88, "learning_rate": 1.8606977293530574e-05, "loss": 0.6436, "step": 170110 }, { "epoch": 1.88, "learning_rate": 1.8606054566392065e-05, "loss": 0.5912, "step": 170115 }, { "epoch": 1.88, "learning_rate": 1.860513183925355e-05, "loss": 0.6909, "step": 170120 }, { "epoch": 1.88, "learning_rate": 1.860420911211504e-05, "loss": 0.5916, "step": 170125 }, { "epoch": 1.88, "learning_rate": 1.8603286384976526e-05, "loss": 0.5624, "step": 170130 }, { "epoch": 1.88, "learning_rate": 1.8602363657838013e-05, "loss": 0.6122, "step": 170135 }, { "epoch": 1.88, "learning_rate": 1.86014409306995e-05, "loss": 0.6462, "step": 170140 }, { "epoch": 1.88, "learning_rate": 1.860051820356099e-05, "loss": 0.5694, "step": 170145 }, { "epoch": 1.88, "learning_rate": 1.8599595476422477e-05, "loss": 0.6092, "step": 170150 }, { "epoch": 1.88, "learning_rate": 1.8598672749283965e-05, "loss": 0.5788, "step": 170155 }, { "epoch": 1.88, "learning_rate": 1.8597750022145453e-05, "loss": 0.5925, "step": 170160 }, { "epoch": 1.88, "learning_rate": 1.859682729500694e-05, "loss": 0.6319, "step": 170165 }, { "epoch": 1.88, "learning_rate": 1.859590456786843e-05, "loss": 0.6499, "step": 170170 }, { "epoch": 1.88, "learning_rate": 1.8594981840729913e-05, "loss": 0.6555, "step": 170175 }, { "epoch": 1.88, "learning_rate": 1.8594059113591404e-05, "loss": 0.6148, "step": 170180 }, { "epoch": 1.88, "learning_rate": 1.859313638645289e-05, "loss": 0.7054, "step": 170185 }, { "epoch": 1.88, "learning_rate": 1.8592213659314376e-05, "loss": 0.5867, "step": 170190 }, { "epoch": 1.88, "learning_rate": 1.8591290932175864e-05, "loss": 0.5848, "step": 170195 }, { "epoch": 1.88, "learning_rate": 1.8590368205037352e-05, "loss": 0.561, "step": 170200 }, { "epoch": 1.88, "learning_rate": 1.858944547789884e-05, "loss": 0.609, "step": 170205 }, { "epoch": 1.88, "learning_rate": 1.8588522750760328e-05, "loss": 0.6153, "step": 170210 }, { "epoch": 1.88, "learning_rate": 1.8587600023621816e-05, "loss": 0.5902, "step": 170215 }, { "epoch": 1.88, "learning_rate": 1.8586677296483304e-05, "loss": 0.6124, "step": 170220 }, { "epoch": 1.88, "learning_rate": 1.858575456934479e-05, "loss": 0.5726, "step": 170225 }, { "epoch": 1.88, "learning_rate": 1.858483184220628e-05, "loss": 0.6474, "step": 170230 }, { "epoch": 1.88, "learning_rate": 1.8583909115067767e-05, "loss": 0.6474, "step": 170235 }, { "epoch": 1.89, "learning_rate": 1.8582986387929255e-05, "loss": 0.6379, "step": 170240 }, { "epoch": 1.89, "learning_rate": 1.8582063660790743e-05, "loss": 0.5813, "step": 170245 }, { "epoch": 1.89, "learning_rate": 1.8581140933652227e-05, "loss": 0.5894, "step": 170250 }, { "epoch": 1.89, "learning_rate": 1.8580218206513715e-05, "loss": 0.6064, "step": 170255 }, { "epoch": 1.89, "learning_rate": 1.8579295479375203e-05, "loss": 0.6316, "step": 170260 }, { "epoch": 1.89, "learning_rate": 1.857837275223669e-05, "loss": 0.6419, "step": 170265 }, { "epoch": 1.89, "learning_rate": 1.857745002509818e-05, "loss": 0.6018, "step": 170270 }, { "epoch": 1.89, "learning_rate": 1.8576527297959666e-05, "loss": 0.6245, "step": 170275 }, { "epoch": 1.89, "learning_rate": 1.8575604570821154e-05, "loss": 0.6504, "step": 170280 }, { "epoch": 1.89, "learning_rate": 1.8574681843682642e-05, "loss": 0.6234, "step": 170285 }, { "epoch": 1.89, "learning_rate": 1.857375911654413e-05, "loss": 0.6537, "step": 170290 }, { "epoch": 1.89, "learning_rate": 1.8572836389405618e-05, "loss": 0.6088, "step": 170295 }, { "epoch": 1.89, "learning_rate": 1.8571913662267106e-05, "loss": 0.6346, "step": 170300 }, { "epoch": 1.89, "learning_rate": 1.8570990935128594e-05, "loss": 0.6126, "step": 170305 }, { "epoch": 1.89, "learning_rate": 1.8570068207990078e-05, "loss": 0.6107, "step": 170310 }, { "epoch": 1.89, "learning_rate": 1.856914548085157e-05, "loss": 0.5992, "step": 170315 }, { "epoch": 1.89, "learning_rate": 1.8568222753713054e-05, "loss": 0.5777, "step": 170320 }, { "epoch": 1.89, "learning_rate": 1.856730002657454e-05, "loss": 0.6322, "step": 170325 }, { "epoch": 1.89, "learning_rate": 1.856637729943603e-05, "loss": 0.5723, "step": 170330 }, { "epoch": 1.89, "learning_rate": 1.8565454572297517e-05, "loss": 0.6254, "step": 170335 }, { "epoch": 1.89, "learning_rate": 1.8564531845159005e-05, "loss": 0.6189, "step": 170340 }, { "epoch": 1.89, "learning_rate": 1.8563609118020493e-05, "loss": 0.6542, "step": 170345 }, { "epoch": 1.89, "learning_rate": 1.856268639088198e-05, "loss": 0.6284, "step": 170350 }, { "epoch": 1.89, "learning_rate": 1.856176366374347e-05, "loss": 0.6362, "step": 170355 }, { "epoch": 1.89, "learning_rate": 1.8560840936604957e-05, "loss": 0.5749, "step": 170360 }, { "epoch": 1.89, "learning_rate": 1.855991820946644e-05, "loss": 0.6676, "step": 170365 }, { "epoch": 1.89, "learning_rate": 1.8558995482327932e-05, "loss": 0.5959, "step": 170370 }, { "epoch": 1.89, "learning_rate": 1.8558072755189417e-05, "loss": 0.6395, "step": 170375 }, { "epoch": 1.89, "learning_rate": 1.8557150028050908e-05, "loss": 0.5788, "step": 170380 }, { "epoch": 1.89, "learning_rate": 1.8556227300912392e-05, "loss": 0.6357, "step": 170385 }, { "epoch": 1.89, "learning_rate": 1.8555304573773884e-05, "loss": 0.6526, "step": 170390 }, { "epoch": 1.89, "learning_rate": 1.8554381846635368e-05, "loss": 0.5753, "step": 170395 }, { "epoch": 1.89, "learning_rate": 1.8553459119496856e-05, "loss": 0.7053, "step": 170400 }, { "epoch": 1.89, "learning_rate": 1.8552536392358344e-05, "loss": 0.6157, "step": 170405 }, { "epoch": 1.89, "learning_rate": 1.855161366521983e-05, "loss": 0.5968, "step": 170410 }, { "epoch": 1.89, "learning_rate": 1.855069093808132e-05, "loss": 0.6111, "step": 170415 }, { "epoch": 1.89, "learning_rate": 1.8549768210942804e-05, "loss": 0.5947, "step": 170420 }, { "epoch": 1.89, "learning_rate": 1.8548845483804295e-05, "loss": 0.6159, "step": 170425 }, { "epoch": 1.89, "learning_rate": 1.854792275666578e-05, "loss": 0.5808, "step": 170430 }, { "epoch": 1.89, "learning_rate": 1.854700002952727e-05, "loss": 0.6595, "step": 170435 }, { "epoch": 1.89, "learning_rate": 1.8546077302388755e-05, "loss": 0.5965, "step": 170440 }, { "epoch": 1.89, "learning_rate": 1.8545154575250247e-05, "loss": 0.6088, "step": 170445 }, { "epoch": 1.89, "learning_rate": 1.854423184811173e-05, "loss": 0.6067, "step": 170450 }, { "epoch": 1.89, "learning_rate": 1.8543309120973222e-05, "loss": 0.6038, "step": 170455 }, { "epoch": 1.89, "learning_rate": 1.8542386393834707e-05, "loss": 0.6436, "step": 170460 }, { "epoch": 1.89, "learning_rate": 1.8541463666696195e-05, "loss": 0.6685, "step": 170465 }, { "epoch": 1.89, "learning_rate": 1.8540540939557682e-05, "loss": 0.6089, "step": 170470 }, { "epoch": 1.89, "learning_rate": 1.853961821241917e-05, "loss": 0.6348, "step": 170475 }, { "epoch": 1.89, "learning_rate": 1.8538695485280658e-05, "loss": 0.6039, "step": 170480 }, { "epoch": 1.89, "learning_rate": 1.8537772758142143e-05, "loss": 0.6079, "step": 170485 }, { "epoch": 1.89, "learning_rate": 1.8536850031003634e-05, "loss": 0.5974, "step": 170490 }, { "epoch": 1.89, "learning_rate": 1.8535927303865118e-05, "loss": 0.5963, "step": 170495 }, { "epoch": 1.89, "learning_rate": 1.853500457672661e-05, "loss": 0.6178, "step": 170500 }, { "epoch": 1.89, "learning_rate": 1.8534081849588094e-05, "loss": 0.602, "step": 170505 }, { "epoch": 1.89, "learning_rate": 1.8533159122449585e-05, "loss": 0.637, "step": 170510 }, { "epoch": 1.89, "learning_rate": 1.853223639531107e-05, "loss": 0.5866, "step": 170515 }, { "epoch": 1.89, "learning_rate": 1.8531313668172558e-05, "loss": 0.6004, "step": 170520 }, { "epoch": 1.89, "learning_rate": 1.8530390941034045e-05, "loss": 0.5255, "step": 170525 }, { "epoch": 1.89, "learning_rate": 1.8529468213895533e-05, "loss": 0.6397, "step": 170530 }, { "epoch": 1.89, "learning_rate": 1.852854548675702e-05, "loss": 0.6225, "step": 170535 }, { "epoch": 1.89, "learning_rate": 1.852762275961851e-05, "loss": 0.607, "step": 170540 }, { "epoch": 1.89, "learning_rate": 1.8526700032479997e-05, "loss": 0.6558, "step": 170545 }, { "epoch": 1.89, "learning_rate": 1.852577730534148e-05, "loss": 0.6159, "step": 170550 }, { "epoch": 1.89, "learning_rate": 1.8524854578202972e-05, "loss": 0.5946, "step": 170555 }, { "epoch": 1.89, "learning_rate": 1.8523931851064457e-05, "loss": 0.5884, "step": 170560 }, { "epoch": 1.89, "learning_rate": 1.8523009123925948e-05, "loss": 0.5992, "step": 170565 }, { "epoch": 1.89, "learning_rate": 1.8522086396787433e-05, "loss": 0.5985, "step": 170570 }, { "epoch": 1.89, "learning_rate": 1.852116366964892e-05, "loss": 0.6374, "step": 170575 }, { "epoch": 1.89, "learning_rate": 1.8520240942510408e-05, "loss": 0.6221, "step": 170580 }, { "epoch": 1.89, "learning_rate": 1.8519318215371896e-05, "loss": 0.6347, "step": 170585 }, { "epoch": 1.89, "learning_rate": 1.8518395488233384e-05, "loss": 0.6235, "step": 170590 }, { "epoch": 1.89, "learning_rate": 1.8517472761094872e-05, "loss": 0.6313, "step": 170595 }, { "epoch": 1.89, "learning_rate": 1.851655003395636e-05, "loss": 0.5837, "step": 170600 }, { "epoch": 1.89, "learning_rate": 1.8515627306817848e-05, "loss": 0.6227, "step": 170605 }, { "epoch": 1.89, "learning_rate": 1.8514704579679335e-05, "loss": 0.622, "step": 170610 }, { "epoch": 1.89, "learning_rate": 1.8513781852540823e-05, "loss": 0.6097, "step": 170615 }, { "epoch": 1.89, "learning_rate": 1.851285912540231e-05, "loss": 0.641, "step": 170620 }, { "epoch": 1.89, "learning_rate": 1.85119363982638e-05, "loss": 0.5814, "step": 170625 }, { "epoch": 1.89, "learning_rate": 1.8511013671125287e-05, "loss": 0.646, "step": 170630 }, { "epoch": 1.89, "learning_rate": 1.851009094398677e-05, "loss": 0.6324, "step": 170635 }, { "epoch": 1.89, "learning_rate": 1.850916821684826e-05, "loss": 0.6235, "step": 170640 }, { "epoch": 1.89, "learning_rate": 1.8508245489709747e-05, "loss": 0.5933, "step": 170645 }, { "epoch": 1.89, "learning_rate": 1.8507322762571235e-05, "loss": 0.6057, "step": 170650 }, { "epoch": 1.89, "learning_rate": 1.8506400035432723e-05, "loss": 0.5854, "step": 170655 }, { "epoch": 1.89, "learning_rate": 1.850547730829421e-05, "loss": 0.6187, "step": 170660 }, { "epoch": 1.89, "learning_rate": 1.85045545811557e-05, "loss": 0.5819, "step": 170665 }, { "epoch": 1.89, "learning_rate": 1.8503631854017186e-05, "loss": 0.6118, "step": 170670 }, { "epoch": 1.89, "learning_rate": 1.8502709126878674e-05, "loss": 0.6095, "step": 170675 }, { "epoch": 1.89, "learning_rate": 1.8501786399740162e-05, "loss": 0.6663, "step": 170680 }, { "epoch": 1.89, "learning_rate": 1.850086367260165e-05, "loss": 0.6191, "step": 170685 }, { "epoch": 1.89, "learning_rate": 1.8499940945463138e-05, "loss": 0.6284, "step": 170690 }, { "epoch": 1.89, "learning_rate": 1.8499018218324622e-05, "loss": 0.6654, "step": 170695 }, { "epoch": 1.89, "learning_rate": 1.8498095491186113e-05, "loss": 0.57, "step": 170700 }, { "epoch": 1.89, "learning_rate": 1.8497172764047598e-05, "loss": 0.5893, "step": 170705 }, { "epoch": 1.89, "learning_rate": 1.8496250036909086e-05, "loss": 0.5809, "step": 170710 }, { "epoch": 1.89, "learning_rate": 1.8495327309770573e-05, "loss": 0.5769, "step": 170715 }, { "epoch": 1.89, "learning_rate": 1.849440458263206e-05, "loss": 0.6097, "step": 170720 }, { "epoch": 1.89, "learning_rate": 1.849348185549355e-05, "loss": 0.6572, "step": 170725 }, { "epoch": 1.89, "learning_rate": 1.8492559128355037e-05, "loss": 0.6074, "step": 170730 }, { "epoch": 1.89, "learning_rate": 1.8491636401216525e-05, "loss": 0.5988, "step": 170735 }, { "epoch": 1.89, "learning_rate": 1.8490713674078013e-05, "loss": 0.6055, "step": 170740 }, { "epoch": 1.89, "learning_rate": 1.84897909469395e-05, "loss": 0.6688, "step": 170745 }, { "epoch": 1.89, "learning_rate": 1.8488868219800985e-05, "loss": 0.6469, "step": 170750 }, { "epoch": 1.89, "learning_rate": 1.8487945492662476e-05, "loss": 0.6296, "step": 170755 }, { "epoch": 1.89, "learning_rate": 1.848702276552396e-05, "loss": 0.5913, "step": 170760 }, { "epoch": 1.89, "learning_rate": 1.8486100038385452e-05, "loss": 0.5712, "step": 170765 }, { "epoch": 1.89, "learning_rate": 1.8485177311246936e-05, "loss": 0.6192, "step": 170770 }, { "epoch": 1.89, "learning_rate": 1.8484254584108428e-05, "loss": 0.6173, "step": 170775 }, { "epoch": 1.89, "learning_rate": 1.8483331856969912e-05, "loss": 0.5918, "step": 170780 }, { "epoch": 1.89, "learning_rate": 1.84824091298314e-05, "loss": 0.6528, "step": 170785 }, { "epoch": 1.89, "learning_rate": 1.8481486402692888e-05, "loss": 0.5687, "step": 170790 }, { "epoch": 1.89, "learning_rate": 1.8480563675554376e-05, "loss": 0.6118, "step": 170795 }, { "epoch": 1.89, "learning_rate": 1.8479640948415863e-05, "loss": 0.6073, "step": 170800 }, { "epoch": 1.89, "learning_rate": 1.847871822127735e-05, "loss": 0.6026, "step": 170805 }, { "epoch": 1.89, "learning_rate": 1.847779549413884e-05, "loss": 0.6137, "step": 170810 }, { "epoch": 1.89, "learning_rate": 1.8476872767000324e-05, "loss": 0.601, "step": 170815 }, { "epoch": 1.89, "learning_rate": 1.8475950039861815e-05, "loss": 0.6134, "step": 170820 }, { "epoch": 1.89, "learning_rate": 1.84750273127233e-05, "loss": 0.6421, "step": 170825 }, { "epoch": 1.89, "learning_rate": 1.847410458558479e-05, "loss": 0.5853, "step": 170830 }, { "epoch": 1.89, "learning_rate": 1.8473181858446275e-05, "loss": 0.6245, "step": 170835 }, { "epoch": 1.89, "learning_rate": 1.8472259131307766e-05, "loss": 0.6336, "step": 170840 }, { "epoch": 1.89, "learning_rate": 1.847133640416925e-05, "loss": 0.6321, "step": 170845 }, { "epoch": 1.89, "learning_rate": 1.847041367703074e-05, "loss": 0.5979, "step": 170850 }, { "epoch": 1.89, "learning_rate": 1.8469490949892226e-05, "loss": 0.6197, "step": 170855 }, { "epoch": 1.89, "learning_rate": 1.8468568222753714e-05, "loss": 0.5813, "step": 170860 }, { "epoch": 1.89, "learning_rate": 1.8467645495615202e-05, "loss": 0.6078, "step": 170865 }, { "epoch": 1.89, "learning_rate": 1.8466722768476687e-05, "loss": 0.6044, "step": 170870 }, { "epoch": 1.89, "learning_rate": 1.8465800041338178e-05, "loss": 0.6513, "step": 170875 }, { "epoch": 1.89, "learning_rate": 1.8464877314199662e-05, "loss": 0.5932, "step": 170880 }, { "epoch": 1.89, "learning_rate": 1.8463954587061154e-05, "loss": 0.5649, "step": 170885 }, { "epoch": 1.89, "learning_rate": 1.8463031859922638e-05, "loss": 0.5371, "step": 170890 }, { "epoch": 1.89, "learning_rate": 1.846210913278413e-05, "loss": 0.5752, "step": 170895 }, { "epoch": 1.89, "learning_rate": 1.8461186405645614e-05, "loss": 0.6202, "step": 170900 }, { "epoch": 1.89, "learning_rate": 1.84602636785071e-05, "loss": 0.5789, "step": 170905 }, { "epoch": 1.89, "learning_rate": 1.845934095136859e-05, "loss": 0.6053, "step": 170910 }, { "epoch": 1.89, "learning_rate": 1.8458418224230077e-05, "loss": 0.6211, "step": 170915 }, { "epoch": 1.89, "learning_rate": 1.8457495497091565e-05, "loss": 0.5997, "step": 170920 }, { "epoch": 1.89, "learning_rate": 1.8456572769953053e-05, "loss": 0.5856, "step": 170925 }, { "epoch": 1.89, "learning_rate": 1.845565004281454e-05, "loss": 0.5929, "step": 170930 }, { "epoch": 1.89, "learning_rate": 1.8454727315676025e-05, "loss": 0.6044, "step": 170935 }, { "epoch": 1.89, "learning_rate": 1.8453804588537516e-05, "loss": 0.5772, "step": 170940 }, { "epoch": 1.89, "learning_rate": 1.8452881861399e-05, "loss": 0.653, "step": 170945 }, { "epoch": 1.89, "learning_rate": 1.8451959134260492e-05, "loss": 0.6528, "step": 170950 }, { "epoch": 1.89, "learning_rate": 1.8451036407121977e-05, "loss": 0.661, "step": 170955 }, { "epoch": 1.89, "learning_rate": 1.8450113679983468e-05, "loss": 0.6491, "step": 170960 }, { "epoch": 1.89, "learning_rate": 1.8449190952844952e-05, "loss": 0.6707, "step": 170965 }, { "epoch": 1.89, "learning_rate": 1.844826822570644e-05, "loss": 0.6557, "step": 170970 }, { "epoch": 1.89, "learning_rate": 1.8447345498567928e-05, "loss": 0.6199, "step": 170975 }, { "epoch": 1.89, "learning_rate": 1.8446422771429416e-05, "loss": 0.5978, "step": 170980 }, { "epoch": 1.89, "learning_rate": 1.8445500044290904e-05, "loss": 0.6338, "step": 170985 }, { "epoch": 1.89, "learning_rate": 1.844457731715239e-05, "loss": 0.6704, "step": 170990 }, { "epoch": 1.89, "learning_rate": 1.844365459001388e-05, "loss": 0.6048, "step": 170995 }, { "epoch": 1.89, "learning_rate": 1.8442731862875367e-05, "loss": 0.5811, "step": 171000 }, { "epoch": 1.89, "eval_loss": 0.6029744744300842, "eval_runtime": 69.4676, "eval_samples_per_second": 28.79, "eval_steps_per_second": 14.395, "step": 171000 }, { "epoch": 1.89, "learning_rate": 1.8441809135736855e-05, "loss": 0.6192, "step": 171005 }, { "epoch": 1.89, "learning_rate": 1.844088640859834e-05, "loss": 0.6329, "step": 171010 }, { "epoch": 1.89, "learning_rate": 1.843996368145983e-05, "loss": 0.6372, "step": 171015 }, { "epoch": 1.89, "learning_rate": 1.8439040954321315e-05, "loss": 0.6133, "step": 171020 }, { "epoch": 1.89, "learning_rate": 1.8438118227182803e-05, "loss": 0.6443, "step": 171025 }, { "epoch": 1.89, "learning_rate": 1.843719550004429e-05, "loss": 0.6049, "step": 171030 }, { "epoch": 1.89, "learning_rate": 1.843627277290578e-05, "loss": 0.5806, "step": 171035 }, { "epoch": 1.89, "learning_rate": 1.8435350045767267e-05, "loss": 0.5747, "step": 171040 }, { "epoch": 1.89, "learning_rate": 1.8434427318628755e-05, "loss": 0.6116, "step": 171045 }, { "epoch": 1.89, "learning_rate": 1.8433504591490242e-05, "loss": 0.6105, "step": 171050 }, { "epoch": 1.89, "learning_rate": 1.843258186435173e-05, "loss": 0.5751, "step": 171055 }, { "epoch": 1.89, "learning_rate": 1.8431659137213218e-05, "loss": 0.6157, "step": 171060 }, { "epoch": 1.89, "learning_rate": 1.8430736410074706e-05, "loss": 0.5985, "step": 171065 }, { "epoch": 1.89, "learning_rate": 1.8429813682936194e-05, "loss": 0.5991, "step": 171070 }, { "epoch": 1.89, "learning_rate": 1.842889095579768e-05, "loss": 0.6462, "step": 171075 }, { "epoch": 1.89, "learning_rate": 1.8427968228659166e-05, "loss": 0.586, "step": 171080 }, { "epoch": 1.89, "learning_rate": 1.8427045501520654e-05, "loss": 0.569, "step": 171085 }, { "epoch": 1.89, "learning_rate": 1.8426122774382142e-05, "loss": 0.6026, "step": 171090 }, { "epoch": 1.89, "learning_rate": 1.842520004724363e-05, "loss": 0.5713, "step": 171095 }, { "epoch": 1.89, "learning_rate": 1.8424277320105117e-05, "loss": 0.6438, "step": 171100 }, { "epoch": 1.89, "learning_rate": 1.8423354592966605e-05, "loss": 0.6045, "step": 171105 }, { "epoch": 1.89, "learning_rate": 1.8422431865828093e-05, "loss": 0.6766, "step": 171110 }, { "epoch": 1.89, "learning_rate": 1.842150913868958e-05, "loss": 0.613, "step": 171115 }, { "epoch": 1.89, "learning_rate": 1.842058641155107e-05, "loss": 0.5735, "step": 171120 }, { "epoch": 1.89, "learning_rate": 1.8419663684412557e-05, "loss": 0.6217, "step": 171125 }, { "epoch": 1.89, "learning_rate": 1.8418740957274045e-05, "loss": 0.6306, "step": 171130 }, { "epoch": 1.89, "learning_rate": 1.841781823013553e-05, "loss": 0.6154, "step": 171135 }, { "epoch": 1.89, "learning_rate": 1.841689550299702e-05, "loss": 0.6423, "step": 171140 }, { "epoch": 1.9, "learning_rate": 1.8415972775858505e-05, "loss": 0.5833, "step": 171145 }, { "epoch": 1.9, "learning_rate": 1.8415050048719996e-05, "loss": 0.6673, "step": 171150 }, { "epoch": 1.9, "learning_rate": 1.841412732158148e-05, "loss": 0.5861, "step": 171155 }, { "epoch": 1.9, "learning_rate": 1.8413204594442968e-05, "loss": 0.5819, "step": 171160 }, { "epoch": 1.9, "learning_rate": 1.8412281867304456e-05, "loss": 0.6464, "step": 171165 }, { "epoch": 1.9, "learning_rate": 1.8411359140165944e-05, "loss": 0.5762, "step": 171170 }, { "epoch": 1.9, "learning_rate": 1.8410436413027432e-05, "loss": 0.6279, "step": 171175 }, { "epoch": 1.9, "learning_rate": 1.840951368588892e-05, "loss": 0.5679, "step": 171180 }, { "epoch": 1.9, "learning_rate": 1.8408590958750407e-05, "loss": 0.599, "step": 171185 }, { "epoch": 1.9, "learning_rate": 1.8407668231611895e-05, "loss": 0.5878, "step": 171190 }, { "epoch": 1.9, "learning_rate": 1.8406745504473383e-05, "loss": 0.6532, "step": 171195 }, { "epoch": 1.9, "learning_rate": 1.8405822777334868e-05, "loss": 0.6141, "step": 171200 }, { "epoch": 1.9, "learning_rate": 1.840490005019636e-05, "loss": 0.6491, "step": 171205 }, { "epoch": 1.9, "learning_rate": 1.8403977323057843e-05, "loss": 0.6036, "step": 171210 }, { "epoch": 1.9, "learning_rate": 1.8403054595919335e-05, "loss": 0.623, "step": 171215 }, { "epoch": 1.9, "learning_rate": 1.840213186878082e-05, "loss": 0.6575, "step": 171220 }, { "epoch": 1.9, "learning_rate": 1.840120914164231e-05, "loss": 0.6062, "step": 171225 }, { "epoch": 1.9, "learning_rate": 1.8400286414503795e-05, "loss": 0.6427, "step": 171230 }, { "epoch": 1.9, "learning_rate": 1.8399363687365283e-05, "loss": 0.5588, "step": 171235 }, { "epoch": 1.9, "learning_rate": 1.839844096022677e-05, "loss": 0.6643, "step": 171240 }, { "epoch": 1.9, "learning_rate": 1.8397518233088258e-05, "loss": 0.6241, "step": 171245 }, { "epoch": 1.9, "learning_rate": 1.8396595505949746e-05, "loss": 0.6236, "step": 171250 }, { "epoch": 1.9, "learning_rate": 1.839567277881123e-05, "loss": 0.602, "step": 171255 }, { "epoch": 1.9, "learning_rate": 1.8394750051672722e-05, "loss": 0.5767, "step": 171260 }, { "epoch": 1.9, "learning_rate": 1.8393827324534206e-05, "loss": 0.6702, "step": 171265 }, { "epoch": 1.9, "learning_rate": 1.8392904597395698e-05, "loss": 0.5618, "step": 171270 }, { "epoch": 1.9, "learning_rate": 1.8391981870257182e-05, "loss": 0.6421, "step": 171275 }, { "epoch": 1.9, "learning_rate": 1.8391059143118673e-05, "loss": 0.5957, "step": 171280 }, { "epoch": 1.9, "learning_rate": 1.8390136415980158e-05, "loss": 0.5912, "step": 171285 }, { "epoch": 1.9, "learning_rate": 1.8389213688841646e-05, "loss": 0.6031, "step": 171290 }, { "epoch": 1.9, "learning_rate": 1.8388290961703133e-05, "loss": 0.6398, "step": 171295 }, { "epoch": 1.9, "learning_rate": 1.838736823456462e-05, "loss": 0.6039, "step": 171300 }, { "epoch": 1.9, "learning_rate": 1.838644550742611e-05, "loss": 0.6042, "step": 171305 }, { "epoch": 1.9, "learning_rate": 1.8385522780287594e-05, "loss": 0.612, "step": 171310 }, { "epoch": 1.9, "learning_rate": 1.8384600053149085e-05, "loss": 0.6055, "step": 171315 }, { "epoch": 1.9, "learning_rate": 1.838367732601057e-05, "loss": 0.6311, "step": 171320 }, { "epoch": 1.9, "learning_rate": 1.838275459887206e-05, "loss": 0.6583, "step": 171325 }, { "epoch": 1.9, "learning_rate": 1.8381831871733545e-05, "loss": 0.5955, "step": 171330 }, { "epoch": 1.9, "learning_rate": 1.8380909144595036e-05, "loss": 0.6316, "step": 171335 }, { "epoch": 1.9, "learning_rate": 1.837998641745652e-05, "loss": 0.6104, "step": 171340 }, { "epoch": 1.9, "learning_rate": 1.8379063690318012e-05, "loss": 0.6363, "step": 171345 }, { "epoch": 1.9, "learning_rate": 1.8378140963179496e-05, "loss": 0.5867, "step": 171350 }, { "epoch": 1.9, "learning_rate": 1.8377218236040984e-05, "loss": 0.6315, "step": 171355 }, { "epoch": 1.9, "learning_rate": 1.8376295508902472e-05, "loss": 0.6163, "step": 171360 }, { "epoch": 1.9, "learning_rate": 1.837537278176396e-05, "loss": 0.5783, "step": 171365 }, { "epoch": 1.9, "learning_rate": 1.8374450054625448e-05, "loss": 0.6562, "step": 171370 }, { "epoch": 1.9, "learning_rate": 1.8373527327486936e-05, "loss": 0.6445, "step": 171375 }, { "epoch": 1.9, "learning_rate": 1.8372604600348423e-05, "loss": 0.5759, "step": 171380 }, { "epoch": 1.9, "learning_rate": 1.8371681873209908e-05, "loss": 0.5939, "step": 171385 }, { "epoch": 1.9, "learning_rate": 1.83707591460714e-05, "loss": 0.6318, "step": 171390 }, { "epoch": 1.9, "learning_rate": 1.8369836418932884e-05, "loss": 0.6073, "step": 171395 }, { "epoch": 1.9, "learning_rate": 1.8368913691794375e-05, "loss": 0.5995, "step": 171400 }, { "epoch": 1.9, "learning_rate": 1.836799096465586e-05, "loss": 0.6386, "step": 171405 }, { "epoch": 1.9, "learning_rate": 1.8367068237517347e-05, "loss": 0.6497, "step": 171410 }, { "epoch": 1.9, "learning_rate": 1.8366145510378835e-05, "loss": 0.6195, "step": 171415 }, { "epoch": 1.9, "learning_rate": 1.8365222783240323e-05, "loss": 0.6281, "step": 171420 }, { "epoch": 1.9, "learning_rate": 1.836430005610181e-05, "loss": 0.611, "step": 171425 }, { "epoch": 1.9, "learning_rate": 1.83633773289633e-05, "loss": 0.573, "step": 171430 }, { "epoch": 1.9, "learning_rate": 1.8362454601824786e-05, "loss": 0.6176, "step": 171435 }, { "epoch": 1.9, "learning_rate": 1.8361531874686274e-05, "loss": 0.6586, "step": 171440 }, { "epoch": 1.9, "learning_rate": 1.8360609147547762e-05, "loss": 0.6039, "step": 171445 }, { "epoch": 1.9, "learning_rate": 1.835968642040925e-05, "loss": 0.6276, "step": 171450 }, { "epoch": 1.9, "learning_rate": 1.8358763693270738e-05, "loss": 0.6323, "step": 171455 }, { "epoch": 1.9, "learning_rate": 1.8357840966132226e-05, "loss": 0.5939, "step": 171460 }, { "epoch": 1.9, "learning_rate": 1.835691823899371e-05, "loss": 0.5986, "step": 171465 }, { "epoch": 1.9, "learning_rate": 1.8355995511855198e-05, "loss": 0.6633, "step": 171470 }, { "epoch": 1.9, "learning_rate": 1.8355072784716686e-05, "loss": 0.629, "step": 171475 }, { "epoch": 1.9, "learning_rate": 1.8354150057578174e-05, "loss": 0.6271, "step": 171480 }, { "epoch": 1.9, "learning_rate": 1.835322733043966e-05, "loss": 0.5976, "step": 171485 }, { "epoch": 1.9, "learning_rate": 1.835230460330115e-05, "loss": 0.704, "step": 171490 }, { "epoch": 1.9, "learning_rate": 1.8351381876162637e-05, "loss": 0.6375, "step": 171495 }, { "epoch": 1.9, "learning_rate": 1.8350459149024125e-05, "loss": 0.6818, "step": 171500 }, { "epoch": 1.9, "learning_rate": 1.8349536421885613e-05, "loss": 0.6413, "step": 171505 }, { "epoch": 1.9, "learning_rate": 1.83486136947471e-05, "loss": 0.6503, "step": 171510 }, { "epoch": 1.9, "learning_rate": 1.834769096760859e-05, "loss": 0.6441, "step": 171515 }, { "epoch": 1.9, "learning_rate": 1.8346768240470073e-05, "loss": 0.6436, "step": 171520 }, { "epoch": 1.9, "learning_rate": 1.8345845513331564e-05, "loss": 0.5937, "step": 171525 }, { "epoch": 1.9, "learning_rate": 1.834492278619305e-05, "loss": 0.6003, "step": 171530 }, { "epoch": 1.9, "learning_rate": 1.834400005905454e-05, "loss": 0.6345, "step": 171535 }, { "epoch": 1.9, "learning_rate": 1.8343077331916024e-05, "loss": 0.6483, "step": 171540 }, { "epoch": 1.9, "learning_rate": 1.8342154604777512e-05, "loss": 0.6053, "step": 171545 }, { "epoch": 1.9, "learning_rate": 1.8341231877639e-05, "loss": 0.6408, "step": 171550 }, { "epoch": 1.9, "learning_rate": 1.8340309150500488e-05, "loss": 0.6401, "step": 171555 }, { "epoch": 1.9, "learning_rate": 1.8339386423361976e-05, "loss": 0.5748, "step": 171560 }, { "epoch": 1.9, "learning_rate": 1.8338463696223464e-05, "loss": 0.5986, "step": 171565 }, { "epoch": 1.9, "learning_rate": 1.833754096908495e-05, "loss": 0.6275, "step": 171570 }, { "epoch": 1.9, "learning_rate": 1.833661824194644e-05, "loss": 0.6106, "step": 171575 }, { "epoch": 1.9, "learning_rate": 1.8335695514807927e-05, "loss": 0.6529, "step": 171580 }, { "epoch": 1.9, "learning_rate": 1.833477278766941e-05, "loss": 0.6281, "step": 171585 }, { "epoch": 1.9, "learning_rate": 1.8333850060530903e-05, "loss": 0.6036, "step": 171590 }, { "epoch": 1.9, "learning_rate": 1.8332927333392387e-05, "loss": 0.6008, "step": 171595 }, { "epoch": 1.9, "learning_rate": 1.833200460625388e-05, "loss": 0.6135, "step": 171600 }, { "epoch": 1.9, "learning_rate": 1.8331081879115363e-05, "loss": 0.6186, "step": 171605 }, { "epoch": 1.9, "learning_rate": 1.8330159151976854e-05, "loss": 0.6103, "step": 171610 }, { "epoch": 1.9, "learning_rate": 1.832923642483834e-05, "loss": 0.6177, "step": 171615 }, { "epoch": 1.9, "learning_rate": 1.8328313697699827e-05, "loss": 0.6853, "step": 171620 }, { "epoch": 1.9, "learning_rate": 1.8327390970561314e-05, "loss": 0.6138, "step": 171625 }, { "epoch": 1.9, "learning_rate": 1.8326468243422802e-05, "loss": 0.6451, "step": 171630 }, { "epoch": 1.9, "learning_rate": 1.832554551628429e-05, "loss": 0.5999, "step": 171635 }, { "epoch": 1.9, "learning_rate": 1.8324622789145775e-05, "loss": 0.6209, "step": 171640 }, { "epoch": 1.9, "learning_rate": 1.8323700062007266e-05, "loss": 0.6673, "step": 171645 }, { "epoch": 1.9, "learning_rate": 1.832277733486875e-05, "loss": 0.6043, "step": 171650 }, { "epoch": 1.9, "learning_rate": 1.832185460773024e-05, "loss": 0.6142, "step": 171655 }, { "epoch": 1.9, "learning_rate": 1.8320931880591726e-05, "loss": 0.608, "step": 171660 }, { "epoch": 1.9, "learning_rate": 1.8320009153453217e-05, "loss": 0.6213, "step": 171665 }, { "epoch": 1.9, "learning_rate": 1.8319086426314702e-05, "loss": 0.6444, "step": 171670 }, { "epoch": 1.9, "learning_rate": 1.831816369917619e-05, "loss": 0.6443, "step": 171675 }, { "epoch": 1.9, "learning_rate": 1.8317240972037677e-05, "loss": 0.6375, "step": 171680 }, { "epoch": 1.9, "learning_rate": 1.8316318244899165e-05, "loss": 0.6256, "step": 171685 }, { "epoch": 1.9, "learning_rate": 1.8315395517760653e-05, "loss": 0.6259, "step": 171690 }, { "epoch": 1.9, "learning_rate": 1.8314472790622138e-05, "loss": 0.6032, "step": 171695 }, { "epoch": 1.9, "learning_rate": 1.831355006348363e-05, "loss": 0.6024, "step": 171700 }, { "epoch": 1.9, "learning_rate": 1.8312627336345113e-05, "loss": 0.6381, "step": 171705 }, { "epoch": 1.9, "learning_rate": 1.8311704609206605e-05, "loss": 0.6031, "step": 171710 }, { "epoch": 1.9, "learning_rate": 1.831078188206809e-05, "loss": 0.6012, "step": 171715 }, { "epoch": 1.9, "learning_rate": 1.830985915492958e-05, "loss": 0.6466, "step": 171720 }, { "epoch": 1.9, "learning_rate": 1.8308936427791065e-05, "loss": 0.5669, "step": 171725 }, { "epoch": 1.9, "learning_rate": 1.8308013700652556e-05, "loss": 0.5871, "step": 171730 }, { "epoch": 1.9, "learning_rate": 1.830709097351404e-05, "loss": 0.6106, "step": 171735 }, { "epoch": 1.9, "learning_rate": 1.8306168246375528e-05, "loss": 0.633, "step": 171740 }, { "epoch": 1.9, "learning_rate": 1.8305245519237016e-05, "loss": 0.5876, "step": 171745 }, { "epoch": 1.9, "learning_rate": 1.8304322792098504e-05, "loss": 0.6549, "step": 171750 }, { "epoch": 1.9, "learning_rate": 1.8303400064959992e-05, "loss": 0.5833, "step": 171755 }, { "epoch": 1.9, "learning_rate": 1.830247733782148e-05, "loss": 0.6208, "step": 171760 }, { "epoch": 1.9, "learning_rate": 1.8301554610682967e-05, "loss": 0.6262, "step": 171765 }, { "epoch": 1.9, "learning_rate": 1.8300631883544452e-05, "loss": 0.6127, "step": 171770 }, { "epoch": 1.9, "learning_rate": 1.8299709156405943e-05, "loss": 0.6182, "step": 171775 }, { "epoch": 1.9, "learning_rate": 1.8298786429267428e-05, "loss": 0.6594, "step": 171780 }, { "epoch": 1.9, "learning_rate": 1.829786370212892e-05, "loss": 0.6511, "step": 171785 }, { "epoch": 1.9, "learning_rate": 1.8296940974990403e-05, "loss": 0.6258, "step": 171790 }, { "epoch": 1.9, "learning_rate": 1.829601824785189e-05, "loss": 0.6619, "step": 171795 }, { "epoch": 1.9, "learning_rate": 1.829509552071338e-05, "loss": 0.6065, "step": 171800 }, { "epoch": 1.9, "learning_rate": 1.8294172793574867e-05, "loss": 0.608, "step": 171805 }, { "epoch": 1.9, "learning_rate": 1.8293250066436355e-05, "loss": 0.5654, "step": 171810 }, { "epoch": 1.9, "learning_rate": 1.8292327339297843e-05, "loss": 0.6249, "step": 171815 }, { "epoch": 1.9, "learning_rate": 1.829140461215933e-05, "loss": 0.6506, "step": 171820 }, { "epoch": 1.9, "learning_rate": 1.8290481885020818e-05, "loss": 0.5639, "step": 171825 }, { "epoch": 1.9, "learning_rate": 1.8289559157882306e-05, "loss": 0.6093, "step": 171830 }, { "epoch": 1.9, "learning_rate": 1.8288636430743794e-05, "loss": 0.595, "step": 171835 }, { "epoch": 1.9, "learning_rate": 1.8287713703605282e-05, "loss": 0.5763, "step": 171840 }, { "epoch": 1.9, "learning_rate": 1.8286790976466766e-05, "loss": 0.6019, "step": 171845 }, { "epoch": 1.9, "learning_rate": 1.8285868249328254e-05, "loss": 0.5658, "step": 171850 }, { "epoch": 1.9, "learning_rate": 1.8284945522189742e-05, "loss": 0.6672, "step": 171855 }, { "epoch": 1.9, "learning_rate": 1.828402279505123e-05, "loss": 0.6185, "step": 171860 }, { "epoch": 1.9, "learning_rate": 1.8283100067912718e-05, "loss": 0.6057, "step": 171865 }, { "epoch": 1.9, "learning_rate": 1.8282177340774205e-05, "loss": 0.5923, "step": 171870 }, { "epoch": 1.9, "learning_rate": 1.8281254613635693e-05, "loss": 0.5579, "step": 171875 }, { "epoch": 1.9, "learning_rate": 1.828033188649718e-05, "loss": 0.63, "step": 171880 }, { "epoch": 1.9, "learning_rate": 1.827940915935867e-05, "loss": 0.6601, "step": 171885 }, { "epoch": 1.9, "learning_rate": 1.8278486432220157e-05, "loss": 0.6418, "step": 171890 }, { "epoch": 1.9, "learning_rate": 1.8277563705081645e-05, "loss": 0.562, "step": 171895 }, { "epoch": 1.9, "learning_rate": 1.8276640977943133e-05, "loss": 0.5733, "step": 171900 }, { "epoch": 1.9, "learning_rate": 1.827571825080462e-05, "loss": 0.6291, "step": 171905 }, { "epoch": 1.9, "learning_rate": 1.8274795523666108e-05, "loss": 0.6161, "step": 171910 }, { "epoch": 1.9, "learning_rate": 1.8273872796527593e-05, "loss": 0.6497, "step": 171915 }, { "epoch": 1.9, "learning_rate": 1.827295006938908e-05, "loss": 0.5838, "step": 171920 }, { "epoch": 1.9, "learning_rate": 1.827202734225057e-05, "loss": 0.6522, "step": 171925 }, { "epoch": 1.9, "learning_rate": 1.8271104615112056e-05, "loss": 0.606, "step": 171930 }, { "epoch": 1.9, "learning_rate": 1.8270181887973544e-05, "loss": 0.6148, "step": 171935 }, { "epoch": 1.9, "learning_rate": 1.8269259160835032e-05, "loss": 0.6166, "step": 171940 }, { "epoch": 1.9, "learning_rate": 1.826833643369652e-05, "loss": 0.6395, "step": 171945 }, { "epoch": 1.9, "learning_rate": 1.8267413706558008e-05, "loss": 0.654, "step": 171950 }, { "epoch": 1.9, "learning_rate": 1.8266490979419496e-05, "loss": 0.5915, "step": 171955 }, { "epoch": 1.9, "learning_rate": 1.8265568252280983e-05, "loss": 0.5703, "step": 171960 }, { "epoch": 1.9, "learning_rate": 1.826464552514247e-05, "loss": 0.6397, "step": 171965 }, { "epoch": 1.9, "learning_rate": 1.8263722798003956e-05, "loss": 0.5986, "step": 171970 }, { "epoch": 1.9, "learning_rate": 1.8262800070865447e-05, "loss": 0.6184, "step": 171975 }, { "epoch": 1.9, "learning_rate": 1.826187734372693e-05, "loss": 0.6008, "step": 171980 }, { "epoch": 1.9, "learning_rate": 1.8260954616588423e-05, "loss": 0.5869, "step": 171985 }, { "epoch": 1.9, "learning_rate": 1.8260031889449907e-05, "loss": 0.5526, "step": 171990 }, { "epoch": 1.9, "learning_rate": 1.8259109162311395e-05, "loss": 0.6666, "step": 171995 }, { "epoch": 1.9, "learning_rate": 1.8258186435172883e-05, "loss": 0.6358, "step": 172000 }, { "epoch": 1.9, "eval_loss": 0.5914716720581055, "eval_runtime": 69.7229, "eval_samples_per_second": 28.685, "eval_steps_per_second": 14.342, "step": 172000 }, { "epoch": 1.9, "learning_rate": 1.825726370803437e-05, "loss": 0.6408, "step": 172005 }, { "epoch": 1.9, "learning_rate": 1.825634098089586e-05, "loss": 0.6136, "step": 172010 }, { "epoch": 1.9, "learning_rate": 1.8255418253757346e-05, "loss": 0.6119, "step": 172015 }, { "epoch": 1.9, "learning_rate": 1.8254495526618834e-05, "loss": 0.6476, "step": 172020 }, { "epoch": 1.9, "learning_rate": 1.825357279948032e-05, "loss": 0.5665, "step": 172025 }, { "epoch": 1.9, "learning_rate": 1.825265007234181e-05, "loss": 0.6151, "step": 172030 }, { "epoch": 1.9, "learning_rate": 1.8251727345203294e-05, "loss": 0.6466, "step": 172035 }, { "epoch": 1.9, "learning_rate": 1.8250804618064786e-05, "loss": 0.6369, "step": 172040 }, { "epoch": 1.91, "learning_rate": 1.824988189092627e-05, "loss": 0.6184, "step": 172045 }, { "epoch": 1.91, "learning_rate": 1.824895916378776e-05, "loss": 0.6785, "step": 172050 }, { "epoch": 1.91, "learning_rate": 1.8248036436649246e-05, "loss": 0.6238, "step": 172055 }, { "epoch": 1.91, "learning_rate": 1.8247113709510737e-05, "loss": 0.7162, "step": 172060 }, { "epoch": 1.91, "learning_rate": 1.824619098237222e-05, "loss": 0.6736, "step": 172065 }, { "epoch": 1.91, "learning_rate": 1.824526825523371e-05, "loss": 0.6382, "step": 172070 }, { "epoch": 1.91, "learning_rate": 1.8244345528095197e-05, "loss": 0.6093, "step": 172075 }, { "epoch": 1.91, "learning_rate": 1.824342280095668e-05, "loss": 0.6413, "step": 172080 }, { "epoch": 1.91, "learning_rate": 1.8242500073818173e-05, "loss": 0.6466, "step": 172085 }, { "epoch": 1.91, "learning_rate": 1.8241577346679657e-05, "loss": 0.6067, "step": 172090 }, { "epoch": 1.91, "learning_rate": 1.824065461954115e-05, "loss": 0.6362, "step": 172095 }, { "epoch": 1.91, "learning_rate": 1.8239731892402633e-05, "loss": 0.6548, "step": 172100 }, { "epoch": 1.91, "learning_rate": 1.8238809165264124e-05, "loss": 0.6521, "step": 172105 }, { "epoch": 1.91, "learning_rate": 1.823788643812561e-05, "loss": 0.6762, "step": 172110 }, { "epoch": 1.91, "learning_rate": 1.82369637109871e-05, "loss": 0.6174, "step": 172115 }, { "epoch": 1.91, "learning_rate": 1.8236040983848584e-05, "loss": 0.5452, "step": 172120 }, { "epoch": 1.91, "learning_rate": 1.8235118256710072e-05, "loss": 0.5755, "step": 172125 }, { "epoch": 1.91, "learning_rate": 1.823419552957156e-05, "loss": 0.6157, "step": 172130 }, { "epoch": 1.91, "learning_rate": 1.8233272802433048e-05, "loss": 0.6069, "step": 172135 }, { "epoch": 1.91, "learning_rate": 1.8232350075294536e-05, "loss": 0.5882, "step": 172140 }, { "epoch": 1.91, "learning_rate": 1.823142734815602e-05, "loss": 0.6788, "step": 172145 }, { "epoch": 1.91, "learning_rate": 1.823050462101751e-05, "loss": 0.5716, "step": 172150 }, { "epoch": 1.91, "learning_rate": 1.8229581893878996e-05, "loss": 0.61, "step": 172155 }, { "epoch": 1.91, "learning_rate": 1.8228659166740487e-05, "loss": 0.5969, "step": 172160 }, { "epoch": 1.91, "learning_rate": 1.822773643960197e-05, "loss": 0.6148, "step": 172165 }, { "epoch": 1.91, "learning_rate": 1.8226813712463463e-05, "loss": 0.6107, "step": 172170 }, { "epoch": 1.91, "learning_rate": 1.8225890985324947e-05, "loss": 0.6349, "step": 172175 }, { "epoch": 1.91, "learning_rate": 1.8224968258186435e-05, "loss": 0.5974, "step": 172180 }, { "epoch": 1.91, "learning_rate": 1.8224045531047923e-05, "loss": 0.6027, "step": 172185 }, { "epoch": 1.91, "learning_rate": 1.822312280390941e-05, "loss": 0.6275, "step": 172190 }, { "epoch": 1.91, "learning_rate": 1.82222000767709e-05, "loss": 0.6564, "step": 172195 }, { "epoch": 1.91, "learning_rate": 1.8221277349632387e-05, "loss": 0.6005, "step": 172200 }, { "epoch": 1.91, "learning_rate": 1.8220354622493874e-05, "loss": 0.6522, "step": 172205 }, { "epoch": 1.91, "learning_rate": 1.8219431895355362e-05, "loss": 0.6069, "step": 172210 }, { "epoch": 1.91, "learning_rate": 1.821850916821685e-05, "loss": 0.6214, "step": 172215 }, { "epoch": 1.91, "learning_rate": 1.8217586441078335e-05, "loss": 0.6265, "step": 172220 }, { "epoch": 1.91, "learning_rate": 1.8216663713939826e-05, "loss": 0.5996, "step": 172225 }, { "epoch": 1.91, "learning_rate": 1.821574098680131e-05, "loss": 0.585, "step": 172230 }, { "epoch": 1.91, "learning_rate": 1.8214818259662798e-05, "loss": 0.6335, "step": 172235 }, { "epoch": 1.91, "learning_rate": 1.8213895532524286e-05, "loss": 0.5935, "step": 172240 }, { "epoch": 1.91, "learning_rate": 1.8212972805385774e-05, "loss": 0.6227, "step": 172245 }, { "epoch": 1.91, "learning_rate": 1.821205007824726e-05, "loss": 0.5904, "step": 172250 }, { "epoch": 1.91, "learning_rate": 1.821112735110875e-05, "loss": 0.6122, "step": 172255 }, { "epoch": 1.91, "learning_rate": 1.8210204623970237e-05, "loss": 0.6557, "step": 172260 }, { "epoch": 1.91, "learning_rate": 1.8209281896831725e-05, "loss": 0.6857, "step": 172265 }, { "epoch": 1.91, "learning_rate": 1.8208359169693213e-05, "loss": 0.6524, "step": 172270 }, { "epoch": 1.91, "learning_rate": 1.82074364425547e-05, "loss": 0.5966, "step": 172275 }, { "epoch": 1.91, "learning_rate": 1.820651371541619e-05, "loss": 0.6589, "step": 172280 }, { "epoch": 1.91, "learning_rate": 1.8205590988277677e-05, "loss": 0.5659, "step": 172285 }, { "epoch": 1.91, "learning_rate": 1.8204668261139164e-05, "loss": 0.6035, "step": 172290 }, { "epoch": 1.91, "learning_rate": 1.8203745534000652e-05, "loss": 0.6107, "step": 172295 }, { "epoch": 1.91, "learning_rate": 1.8202822806862137e-05, "loss": 0.6218, "step": 172300 }, { "epoch": 1.91, "learning_rate": 1.8201900079723625e-05, "loss": 0.6904, "step": 172305 }, { "epoch": 1.91, "learning_rate": 1.8200977352585112e-05, "loss": 0.6136, "step": 172310 }, { "epoch": 1.91, "learning_rate": 1.82000546254466e-05, "loss": 0.596, "step": 172315 }, { "epoch": 1.91, "learning_rate": 1.8199131898308088e-05, "loss": 0.5768, "step": 172320 }, { "epoch": 1.91, "learning_rate": 1.8198209171169576e-05, "loss": 0.6257, "step": 172325 }, { "epoch": 1.91, "learning_rate": 1.8197286444031064e-05, "loss": 0.5876, "step": 172330 }, { "epoch": 1.91, "learning_rate": 1.8196363716892552e-05, "loss": 0.6034, "step": 172335 }, { "epoch": 1.91, "learning_rate": 1.819544098975404e-05, "loss": 0.6117, "step": 172340 }, { "epoch": 1.91, "learning_rate": 1.8194518262615527e-05, "loss": 0.6806, "step": 172345 }, { "epoch": 1.91, "learning_rate": 1.8193595535477015e-05, "loss": 0.5857, "step": 172350 }, { "epoch": 1.91, "learning_rate": 1.81926728083385e-05, "loss": 0.5618, "step": 172355 }, { "epoch": 1.91, "learning_rate": 1.819175008119999e-05, "loss": 0.6113, "step": 172360 }, { "epoch": 1.91, "learning_rate": 1.8190827354061475e-05, "loss": 0.6163, "step": 172365 }, { "epoch": 1.91, "learning_rate": 1.8189904626922967e-05, "loss": 0.6474, "step": 172370 }, { "epoch": 1.91, "learning_rate": 1.818898189978445e-05, "loss": 0.5924, "step": 172375 }, { "epoch": 1.91, "learning_rate": 1.818805917264594e-05, "loss": 0.6075, "step": 172380 }, { "epoch": 1.91, "learning_rate": 1.8187136445507427e-05, "loss": 0.6067, "step": 172385 }, { "epoch": 1.91, "learning_rate": 1.8186213718368915e-05, "loss": 0.6174, "step": 172390 }, { "epoch": 1.91, "learning_rate": 1.8185290991230403e-05, "loss": 0.6498, "step": 172395 }, { "epoch": 1.91, "learning_rate": 1.818436826409189e-05, "loss": 0.6264, "step": 172400 }, { "epoch": 1.91, "learning_rate": 1.8183445536953378e-05, "loss": 0.6576, "step": 172405 }, { "epoch": 1.91, "learning_rate": 1.8182522809814863e-05, "loss": 0.6471, "step": 172410 }, { "epoch": 1.91, "learning_rate": 1.8181600082676354e-05, "loss": 0.5895, "step": 172415 }, { "epoch": 1.91, "learning_rate": 1.818067735553784e-05, "loss": 0.6422, "step": 172420 }, { "epoch": 1.91, "learning_rate": 1.817975462839933e-05, "loss": 0.5998, "step": 172425 }, { "epoch": 1.91, "learning_rate": 1.8178831901260814e-05, "loss": 0.635, "step": 172430 }, { "epoch": 1.91, "learning_rate": 1.8177909174122305e-05, "loss": 0.6006, "step": 172435 }, { "epoch": 1.91, "learning_rate": 1.817698644698379e-05, "loss": 0.6538, "step": 172440 }, { "epoch": 1.91, "learning_rate": 1.817606371984528e-05, "loss": 0.5581, "step": 172445 }, { "epoch": 1.91, "learning_rate": 1.8175140992706765e-05, "loss": 0.6486, "step": 172450 }, { "epoch": 1.91, "learning_rate": 1.8174218265568253e-05, "loss": 0.6579, "step": 172455 }, { "epoch": 1.91, "learning_rate": 1.817329553842974e-05, "loss": 0.563, "step": 172460 }, { "epoch": 1.91, "learning_rate": 1.8172372811291226e-05, "loss": 0.6187, "step": 172465 }, { "epoch": 1.91, "learning_rate": 1.8171450084152717e-05, "loss": 0.6341, "step": 172470 }, { "epoch": 1.91, "learning_rate": 1.81705273570142e-05, "loss": 0.6216, "step": 172475 }, { "epoch": 1.91, "learning_rate": 1.8169604629875693e-05, "loss": 0.6186, "step": 172480 }, { "epoch": 1.91, "learning_rate": 1.8168681902737177e-05, "loss": 0.613, "step": 172485 }, { "epoch": 1.91, "learning_rate": 1.8167759175598668e-05, "loss": 0.6322, "step": 172490 }, { "epoch": 1.91, "learning_rate": 1.8166836448460153e-05, "loss": 0.6414, "step": 172495 }, { "epoch": 1.91, "learning_rate": 1.8165913721321644e-05, "loss": 0.6483, "step": 172500 }, { "epoch": 1.91, "learning_rate": 1.816499099418313e-05, "loss": 0.6043, "step": 172505 }, { "epoch": 1.91, "learning_rate": 1.8164068267044616e-05, "loss": 0.5573, "step": 172510 }, { "epoch": 1.91, "learning_rate": 1.8163145539906104e-05, "loss": 0.5898, "step": 172515 }, { "epoch": 1.91, "learning_rate": 1.8162222812767592e-05, "loss": 0.5975, "step": 172520 }, { "epoch": 1.91, "learning_rate": 1.816130008562908e-05, "loss": 0.637, "step": 172525 }, { "epoch": 1.91, "learning_rate": 1.8160377358490564e-05, "loss": 0.6664, "step": 172530 }, { "epoch": 1.91, "learning_rate": 1.8159454631352055e-05, "loss": 0.6129, "step": 172535 }, { "epoch": 1.91, "learning_rate": 1.815853190421354e-05, "loss": 0.6018, "step": 172540 }, { "epoch": 1.91, "learning_rate": 1.815760917707503e-05, "loss": 0.5608, "step": 172545 }, { "epoch": 1.91, "learning_rate": 1.8156686449936516e-05, "loss": 0.6034, "step": 172550 }, { "epoch": 1.91, "learning_rate": 1.8155763722798007e-05, "loss": 0.6305, "step": 172555 }, { "epoch": 1.91, "learning_rate": 1.815484099565949e-05, "loss": 0.6494, "step": 172560 }, { "epoch": 1.91, "learning_rate": 1.815391826852098e-05, "loss": 0.6307, "step": 172565 }, { "epoch": 1.91, "learning_rate": 1.8152995541382467e-05, "loss": 0.6372, "step": 172570 }, { "epoch": 1.91, "learning_rate": 1.8152072814243955e-05, "loss": 0.625, "step": 172575 }, { "epoch": 1.91, "learning_rate": 1.8151150087105443e-05, "loss": 0.6172, "step": 172580 }, { "epoch": 1.91, "learning_rate": 1.815022735996693e-05, "loss": 0.6374, "step": 172585 }, { "epoch": 1.91, "learning_rate": 1.814930463282842e-05, "loss": 0.6007, "step": 172590 }, { "epoch": 1.91, "learning_rate": 1.8148381905689906e-05, "loss": 0.6041, "step": 172595 }, { "epoch": 1.91, "learning_rate": 1.8147459178551394e-05, "loss": 0.6358, "step": 172600 }, { "epoch": 1.91, "learning_rate": 1.814653645141288e-05, "loss": 0.5832, "step": 172605 }, { "epoch": 1.91, "learning_rate": 1.814561372427437e-05, "loss": 0.6223, "step": 172610 }, { "epoch": 1.91, "learning_rate": 1.8144690997135854e-05, "loss": 0.5826, "step": 172615 }, { "epoch": 1.91, "learning_rate": 1.8143768269997342e-05, "loss": 0.6601, "step": 172620 }, { "epoch": 1.91, "learning_rate": 1.814284554285883e-05, "loss": 0.6018, "step": 172625 }, { "epoch": 1.91, "learning_rate": 1.8141922815720318e-05, "loss": 0.6054, "step": 172630 }, { "epoch": 1.91, "learning_rate": 1.8141000088581806e-05, "loss": 0.6333, "step": 172635 }, { "epoch": 1.91, "learning_rate": 1.8140077361443294e-05, "loss": 0.6627, "step": 172640 }, { "epoch": 1.91, "learning_rate": 1.813915463430478e-05, "loss": 0.6351, "step": 172645 }, { "epoch": 1.91, "learning_rate": 1.813823190716627e-05, "loss": 0.6017, "step": 172650 }, { "epoch": 1.91, "learning_rate": 1.8137309180027757e-05, "loss": 0.6723, "step": 172655 }, { "epoch": 1.91, "learning_rate": 1.8136386452889245e-05, "loss": 0.593, "step": 172660 }, { "epoch": 1.91, "learning_rate": 1.8135463725750733e-05, "loss": 0.6397, "step": 172665 }, { "epoch": 1.91, "learning_rate": 1.813454099861222e-05, "loss": 0.6394, "step": 172670 }, { "epoch": 1.91, "learning_rate": 1.813361827147371e-05, "loss": 0.5777, "step": 172675 }, { "epoch": 1.91, "learning_rate": 1.8132695544335193e-05, "loss": 0.6023, "step": 172680 }, { "epoch": 1.91, "learning_rate": 1.813177281719668e-05, "loss": 0.5892, "step": 172685 }, { "epoch": 1.91, "learning_rate": 1.813085009005817e-05, "loss": 0.5749, "step": 172690 }, { "epoch": 1.91, "learning_rate": 1.8129927362919656e-05, "loss": 0.5764, "step": 172695 }, { "epoch": 1.91, "learning_rate": 1.8129004635781144e-05, "loss": 0.6114, "step": 172700 }, { "epoch": 1.91, "learning_rate": 1.8128081908642632e-05, "loss": 0.6392, "step": 172705 }, { "epoch": 1.91, "learning_rate": 1.812715918150412e-05, "loss": 0.6012, "step": 172710 }, { "epoch": 1.91, "learning_rate": 1.8126236454365608e-05, "loss": 0.6281, "step": 172715 }, { "epoch": 1.91, "learning_rate": 1.8125313727227096e-05, "loss": 0.6379, "step": 172720 }, { "epoch": 1.91, "learning_rate": 1.8124391000088584e-05, "loss": 0.5971, "step": 172725 }, { "epoch": 1.91, "learning_rate": 1.812346827295007e-05, "loss": 0.607, "step": 172730 }, { "epoch": 1.91, "learning_rate": 1.812254554581156e-05, "loss": 0.6084, "step": 172735 }, { "epoch": 1.91, "learning_rate": 1.8121622818673044e-05, "loss": 0.5877, "step": 172740 }, { "epoch": 1.91, "learning_rate": 1.8120700091534535e-05, "loss": 0.6089, "step": 172745 }, { "epoch": 1.91, "learning_rate": 1.811977736439602e-05, "loss": 0.6124, "step": 172750 }, { "epoch": 1.91, "learning_rate": 1.8118854637257507e-05, "loss": 0.6447, "step": 172755 }, { "epoch": 1.91, "learning_rate": 1.8117931910118995e-05, "loss": 0.5686, "step": 172760 }, { "epoch": 1.91, "learning_rate": 1.8117009182980483e-05, "loss": 0.6077, "step": 172765 }, { "epoch": 1.91, "learning_rate": 1.811608645584197e-05, "loss": 0.7213, "step": 172770 }, { "epoch": 1.91, "learning_rate": 1.811516372870346e-05, "loss": 0.5899, "step": 172775 }, { "epoch": 1.91, "learning_rate": 1.8114241001564947e-05, "loss": 0.5948, "step": 172780 }, { "epoch": 1.91, "learning_rate": 1.8113318274426434e-05, "loss": 0.6353, "step": 172785 }, { "epoch": 1.91, "learning_rate": 1.8112395547287922e-05, "loss": 0.6498, "step": 172790 }, { "epoch": 1.91, "learning_rate": 1.8111472820149407e-05, "loss": 0.5787, "step": 172795 }, { "epoch": 1.91, "learning_rate": 1.8110550093010898e-05, "loss": 0.6286, "step": 172800 }, { "epoch": 1.91, "learning_rate": 1.8109627365872382e-05, "loss": 0.6241, "step": 172805 }, { "epoch": 1.91, "learning_rate": 1.8108704638733874e-05, "loss": 0.6245, "step": 172810 }, { "epoch": 1.91, "learning_rate": 1.8107781911595358e-05, "loss": 0.5956, "step": 172815 }, { "epoch": 1.91, "learning_rate": 1.810685918445685e-05, "loss": 0.5442, "step": 172820 }, { "epoch": 1.91, "learning_rate": 1.8105936457318334e-05, "loss": 0.6118, "step": 172825 }, { "epoch": 1.91, "learning_rate": 1.810501373017982e-05, "loss": 0.6304, "step": 172830 }, { "epoch": 1.91, "learning_rate": 1.810409100304131e-05, "loss": 0.645, "step": 172835 }, { "epoch": 1.91, "learning_rate": 1.8103168275902797e-05, "loss": 0.6151, "step": 172840 }, { "epoch": 1.91, "learning_rate": 1.8102245548764285e-05, "loss": 0.669, "step": 172845 }, { "epoch": 1.91, "learning_rate": 1.810132282162577e-05, "loss": 0.5947, "step": 172850 }, { "epoch": 1.91, "learning_rate": 1.810040009448726e-05, "loss": 0.5659, "step": 172855 }, { "epoch": 1.91, "learning_rate": 1.8099477367348745e-05, "loss": 0.6505, "step": 172860 }, { "epoch": 1.91, "learning_rate": 1.8098554640210237e-05, "loss": 0.6202, "step": 172865 }, { "epoch": 1.91, "learning_rate": 1.809763191307172e-05, "loss": 0.6485, "step": 172870 }, { "epoch": 1.91, "learning_rate": 1.8096709185933212e-05, "loss": 0.5992, "step": 172875 }, { "epoch": 1.91, "learning_rate": 1.8095786458794697e-05, "loss": 0.6099, "step": 172880 }, { "epoch": 1.91, "learning_rate": 1.8094863731656188e-05, "loss": 0.5952, "step": 172885 }, { "epoch": 1.91, "learning_rate": 1.8093941004517672e-05, "loss": 0.6438, "step": 172890 }, { "epoch": 1.91, "learning_rate": 1.809301827737916e-05, "loss": 0.517, "step": 172895 }, { "epoch": 1.91, "learning_rate": 1.8092095550240648e-05, "loss": 0.5881, "step": 172900 }, { "epoch": 1.91, "learning_rate": 1.8091172823102136e-05, "loss": 0.6173, "step": 172905 }, { "epoch": 1.91, "learning_rate": 1.8090250095963624e-05, "loss": 0.6192, "step": 172910 }, { "epoch": 1.91, "learning_rate": 1.8089327368825108e-05, "loss": 0.628, "step": 172915 }, { "epoch": 1.91, "learning_rate": 1.80884046416866e-05, "loss": 0.5955, "step": 172920 }, { "epoch": 1.91, "learning_rate": 1.8087481914548084e-05, "loss": 0.6495, "step": 172925 }, { "epoch": 1.91, "learning_rate": 1.8086559187409575e-05, "loss": 0.6106, "step": 172930 }, { "epoch": 1.91, "learning_rate": 1.808563646027106e-05, "loss": 0.5648, "step": 172935 }, { "epoch": 1.91, "learning_rate": 1.808471373313255e-05, "loss": 0.5721, "step": 172940 }, { "epoch": 1.91, "learning_rate": 1.8083791005994035e-05, "loss": 0.6396, "step": 172945 }, { "epoch": 1.92, "learning_rate": 1.8082868278855523e-05, "loss": 0.6586, "step": 172950 }, { "epoch": 1.92, "learning_rate": 1.808194555171701e-05, "loss": 0.6276, "step": 172955 }, { "epoch": 1.92, "learning_rate": 1.80810228245785e-05, "loss": 0.5945, "step": 172960 }, { "epoch": 1.92, "learning_rate": 1.8080100097439987e-05, "loss": 0.5942, "step": 172965 }, { "epoch": 1.92, "learning_rate": 1.8079177370301475e-05, "loss": 0.6089, "step": 172970 }, { "epoch": 1.92, "learning_rate": 1.8078254643162962e-05, "loss": 0.617, "step": 172975 }, { "epoch": 1.92, "learning_rate": 1.8077331916024447e-05, "loss": 0.6338, "step": 172980 }, { "epoch": 1.92, "learning_rate": 1.8076409188885938e-05, "loss": 0.6598, "step": 172985 }, { "epoch": 1.92, "learning_rate": 1.8075486461747423e-05, "loss": 0.6253, "step": 172990 }, { "epoch": 1.92, "learning_rate": 1.8074563734608914e-05, "loss": 0.6232, "step": 172995 }, { "epoch": 1.92, "learning_rate": 1.80736410074704e-05, "loss": 0.614, "step": 173000 }, { "epoch": 1.92, "eval_loss": 0.588646650314331, "eval_runtime": 69.1551, "eval_samples_per_second": 28.921, "eval_steps_per_second": 14.46, "step": 173000 }, { "epoch": 1.92, "learning_rate": 1.8072718280331886e-05, "loss": 0.6001, "step": 173005 }, { "epoch": 1.92, "learning_rate": 1.8071795553193374e-05, "loss": 0.6008, "step": 173010 }, { "epoch": 1.92, "learning_rate": 1.8070872826054862e-05, "loss": 0.6465, "step": 173015 }, { "epoch": 1.92, "learning_rate": 1.806995009891635e-05, "loss": 0.6039, "step": 173020 }, { "epoch": 1.92, "learning_rate": 1.8069027371777838e-05, "loss": 0.6154, "step": 173025 }, { "epoch": 1.92, "learning_rate": 1.8068104644639325e-05, "loss": 0.5811, "step": 173030 }, { "epoch": 1.92, "learning_rate": 1.8067181917500813e-05, "loss": 0.6284, "step": 173035 }, { "epoch": 1.92, "learning_rate": 1.80662591903623e-05, "loss": 0.6309, "step": 173040 }, { "epoch": 1.92, "learning_rate": 1.806533646322379e-05, "loss": 0.6065, "step": 173045 }, { "epoch": 1.92, "learning_rate": 1.8064413736085277e-05, "loss": 0.6425, "step": 173050 }, { "epoch": 1.92, "learning_rate": 1.806349100894676e-05, "loss": 0.5858, "step": 173055 }, { "epoch": 1.92, "learning_rate": 1.8062568281808252e-05, "loss": 0.5774, "step": 173060 }, { "epoch": 1.92, "learning_rate": 1.8061645554669737e-05, "loss": 0.6403, "step": 173065 }, { "epoch": 1.92, "learning_rate": 1.8060722827531225e-05, "loss": 0.5831, "step": 173070 }, { "epoch": 1.92, "learning_rate": 1.8059800100392713e-05, "loss": 0.6301, "step": 173075 }, { "epoch": 1.92, "learning_rate": 1.80588773732542e-05, "loss": 0.5971, "step": 173080 }, { "epoch": 1.92, "learning_rate": 1.805795464611569e-05, "loss": 0.6664, "step": 173085 }, { "epoch": 1.92, "learning_rate": 1.8057031918977176e-05, "loss": 0.5841, "step": 173090 }, { "epoch": 1.92, "learning_rate": 1.8056109191838664e-05, "loss": 0.5879, "step": 173095 }, { "epoch": 1.92, "learning_rate": 1.8055186464700152e-05, "loss": 0.621, "step": 173100 }, { "epoch": 1.92, "learning_rate": 1.805426373756164e-05, "loss": 0.5949, "step": 173105 }, { "epoch": 1.92, "learning_rate": 1.8053341010423128e-05, "loss": 0.6658, "step": 173110 }, { "epoch": 1.92, "learning_rate": 1.8052418283284615e-05, "loss": 0.6141, "step": 173115 }, { "epoch": 1.92, "learning_rate": 1.8051495556146103e-05, "loss": 0.6177, "step": 173120 }, { "epoch": 1.92, "learning_rate": 1.8050572829007588e-05, "loss": 0.5734, "step": 173125 }, { "epoch": 1.92, "learning_rate": 1.804965010186908e-05, "loss": 0.6415, "step": 173130 }, { "epoch": 1.92, "learning_rate": 1.8048727374730563e-05, "loss": 0.5494, "step": 173135 }, { "epoch": 1.92, "learning_rate": 1.804780464759205e-05, "loss": 0.6014, "step": 173140 }, { "epoch": 1.92, "learning_rate": 1.804688192045354e-05, "loss": 0.6574, "step": 173145 }, { "epoch": 1.92, "learning_rate": 1.8045959193315027e-05, "loss": 0.623, "step": 173150 }, { "epoch": 1.92, "learning_rate": 1.8045036466176515e-05, "loss": 0.5926, "step": 173155 }, { "epoch": 1.92, "learning_rate": 1.8044113739038003e-05, "loss": 0.589, "step": 173160 }, { "epoch": 1.92, "learning_rate": 1.804319101189949e-05, "loss": 0.5889, "step": 173165 }, { "epoch": 1.92, "learning_rate": 1.804226828476098e-05, "loss": 0.644, "step": 173170 }, { "epoch": 1.92, "learning_rate": 1.8041345557622466e-05, "loss": 0.6123, "step": 173175 }, { "epoch": 1.92, "learning_rate": 1.804042283048395e-05, "loss": 0.6485, "step": 173180 }, { "epoch": 1.92, "learning_rate": 1.8039500103345442e-05, "loss": 0.6102, "step": 173185 }, { "epoch": 1.92, "learning_rate": 1.8038577376206926e-05, "loss": 0.6239, "step": 173190 }, { "epoch": 1.92, "learning_rate": 1.8037654649068418e-05, "loss": 0.6389, "step": 173195 }, { "epoch": 1.92, "learning_rate": 1.8036731921929902e-05, "loss": 0.6745, "step": 173200 }, { "epoch": 1.92, "learning_rate": 1.8035809194791393e-05, "loss": 0.6002, "step": 173205 }, { "epoch": 1.92, "learning_rate": 1.8034886467652878e-05, "loss": 0.6431, "step": 173210 }, { "epoch": 1.92, "learning_rate": 1.8033963740514366e-05, "loss": 0.6615, "step": 173215 }, { "epoch": 1.92, "learning_rate": 1.8033041013375853e-05, "loss": 0.5992, "step": 173220 }, { "epoch": 1.92, "learning_rate": 1.803211828623734e-05, "loss": 0.5922, "step": 173225 }, { "epoch": 1.92, "learning_rate": 1.803119555909883e-05, "loss": 0.6121, "step": 173230 }, { "epoch": 1.92, "learning_rate": 1.8030272831960317e-05, "loss": 0.6111, "step": 173235 }, { "epoch": 1.92, "learning_rate": 1.8029350104821805e-05, "loss": 0.5815, "step": 173240 }, { "epoch": 1.92, "learning_rate": 1.802842737768329e-05, "loss": 0.5908, "step": 173245 }, { "epoch": 1.92, "learning_rate": 1.802750465054478e-05, "loss": 0.5979, "step": 173250 }, { "epoch": 1.92, "learning_rate": 1.8026581923406265e-05, "loss": 0.5985, "step": 173255 }, { "epoch": 1.92, "learning_rate": 1.8025659196267756e-05, "loss": 0.6408, "step": 173260 }, { "epoch": 1.92, "learning_rate": 1.802473646912924e-05, "loss": 0.5784, "step": 173265 }, { "epoch": 1.92, "learning_rate": 1.8023813741990732e-05, "loss": 0.6328, "step": 173270 }, { "epoch": 1.92, "learning_rate": 1.8022891014852216e-05, "loss": 0.6135, "step": 173275 }, { "epoch": 1.92, "learning_rate": 1.8021968287713704e-05, "loss": 0.6117, "step": 173280 }, { "epoch": 1.92, "learning_rate": 1.8021045560575192e-05, "loss": 0.5658, "step": 173285 }, { "epoch": 1.92, "learning_rate": 1.802012283343668e-05, "loss": 0.583, "step": 173290 }, { "epoch": 1.92, "learning_rate": 1.8019200106298168e-05, "loss": 0.6675, "step": 173295 }, { "epoch": 1.92, "learning_rate": 1.8018277379159652e-05, "loss": 0.593, "step": 173300 }, { "epoch": 1.92, "learning_rate": 1.8017354652021144e-05, "loss": 0.6099, "step": 173305 }, { "epoch": 1.92, "learning_rate": 1.8016431924882628e-05, "loss": 0.676, "step": 173310 }, { "epoch": 1.92, "learning_rate": 1.801550919774412e-05, "loss": 0.6376, "step": 173315 }, { "epoch": 1.92, "learning_rate": 1.8014586470605604e-05, "loss": 0.5943, "step": 173320 }, { "epoch": 1.92, "learning_rate": 1.8013663743467095e-05, "loss": 0.5969, "step": 173325 }, { "epoch": 1.92, "learning_rate": 1.801274101632858e-05, "loss": 0.6071, "step": 173330 }, { "epoch": 1.92, "learning_rate": 1.8011818289190067e-05, "loss": 0.5517, "step": 173335 }, { "epoch": 1.92, "learning_rate": 1.8010895562051555e-05, "loss": 0.6677, "step": 173340 }, { "epoch": 1.92, "learning_rate": 1.8009972834913043e-05, "loss": 0.6051, "step": 173345 }, { "epoch": 1.92, "learning_rate": 1.800905010777453e-05, "loss": 0.6122, "step": 173350 }, { "epoch": 1.92, "learning_rate": 1.800812738063602e-05, "loss": 0.6347, "step": 173355 }, { "epoch": 1.92, "learning_rate": 1.8007204653497506e-05, "loss": 0.5632, "step": 173360 }, { "epoch": 1.92, "learning_rate": 1.800628192635899e-05, "loss": 0.601, "step": 173365 }, { "epoch": 1.92, "learning_rate": 1.8005359199220482e-05, "loss": 0.642, "step": 173370 }, { "epoch": 1.92, "learning_rate": 1.8004436472081967e-05, "loss": 0.5974, "step": 173375 }, { "epoch": 1.92, "learning_rate": 1.8003513744943458e-05, "loss": 0.5888, "step": 173380 }, { "epoch": 1.92, "learning_rate": 1.8002591017804942e-05, "loss": 0.614, "step": 173385 }, { "epoch": 1.92, "learning_rate": 1.8001668290666434e-05, "loss": 0.6195, "step": 173390 }, { "epoch": 1.92, "learning_rate": 1.8000745563527918e-05, "loss": 0.6207, "step": 173395 }, { "epoch": 1.92, "learning_rate": 1.7999822836389406e-05, "loss": 0.5605, "step": 173400 }, { "epoch": 1.92, "learning_rate": 1.7998900109250894e-05, "loss": 0.6207, "step": 173405 }, { "epoch": 1.92, "learning_rate": 1.799797738211238e-05, "loss": 0.5881, "step": 173410 }, { "epoch": 1.92, "learning_rate": 1.799705465497387e-05, "loss": 0.6416, "step": 173415 }, { "epoch": 1.92, "learning_rate": 1.7996131927835357e-05, "loss": 0.6303, "step": 173420 }, { "epoch": 1.92, "learning_rate": 1.7995209200696845e-05, "loss": 0.6068, "step": 173425 }, { "epoch": 1.92, "learning_rate": 1.7994286473558333e-05, "loss": 0.6588, "step": 173430 }, { "epoch": 1.92, "learning_rate": 1.799336374641982e-05, "loss": 0.6668, "step": 173435 }, { "epoch": 1.92, "learning_rate": 1.7992441019281305e-05, "loss": 0.6004, "step": 173440 }, { "epoch": 1.92, "learning_rate": 1.7991518292142797e-05, "loss": 0.6033, "step": 173445 }, { "epoch": 1.92, "learning_rate": 1.799059556500428e-05, "loss": 0.5872, "step": 173450 }, { "epoch": 1.92, "learning_rate": 1.798967283786577e-05, "loss": 0.649, "step": 173455 }, { "epoch": 1.92, "learning_rate": 1.7988750110727257e-05, "loss": 0.5979, "step": 173460 }, { "epoch": 1.92, "learning_rate": 1.7987827383588745e-05, "loss": 0.5553, "step": 173465 }, { "epoch": 1.92, "learning_rate": 1.7986904656450232e-05, "loss": 0.6348, "step": 173470 }, { "epoch": 1.92, "learning_rate": 1.798598192931172e-05, "loss": 0.6332, "step": 173475 }, { "epoch": 1.92, "learning_rate": 1.7985059202173208e-05, "loss": 0.6184, "step": 173480 }, { "epoch": 1.92, "learning_rate": 1.7984136475034696e-05, "loss": 0.6097, "step": 173485 }, { "epoch": 1.92, "learning_rate": 1.7983213747896184e-05, "loss": 0.5872, "step": 173490 }, { "epoch": 1.92, "learning_rate": 1.798229102075767e-05, "loss": 0.6415, "step": 173495 }, { "epoch": 1.92, "learning_rate": 1.798136829361916e-05, "loss": 0.6517, "step": 173500 }, { "epoch": 1.92, "learning_rate": 1.7980445566480647e-05, "loss": 0.6387, "step": 173505 }, { "epoch": 1.92, "learning_rate": 1.7979522839342132e-05, "loss": 0.5923, "step": 173510 }, { "epoch": 1.92, "learning_rate": 1.797860011220362e-05, "loss": 0.5808, "step": 173515 }, { "epoch": 1.92, "learning_rate": 1.7977677385065107e-05, "loss": 0.6316, "step": 173520 }, { "epoch": 1.92, "learning_rate": 1.7976754657926595e-05, "loss": 0.6057, "step": 173525 }, { "epoch": 1.92, "learning_rate": 1.7975831930788083e-05, "loss": 0.637, "step": 173530 }, { "epoch": 1.92, "learning_rate": 1.797490920364957e-05, "loss": 0.5599, "step": 173535 }, { "epoch": 1.92, "learning_rate": 1.797398647651106e-05, "loss": 0.5814, "step": 173540 }, { "epoch": 1.92, "learning_rate": 1.7973063749372547e-05, "loss": 0.673, "step": 173545 }, { "epoch": 1.92, "learning_rate": 1.7972141022234035e-05, "loss": 0.638, "step": 173550 }, { "epoch": 1.92, "learning_rate": 1.7971218295095522e-05, "loss": 0.6274, "step": 173555 }, { "epoch": 1.92, "learning_rate": 1.797029556795701e-05, "loss": 0.6262, "step": 173560 }, { "epoch": 1.92, "learning_rate": 1.7969372840818495e-05, "loss": 0.5629, "step": 173565 }, { "epoch": 1.92, "learning_rate": 1.7968450113679986e-05, "loss": 0.622, "step": 173570 }, { "epoch": 1.92, "learning_rate": 1.796752738654147e-05, "loss": 0.6161, "step": 173575 }, { "epoch": 1.92, "learning_rate": 1.796660465940296e-05, "loss": 0.6249, "step": 173580 }, { "epoch": 1.92, "learning_rate": 1.7965681932264446e-05, "loss": 0.5672, "step": 173585 }, { "epoch": 1.92, "learning_rate": 1.7964759205125934e-05, "loss": 0.63, "step": 173590 }, { "epoch": 1.92, "learning_rate": 1.7963836477987422e-05, "loss": 0.6074, "step": 173595 }, { "epoch": 1.92, "learning_rate": 1.796291375084891e-05, "loss": 0.5765, "step": 173600 }, { "epoch": 1.92, "learning_rate": 1.7961991023710398e-05, "loss": 0.6573, "step": 173605 }, { "epoch": 1.92, "learning_rate": 1.7961068296571885e-05, "loss": 0.6075, "step": 173610 }, { "epoch": 1.92, "learning_rate": 1.7960145569433373e-05, "loss": 0.6662, "step": 173615 }, { "epoch": 1.92, "learning_rate": 1.795922284229486e-05, "loss": 0.6185, "step": 173620 }, { "epoch": 1.92, "learning_rate": 1.795830011515635e-05, "loss": 0.6028, "step": 173625 }, { "epoch": 1.92, "learning_rate": 1.7957377388017833e-05, "loss": 0.6221, "step": 173630 }, { "epoch": 1.92, "learning_rate": 1.7956454660879325e-05, "loss": 0.675, "step": 173635 }, { "epoch": 1.92, "learning_rate": 1.795553193374081e-05, "loss": 0.587, "step": 173640 }, { "epoch": 1.92, "learning_rate": 1.79546092066023e-05, "loss": 0.6097, "step": 173645 }, { "epoch": 1.92, "learning_rate": 1.7953686479463785e-05, "loss": 0.5975, "step": 173650 }, { "epoch": 1.92, "learning_rate": 1.7952763752325276e-05, "loss": 0.6002, "step": 173655 }, { "epoch": 1.92, "learning_rate": 1.795184102518676e-05, "loss": 0.6269, "step": 173660 }, { "epoch": 1.92, "learning_rate": 1.795091829804825e-05, "loss": 0.6342, "step": 173665 }, { "epoch": 1.92, "learning_rate": 1.7949995570909736e-05, "loss": 0.598, "step": 173670 }, { "epoch": 1.92, "learning_rate": 1.7949072843771224e-05, "loss": 0.5682, "step": 173675 }, { "epoch": 1.92, "learning_rate": 1.7948150116632712e-05, "loss": 0.6112, "step": 173680 }, { "epoch": 1.92, "learning_rate": 1.7947227389494196e-05, "loss": 0.6204, "step": 173685 }, { "epoch": 1.92, "learning_rate": 1.7946304662355688e-05, "loss": 0.6087, "step": 173690 }, { "epoch": 1.92, "learning_rate": 1.7945381935217172e-05, "loss": 0.6218, "step": 173695 }, { "epoch": 1.92, "learning_rate": 1.7944459208078663e-05, "loss": 0.6218, "step": 173700 }, { "epoch": 1.92, "learning_rate": 1.7943536480940148e-05, "loss": 0.5974, "step": 173705 }, { "epoch": 1.92, "learning_rate": 1.794261375380164e-05, "loss": 0.6618, "step": 173710 }, { "epoch": 1.92, "learning_rate": 1.7941691026663123e-05, "loss": 0.6207, "step": 173715 }, { "epoch": 1.92, "learning_rate": 1.794076829952461e-05, "loss": 0.6399, "step": 173720 }, { "epoch": 1.92, "learning_rate": 1.79398455723861e-05, "loss": 0.625, "step": 173725 }, { "epoch": 1.92, "learning_rate": 1.7938922845247587e-05, "loss": 0.5905, "step": 173730 }, { "epoch": 1.92, "learning_rate": 1.7938000118109075e-05, "loss": 0.6138, "step": 173735 }, { "epoch": 1.92, "learning_rate": 1.793707739097056e-05, "loss": 0.6407, "step": 173740 }, { "epoch": 1.92, "learning_rate": 1.793615466383205e-05, "loss": 0.6097, "step": 173745 }, { "epoch": 1.92, "learning_rate": 1.7935231936693535e-05, "loss": 0.5765, "step": 173750 }, { "epoch": 1.92, "learning_rate": 1.7934309209555026e-05, "loss": 0.6322, "step": 173755 }, { "epoch": 1.92, "learning_rate": 1.793338648241651e-05, "loss": 0.5621, "step": 173760 }, { "epoch": 1.92, "learning_rate": 1.7932463755278002e-05, "loss": 0.6476, "step": 173765 }, { "epoch": 1.92, "learning_rate": 1.7931541028139486e-05, "loss": 0.6188, "step": 173770 }, { "epoch": 1.92, "learning_rate": 1.7930618301000978e-05, "loss": 0.6455, "step": 173775 }, { "epoch": 1.92, "learning_rate": 1.7929695573862462e-05, "loss": 0.6286, "step": 173780 }, { "epoch": 1.92, "learning_rate": 1.792877284672395e-05, "loss": 0.6157, "step": 173785 }, { "epoch": 1.92, "learning_rate": 1.7927850119585438e-05, "loss": 0.5681, "step": 173790 }, { "epoch": 1.92, "learning_rate": 1.7926927392446926e-05, "loss": 0.6061, "step": 173795 }, { "epoch": 1.92, "learning_rate": 1.7926004665308413e-05, "loss": 0.6164, "step": 173800 }, { "epoch": 1.92, "learning_rate": 1.79250819381699e-05, "loss": 0.607, "step": 173805 }, { "epoch": 1.92, "learning_rate": 1.792415921103139e-05, "loss": 0.5929, "step": 173810 }, { "epoch": 1.92, "learning_rate": 1.7923236483892874e-05, "loss": 0.6566, "step": 173815 }, { "epoch": 1.92, "learning_rate": 1.7922313756754365e-05, "loss": 0.643, "step": 173820 }, { "epoch": 1.92, "learning_rate": 1.792139102961585e-05, "loss": 0.5757, "step": 173825 }, { "epoch": 1.92, "learning_rate": 1.792046830247734e-05, "loss": 0.5642, "step": 173830 }, { "epoch": 1.92, "learning_rate": 1.7919545575338825e-05, "loss": 0.6684, "step": 173835 }, { "epoch": 1.92, "learning_rate": 1.7918622848200313e-05, "loss": 0.5999, "step": 173840 }, { "epoch": 1.92, "learning_rate": 1.79177001210618e-05, "loss": 0.6756, "step": 173845 }, { "epoch": 1.92, "learning_rate": 1.791677739392329e-05, "loss": 0.5726, "step": 173850 }, { "epoch": 1.93, "learning_rate": 1.7915854666784776e-05, "loss": 0.618, "step": 173855 }, { "epoch": 1.93, "learning_rate": 1.7914931939646264e-05, "loss": 0.6425, "step": 173860 }, { "epoch": 1.93, "learning_rate": 1.7914009212507752e-05, "loss": 0.6145, "step": 173865 }, { "epoch": 1.93, "learning_rate": 1.791308648536924e-05, "loss": 0.6217, "step": 173870 }, { "epoch": 1.93, "learning_rate": 1.7912163758230728e-05, "loss": 0.603, "step": 173875 }, { "epoch": 1.93, "learning_rate": 1.7911241031092216e-05, "loss": 0.6197, "step": 173880 }, { "epoch": 1.93, "learning_rate": 1.7910318303953703e-05, "loss": 0.5937, "step": 173885 }, { "epoch": 1.93, "learning_rate": 1.7909395576815188e-05, "loss": 0.617, "step": 173890 }, { "epoch": 1.93, "learning_rate": 1.7908472849676676e-05, "loss": 0.5504, "step": 173895 }, { "epoch": 1.93, "learning_rate": 1.7907550122538164e-05, "loss": 0.6141, "step": 173900 }, { "epoch": 1.93, "learning_rate": 1.790662739539965e-05, "loss": 0.5897, "step": 173905 }, { "epoch": 1.93, "learning_rate": 1.790570466826114e-05, "loss": 0.619, "step": 173910 }, { "epoch": 1.93, "learning_rate": 1.7904781941122627e-05, "loss": 0.6598, "step": 173915 }, { "epoch": 1.93, "learning_rate": 1.7903859213984115e-05, "loss": 0.6281, "step": 173920 }, { "epoch": 1.93, "learning_rate": 1.7902936486845603e-05, "loss": 0.6022, "step": 173925 }, { "epoch": 1.93, "learning_rate": 1.790201375970709e-05, "loss": 0.5506, "step": 173930 }, { "epoch": 1.93, "learning_rate": 1.790109103256858e-05, "loss": 0.6567, "step": 173935 }, { "epoch": 1.93, "learning_rate": 1.7900168305430066e-05, "loss": 0.6346, "step": 173940 }, { "epoch": 1.93, "learning_rate": 1.7899245578291554e-05, "loss": 0.5954, "step": 173945 }, { "epoch": 1.93, "learning_rate": 1.789832285115304e-05, "loss": 0.6681, "step": 173950 }, { "epoch": 1.93, "learning_rate": 1.789740012401453e-05, "loss": 0.5785, "step": 173955 }, { "epoch": 1.93, "learning_rate": 1.7896477396876014e-05, "loss": 0.6104, "step": 173960 }, { "epoch": 1.93, "learning_rate": 1.7895554669737502e-05, "loss": 0.5639, "step": 173965 }, { "epoch": 1.93, "learning_rate": 1.789463194259899e-05, "loss": 0.5936, "step": 173970 }, { "epoch": 1.93, "learning_rate": 1.7893709215460478e-05, "loss": 0.6455, "step": 173975 }, { "epoch": 1.93, "learning_rate": 1.7892786488321966e-05, "loss": 0.5583, "step": 173980 }, { "epoch": 1.93, "learning_rate": 1.7891863761183454e-05, "loss": 0.5705, "step": 173985 }, { "epoch": 1.93, "learning_rate": 1.789094103404494e-05, "loss": 0.6252, "step": 173990 }, { "epoch": 1.93, "learning_rate": 1.789001830690643e-05, "loss": 0.645, "step": 173995 }, { "epoch": 1.93, "learning_rate": 1.7889095579767917e-05, "loss": 0.5969, "step": 174000 }, { "epoch": 1.93, "eval_loss": 0.6083526611328125, "eval_runtime": 69.5613, "eval_samples_per_second": 28.752, "eval_steps_per_second": 14.376, "step": 174000 }, { "epoch": 1.93, "learning_rate": 1.7888172852629405e-05, "loss": 0.6433, "step": 174005 }, { "epoch": 1.93, "learning_rate": 1.7887250125490893e-05, "loss": 0.5714, "step": 174010 }, { "epoch": 1.93, "learning_rate": 1.7886327398352377e-05, "loss": 0.6747, "step": 174015 }, { "epoch": 1.93, "learning_rate": 1.788540467121387e-05, "loss": 0.6199, "step": 174020 }, { "epoch": 1.93, "learning_rate": 1.7884481944075353e-05, "loss": 0.6645, "step": 174025 }, { "epoch": 1.93, "learning_rate": 1.7883559216936844e-05, "loss": 0.6314, "step": 174030 }, { "epoch": 1.93, "learning_rate": 1.788263648979833e-05, "loss": 0.5804, "step": 174035 }, { "epoch": 1.93, "learning_rate": 1.788171376265982e-05, "loss": 0.6369, "step": 174040 }, { "epoch": 1.93, "learning_rate": 1.7880791035521304e-05, "loss": 0.6379, "step": 174045 }, { "epoch": 1.93, "learning_rate": 1.7879868308382792e-05, "loss": 0.6013, "step": 174050 }, { "epoch": 1.93, "learning_rate": 1.787894558124428e-05, "loss": 0.61, "step": 174055 }, { "epoch": 1.93, "learning_rate": 1.7878022854105768e-05, "loss": 0.5797, "step": 174060 }, { "epoch": 1.93, "learning_rate": 1.7877100126967256e-05, "loss": 0.6075, "step": 174065 }, { "epoch": 1.93, "learning_rate": 1.787617739982874e-05, "loss": 0.608, "step": 174070 }, { "epoch": 1.93, "learning_rate": 1.787525467269023e-05, "loss": 0.654, "step": 174075 }, { "epoch": 1.93, "learning_rate": 1.7874331945551716e-05, "loss": 0.6213, "step": 174080 }, { "epoch": 1.93, "learning_rate": 1.7873409218413207e-05, "loss": 0.6715, "step": 174085 }, { "epoch": 1.93, "learning_rate": 1.7872486491274692e-05, "loss": 0.6369, "step": 174090 }, { "epoch": 1.93, "learning_rate": 1.7871563764136183e-05, "loss": 0.6193, "step": 174095 }, { "epoch": 1.93, "learning_rate": 1.7870641036997667e-05, "loss": 0.6202, "step": 174100 }, { "epoch": 1.93, "learning_rate": 1.7869718309859155e-05, "loss": 0.6122, "step": 174105 }, { "epoch": 1.93, "learning_rate": 1.7868795582720643e-05, "loss": 0.5978, "step": 174110 }, { "epoch": 1.93, "learning_rate": 1.786787285558213e-05, "loss": 0.6202, "step": 174115 }, { "epoch": 1.93, "learning_rate": 1.786695012844362e-05, "loss": 0.6187, "step": 174120 }, { "epoch": 1.93, "learning_rate": 1.7866027401305103e-05, "loss": 0.6539, "step": 174125 }, { "epoch": 1.93, "learning_rate": 1.7865104674166595e-05, "loss": 0.5884, "step": 174130 }, { "epoch": 1.93, "learning_rate": 1.786418194702808e-05, "loss": 0.6165, "step": 174135 }, { "epoch": 1.93, "learning_rate": 1.786325921988957e-05, "loss": 0.5726, "step": 174140 }, { "epoch": 1.93, "learning_rate": 1.7862336492751055e-05, "loss": 0.588, "step": 174145 }, { "epoch": 1.93, "learning_rate": 1.7861413765612546e-05, "loss": 0.6378, "step": 174150 }, { "epoch": 1.93, "learning_rate": 1.786049103847403e-05, "loss": 0.662, "step": 174155 }, { "epoch": 1.93, "learning_rate": 1.785956831133552e-05, "loss": 0.6353, "step": 174160 }, { "epoch": 1.93, "learning_rate": 1.7858645584197006e-05, "loss": 0.6139, "step": 174165 }, { "epoch": 1.93, "learning_rate": 1.7857722857058494e-05, "loss": 0.6125, "step": 174170 }, { "epoch": 1.93, "learning_rate": 1.7856800129919982e-05, "loss": 0.6549, "step": 174175 }, { "epoch": 1.93, "learning_rate": 1.785587740278147e-05, "loss": 0.6169, "step": 174180 }, { "epoch": 1.93, "learning_rate": 1.7854954675642957e-05, "loss": 0.6092, "step": 174185 }, { "epoch": 1.93, "learning_rate": 1.7854031948504445e-05, "loss": 0.6, "step": 174190 }, { "epoch": 1.93, "learning_rate": 1.7853109221365933e-05, "loss": 0.5657, "step": 174195 }, { "epoch": 1.93, "learning_rate": 1.7852186494227418e-05, "loss": 0.6208, "step": 174200 }, { "epoch": 1.93, "learning_rate": 1.785126376708891e-05, "loss": 0.6128, "step": 174205 }, { "epoch": 1.93, "learning_rate": 1.7850341039950393e-05, "loss": 0.6214, "step": 174210 }, { "epoch": 1.93, "learning_rate": 1.7849418312811885e-05, "loss": 0.5957, "step": 174215 }, { "epoch": 1.93, "learning_rate": 1.784849558567337e-05, "loss": 0.6039, "step": 174220 }, { "epoch": 1.93, "learning_rate": 1.7847572858534857e-05, "loss": 0.6368, "step": 174225 }, { "epoch": 1.93, "learning_rate": 1.7846650131396345e-05, "loss": 0.6221, "step": 174230 }, { "epoch": 1.93, "learning_rate": 1.7845727404257833e-05, "loss": 0.5909, "step": 174235 }, { "epoch": 1.93, "learning_rate": 1.784480467711932e-05, "loss": 0.6104, "step": 174240 }, { "epoch": 1.93, "learning_rate": 1.7843881949980808e-05, "loss": 0.6665, "step": 174245 }, { "epoch": 1.93, "learning_rate": 1.7842959222842296e-05, "loss": 0.6015, "step": 174250 }, { "epoch": 1.93, "learning_rate": 1.7842036495703784e-05, "loss": 0.5819, "step": 174255 }, { "epoch": 1.93, "learning_rate": 1.7841113768565272e-05, "loss": 0.617, "step": 174260 }, { "epoch": 1.93, "learning_rate": 1.784019104142676e-05, "loss": 0.6474, "step": 174265 }, { "epoch": 1.93, "learning_rate": 1.7839268314288248e-05, "loss": 0.5963, "step": 174270 }, { "epoch": 1.93, "learning_rate": 1.7838345587149732e-05, "loss": 0.5986, "step": 174275 }, { "epoch": 1.93, "learning_rate": 1.783742286001122e-05, "loss": 0.645, "step": 174280 }, { "epoch": 1.93, "learning_rate": 1.7836500132872708e-05, "loss": 0.6353, "step": 174285 }, { "epoch": 1.93, "learning_rate": 1.7835577405734196e-05, "loss": 0.6039, "step": 174290 }, { "epoch": 1.93, "learning_rate": 1.7834654678595683e-05, "loss": 0.5989, "step": 174295 }, { "epoch": 1.93, "learning_rate": 1.783373195145717e-05, "loss": 0.6105, "step": 174300 }, { "epoch": 1.93, "learning_rate": 1.783280922431866e-05, "loss": 0.6136, "step": 174305 }, { "epoch": 1.93, "learning_rate": 1.7831886497180147e-05, "loss": 0.6137, "step": 174310 }, { "epoch": 1.93, "learning_rate": 1.7830963770041635e-05, "loss": 0.6211, "step": 174315 }, { "epoch": 1.93, "learning_rate": 1.7830041042903123e-05, "loss": 0.6282, "step": 174320 }, { "epoch": 1.93, "learning_rate": 1.782911831576461e-05, "loss": 0.6129, "step": 174325 }, { "epoch": 1.93, "learning_rate": 1.7828195588626098e-05, "loss": 0.605, "step": 174330 }, { "epoch": 1.93, "learning_rate": 1.7827272861487586e-05, "loss": 0.6307, "step": 174335 }, { "epoch": 1.93, "learning_rate": 1.7826350134349074e-05, "loss": 0.6311, "step": 174340 }, { "epoch": 1.93, "learning_rate": 1.782542740721056e-05, "loss": 0.6453, "step": 174345 }, { "epoch": 1.93, "learning_rate": 1.7824504680072046e-05, "loss": 0.6438, "step": 174350 }, { "epoch": 1.93, "learning_rate": 1.7823581952933534e-05, "loss": 0.6573, "step": 174355 }, { "epoch": 1.93, "learning_rate": 1.7822659225795022e-05, "loss": 0.6096, "step": 174360 }, { "epoch": 1.93, "learning_rate": 1.782173649865651e-05, "loss": 0.6672, "step": 174365 }, { "epoch": 1.93, "learning_rate": 1.7820813771517998e-05, "loss": 0.6213, "step": 174370 }, { "epoch": 1.93, "learning_rate": 1.7819891044379486e-05, "loss": 0.6146, "step": 174375 }, { "epoch": 1.93, "learning_rate": 1.7818968317240973e-05, "loss": 0.5976, "step": 174380 }, { "epoch": 1.93, "learning_rate": 1.781804559010246e-05, "loss": 0.6353, "step": 174385 }, { "epoch": 1.93, "learning_rate": 1.781712286296395e-05, "loss": 0.5919, "step": 174390 }, { "epoch": 1.93, "learning_rate": 1.7816200135825437e-05, "loss": 0.6259, "step": 174395 }, { "epoch": 1.93, "learning_rate": 1.781527740868692e-05, "loss": 0.5691, "step": 174400 }, { "epoch": 1.93, "learning_rate": 1.7814354681548413e-05, "loss": 0.6085, "step": 174405 }, { "epoch": 1.93, "learning_rate": 1.7813431954409897e-05, "loss": 0.6396, "step": 174410 }, { "epoch": 1.93, "learning_rate": 1.781250922727139e-05, "loss": 0.6094, "step": 174415 }, { "epoch": 1.93, "learning_rate": 1.7811586500132873e-05, "loss": 0.5988, "step": 174420 }, { "epoch": 1.93, "learning_rate": 1.781066377299436e-05, "loss": 0.6002, "step": 174425 }, { "epoch": 1.93, "learning_rate": 1.780974104585585e-05, "loss": 0.6122, "step": 174430 }, { "epoch": 1.93, "learning_rate": 1.7808818318717336e-05, "loss": 0.654, "step": 174435 }, { "epoch": 1.93, "learning_rate": 1.7807895591578824e-05, "loss": 0.5963, "step": 174440 }, { "epoch": 1.93, "learning_rate": 1.7806972864440312e-05, "loss": 0.6698, "step": 174445 }, { "epoch": 1.93, "learning_rate": 1.78060501373018e-05, "loss": 0.5636, "step": 174450 }, { "epoch": 1.93, "learning_rate": 1.7805127410163284e-05, "loss": 0.5678, "step": 174455 }, { "epoch": 1.93, "learning_rate": 1.7804204683024776e-05, "loss": 0.6211, "step": 174460 }, { "epoch": 1.93, "learning_rate": 1.780328195588626e-05, "loss": 0.6238, "step": 174465 }, { "epoch": 1.93, "learning_rate": 1.780235922874775e-05, "loss": 0.6327, "step": 174470 }, { "epoch": 1.93, "learning_rate": 1.7801436501609236e-05, "loss": 0.6476, "step": 174475 }, { "epoch": 1.93, "learning_rate": 1.7800513774470727e-05, "loss": 0.6053, "step": 174480 }, { "epoch": 1.93, "learning_rate": 1.779959104733221e-05, "loss": 0.6036, "step": 174485 }, { "epoch": 1.93, "learning_rate": 1.7798668320193703e-05, "loss": 0.6043, "step": 174490 }, { "epoch": 1.93, "learning_rate": 1.7797745593055187e-05, "loss": 0.6145, "step": 174495 }, { "epoch": 1.93, "learning_rate": 1.7796822865916675e-05, "loss": 0.5955, "step": 174500 }, { "epoch": 1.93, "learning_rate": 1.7795900138778163e-05, "loss": 0.6655, "step": 174505 }, { "epoch": 1.93, "learning_rate": 1.7794977411639647e-05, "loss": 0.588, "step": 174510 }, { "epoch": 1.93, "learning_rate": 1.779405468450114e-05, "loss": 0.5838, "step": 174515 }, { "epoch": 1.93, "learning_rate": 1.7793131957362623e-05, "loss": 0.578, "step": 174520 }, { "epoch": 1.93, "learning_rate": 1.7792209230224114e-05, "loss": 0.6149, "step": 174525 }, { "epoch": 1.93, "learning_rate": 1.77912865030856e-05, "loss": 0.6408, "step": 174530 }, { "epoch": 1.93, "learning_rate": 1.779036377594709e-05, "loss": 0.6611, "step": 174535 }, { "epoch": 1.93, "learning_rate": 1.7789441048808574e-05, "loss": 0.6971, "step": 174540 }, { "epoch": 1.93, "learning_rate": 1.7788518321670066e-05, "loss": 0.6151, "step": 174545 }, { "epoch": 1.93, "learning_rate": 1.778759559453155e-05, "loss": 0.6671, "step": 174550 }, { "epoch": 1.93, "learning_rate": 1.7786672867393038e-05, "loss": 0.6251, "step": 174555 }, { "epoch": 1.93, "learning_rate": 1.7785750140254526e-05, "loss": 0.6053, "step": 174560 }, { "epoch": 1.93, "learning_rate": 1.7784827413116014e-05, "loss": 0.5897, "step": 174565 }, { "epoch": 1.93, "learning_rate": 1.77839046859775e-05, "loss": 0.6052, "step": 174570 }, { "epoch": 1.93, "learning_rate": 1.7782981958838986e-05, "loss": 0.6477, "step": 174575 }, { "epoch": 1.93, "learning_rate": 1.7782059231700477e-05, "loss": 0.5678, "step": 174580 }, { "epoch": 1.93, "learning_rate": 1.778113650456196e-05, "loss": 0.6171, "step": 174585 }, { "epoch": 1.93, "learning_rate": 1.7780213777423453e-05, "loss": 0.5856, "step": 174590 }, { "epoch": 1.93, "learning_rate": 1.7779291050284937e-05, "loss": 0.602, "step": 174595 }, { "epoch": 1.93, "learning_rate": 1.777836832314643e-05, "loss": 0.6323, "step": 174600 }, { "epoch": 1.93, "learning_rate": 1.7777445596007913e-05, "loss": 0.6183, "step": 174605 }, { "epoch": 1.93, "learning_rate": 1.77765228688694e-05, "loss": 0.5736, "step": 174610 }, { "epoch": 1.93, "learning_rate": 1.777560014173089e-05, "loss": 0.6283, "step": 174615 }, { "epoch": 1.93, "learning_rate": 1.7774677414592377e-05, "loss": 0.6555, "step": 174620 }, { "epoch": 1.93, "learning_rate": 1.7773754687453864e-05, "loss": 0.6107, "step": 174625 }, { "epoch": 1.93, "learning_rate": 1.7772831960315352e-05, "loss": 0.564, "step": 174630 }, { "epoch": 1.93, "learning_rate": 1.777190923317684e-05, "loss": 0.6638, "step": 174635 }, { "epoch": 1.93, "learning_rate": 1.7770986506038328e-05, "loss": 0.6338, "step": 174640 }, { "epoch": 1.93, "learning_rate": 1.7770063778899816e-05, "loss": 0.644, "step": 174645 }, { "epoch": 1.93, "learning_rate": 1.77691410517613e-05, "loss": 0.6722, "step": 174650 }, { "epoch": 1.93, "learning_rate": 1.776821832462279e-05, "loss": 0.5908, "step": 174655 }, { "epoch": 1.93, "learning_rate": 1.7767295597484276e-05, "loss": 0.5439, "step": 174660 }, { "epoch": 1.93, "learning_rate": 1.7766372870345764e-05, "loss": 0.6483, "step": 174665 }, { "epoch": 1.93, "learning_rate": 1.776545014320725e-05, "loss": 0.6128, "step": 174670 }, { "epoch": 1.93, "learning_rate": 1.776452741606874e-05, "loss": 0.6489, "step": 174675 }, { "epoch": 1.93, "learning_rate": 1.7763604688930227e-05, "loss": 0.5766, "step": 174680 }, { "epoch": 1.93, "learning_rate": 1.7762681961791715e-05, "loss": 0.5528, "step": 174685 }, { "epoch": 1.93, "learning_rate": 1.7761759234653203e-05, "loss": 0.6303, "step": 174690 }, { "epoch": 1.93, "learning_rate": 1.776083650751469e-05, "loss": 0.5851, "step": 174695 }, { "epoch": 1.93, "learning_rate": 1.775991378037618e-05, "loss": 0.5641, "step": 174700 }, { "epoch": 1.93, "learning_rate": 1.7758991053237667e-05, "loss": 0.6379, "step": 174705 }, { "epoch": 1.93, "learning_rate": 1.7758068326099154e-05, "loss": 0.6528, "step": 174710 }, { "epoch": 1.93, "learning_rate": 1.7757145598960642e-05, "loss": 0.6377, "step": 174715 }, { "epoch": 1.93, "learning_rate": 1.775622287182213e-05, "loss": 0.6375, "step": 174720 }, { "epoch": 1.93, "learning_rate": 1.7755300144683615e-05, "loss": 0.6419, "step": 174725 }, { "epoch": 1.93, "learning_rate": 1.7754377417545102e-05, "loss": 0.5891, "step": 174730 }, { "epoch": 1.93, "learning_rate": 1.775345469040659e-05, "loss": 0.6786, "step": 174735 }, { "epoch": 1.93, "learning_rate": 1.7752531963268078e-05, "loss": 0.5977, "step": 174740 }, { "epoch": 1.93, "learning_rate": 1.7751609236129566e-05, "loss": 0.5868, "step": 174745 }, { "epoch": 1.93, "learning_rate": 1.7750686508991054e-05, "loss": 0.6056, "step": 174750 }, { "epoch": 1.94, "learning_rate": 1.7749763781852542e-05, "loss": 0.5991, "step": 174755 }, { "epoch": 1.94, "learning_rate": 1.774884105471403e-05, "loss": 0.6303, "step": 174760 }, { "epoch": 1.94, "learning_rate": 1.7747918327575517e-05, "loss": 0.6384, "step": 174765 }, { "epoch": 1.94, "learning_rate": 1.7746995600437005e-05, "loss": 0.5708, "step": 174770 }, { "epoch": 1.94, "learning_rate": 1.7746072873298493e-05, "loss": 0.6138, "step": 174775 }, { "epoch": 1.94, "learning_rate": 1.774515014615998e-05, "loss": 0.5778, "step": 174780 }, { "epoch": 1.94, "learning_rate": 1.7744227419021465e-05, "loss": 0.618, "step": 174785 }, { "epoch": 1.94, "learning_rate": 1.7743304691882957e-05, "loss": 0.605, "step": 174790 }, { "epoch": 1.94, "learning_rate": 1.774238196474444e-05, "loss": 0.6116, "step": 174795 }, { "epoch": 1.94, "learning_rate": 1.774145923760593e-05, "loss": 0.6933, "step": 174800 }, { "epoch": 1.94, "learning_rate": 1.7740536510467417e-05, "loss": 0.5952, "step": 174805 }, { "epoch": 1.94, "learning_rate": 1.7739613783328905e-05, "loss": 0.6392, "step": 174810 }, { "epoch": 1.94, "learning_rate": 1.7738691056190393e-05, "loss": 0.6232, "step": 174815 }, { "epoch": 1.94, "learning_rate": 1.773776832905188e-05, "loss": 0.5547, "step": 174820 }, { "epoch": 1.94, "learning_rate": 1.7736845601913368e-05, "loss": 0.5674, "step": 174825 }, { "epoch": 1.94, "learning_rate": 1.7735922874774856e-05, "loss": 0.601, "step": 174830 }, { "epoch": 1.94, "learning_rate": 1.7735000147636344e-05, "loss": 0.6136, "step": 174835 }, { "epoch": 1.94, "learning_rate": 1.773407742049783e-05, "loss": 0.6625, "step": 174840 }, { "epoch": 1.94, "learning_rate": 1.773315469335932e-05, "loss": 0.5995, "step": 174845 }, { "epoch": 1.94, "learning_rate": 1.7732231966220804e-05, "loss": 0.6094, "step": 174850 }, { "epoch": 1.94, "learning_rate": 1.7731309239082295e-05, "loss": 0.6065, "step": 174855 }, { "epoch": 1.94, "learning_rate": 1.773038651194378e-05, "loss": 0.6995, "step": 174860 }, { "epoch": 1.94, "learning_rate": 1.772946378480527e-05, "loss": 0.6349, "step": 174865 }, { "epoch": 1.94, "learning_rate": 1.7728541057666755e-05, "loss": 0.5933, "step": 174870 }, { "epoch": 1.94, "learning_rate": 1.7727618330528247e-05, "loss": 0.6, "step": 174875 }, { "epoch": 1.94, "learning_rate": 1.772669560338973e-05, "loss": 0.6267, "step": 174880 }, { "epoch": 1.94, "learning_rate": 1.772577287625122e-05, "loss": 0.6251, "step": 174885 }, { "epoch": 1.94, "learning_rate": 1.7724850149112707e-05, "loss": 0.5698, "step": 174890 }, { "epoch": 1.94, "learning_rate": 1.772392742197419e-05, "loss": 0.6132, "step": 174895 }, { "epoch": 1.94, "learning_rate": 1.7723004694835683e-05, "loss": 0.6087, "step": 174900 }, { "epoch": 1.94, "learning_rate": 1.7722081967697167e-05, "loss": 0.6307, "step": 174905 }, { "epoch": 1.94, "learning_rate": 1.7721159240558658e-05, "loss": 0.6493, "step": 174910 }, { "epoch": 1.94, "learning_rate": 1.7720236513420143e-05, "loss": 0.5659, "step": 174915 }, { "epoch": 1.94, "learning_rate": 1.7719313786281634e-05, "loss": 0.6218, "step": 174920 }, { "epoch": 1.94, "learning_rate": 1.771839105914312e-05, "loss": 0.575, "step": 174925 }, { "epoch": 1.94, "learning_rate": 1.771746833200461e-05, "loss": 0.5943, "step": 174930 }, { "epoch": 1.94, "learning_rate": 1.7716545604866094e-05, "loss": 0.6111, "step": 174935 }, { "epoch": 1.94, "learning_rate": 1.7715622877727582e-05, "loss": 0.6379, "step": 174940 }, { "epoch": 1.94, "learning_rate": 1.771470015058907e-05, "loss": 0.6154, "step": 174945 }, { "epoch": 1.94, "learning_rate": 1.7713777423450558e-05, "loss": 0.6357, "step": 174950 }, { "epoch": 1.94, "learning_rate": 1.7712854696312046e-05, "loss": 0.5862, "step": 174955 }, { "epoch": 1.94, "learning_rate": 1.771193196917353e-05, "loss": 0.5636, "step": 174960 }, { "epoch": 1.94, "learning_rate": 1.771100924203502e-05, "loss": 0.5917, "step": 174965 }, { "epoch": 1.94, "learning_rate": 1.7710086514896506e-05, "loss": 0.6027, "step": 174970 }, { "epoch": 1.94, "learning_rate": 1.7709163787757997e-05, "loss": 0.6106, "step": 174975 }, { "epoch": 1.94, "learning_rate": 1.770824106061948e-05, "loss": 0.5863, "step": 174980 }, { "epoch": 1.94, "learning_rate": 1.7707318333480973e-05, "loss": 0.5911, "step": 174985 }, { "epoch": 1.94, "learning_rate": 1.7706395606342457e-05, "loss": 0.5924, "step": 174990 }, { "epoch": 1.94, "learning_rate": 1.7705472879203945e-05, "loss": 0.5963, "step": 174995 }, { "epoch": 1.94, "learning_rate": 1.7704550152065433e-05, "loss": 0.6146, "step": 175000 }, { "epoch": 1.94, "eval_loss": 0.600255012512207, "eval_runtime": 69.3047, "eval_samples_per_second": 28.858, "eval_steps_per_second": 14.429, "step": 175000 }, { "epoch": 1.94, "learning_rate": 1.770362742492692e-05, "loss": 0.6156, "step": 175005 }, { "epoch": 1.94, "learning_rate": 1.770270469778841e-05, "loss": 0.6289, "step": 175010 }, { "epoch": 1.94, "learning_rate": 1.7701781970649896e-05, "loss": 0.6058, "step": 175015 }, { "epoch": 1.94, "learning_rate": 1.7700859243511384e-05, "loss": 0.5917, "step": 175020 }, { "epoch": 1.94, "learning_rate": 1.7699936516372872e-05, "loss": 0.6273, "step": 175025 }, { "epoch": 1.94, "learning_rate": 1.769901378923436e-05, "loss": 0.6467, "step": 175030 }, { "epoch": 1.94, "learning_rate": 1.7698091062095844e-05, "loss": 0.5869, "step": 175035 }, { "epoch": 1.94, "learning_rate": 1.7697168334957336e-05, "loss": 0.621, "step": 175040 }, { "epoch": 1.94, "learning_rate": 1.769624560781882e-05, "loss": 0.6499, "step": 175045 }, { "epoch": 1.94, "learning_rate": 1.7695322880680308e-05, "loss": 0.6204, "step": 175050 }, { "epoch": 1.94, "learning_rate": 1.7694400153541796e-05, "loss": 0.6179, "step": 175055 }, { "epoch": 1.94, "learning_rate": 1.7693477426403284e-05, "loss": 0.6471, "step": 175060 }, { "epoch": 1.94, "learning_rate": 1.769255469926477e-05, "loss": 0.6113, "step": 175065 }, { "epoch": 1.94, "learning_rate": 1.769163197212626e-05, "loss": 0.5948, "step": 175070 }, { "epoch": 1.94, "learning_rate": 1.7690709244987747e-05, "loss": 0.5715, "step": 175075 }, { "epoch": 1.94, "learning_rate": 1.7689786517849235e-05, "loss": 0.5875, "step": 175080 }, { "epoch": 1.94, "learning_rate": 1.7688863790710723e-05, "loss": 0.6483, "step": 175085 }, { "epoch": 1.94, "learning_rate": 1.768794106357221e-05, "loss": 0.5967, "step": 175090 }, { "epoch": 1.94, "learning_rate": 1.76870183364337e-05, "loss": 0.6, "step": 175095 }, { "epoch": 1.94, "learning_rate": 1.7686095609295186e-05, "loss": 0.6434, "step": 175100 }, { "epoch": 1.94, "learning_rate": 1.7685172882156674e-05, "loss": 0.6246, "step": 175105 }, { "epoch": 1.94, "learning_rate": 1.768425015501816e-05, "loss": 0.5721, "step": 175110 }, { "epoch": 1.94, "learning_rate": 1.7683327427879646e-05, "loss": 0.6439, "step": 175115 }, { "epoch": 1.94, "learning_rate": 1.7682404700741134e-05, "loss": 0.6222, "step": 175120 }, { "epoch": 1.94, "learning_rate": 1.7681481973602622e-05, "loss": 0.6117, "step": 175125 }, { "epoch": 1.94, "learning_rate": 1.768055924646411e-05, "loss": 0.6129, "step": 175130 }, { "epoch": 1.94, "learning_rate": 1.7679636519325598e-05, "loss": 0.6122, "step": 175135 }, { "epoch": 1.94, "learning_rate": 1.7678713792187086e-05, "loss": 0.5811, "step": 175140 }, { "epoch": 1.94, "learning_rate": 1.7677791065048574e-05, "loss": 0.6129, "step": 175145 }, { "epoch": 1.94, "learning_rate": 1.767686833791006e-05, "loss": 0.6696, "step": 175150 }, { "epoch": 1.94, "learning_rate": 1.767594561077155e-05, "loss": 0.6144, "step": 175155 }, { "epoch": 1.94, "learning_rate": 1.7675022883633037e-05, "loss": 0.6645, "step": 175160 }, { "epoch": 1.94, "learning_rate": 1.7674100156494525e-05, "loss": 0.6353, "step": 175165 }, { "epoch": 1.94, "learning_rate": 1.767317742935601e-05, "loss": 0.601, "step": 175170 }, { "epoch": 1.94, "learning_rate": 1.76722547022175e-05, "loss": 0.7117, "step": 175175 }, { "epoch": 1.94, "learning_rate": 1.7671331975078985e-05, "loss": 0.5748, "step": 175180 }, { "epoch": 1.94, "learning_rate": 1.7670409247940473e-05, "loss": 0.5962, "step": 175185 }, { "epoch": 1.94, "learning_rate": 1.766948652080196e-05, "loss": 0.5787, "step": 175190 }, { "epoch": 1.94, "learning_rate": 1.766856379366345e-05, "loss": 0.5837, "step": 175195 }, { "epoch": 1.94, "learning_rate": 1.7667641066524937e-05, "loss": 0.6117, "step": 175200 }, { "epoch": 1.94, "learning_rate": 1.7666718339386424e-05, "loss": 0.5807, "step": 175205 }, { "epoch": 1.94, "learning_rate": 1.7665795612247912e-05, "loss": 0.6059, "step": 175210 }, { "epoch": 1.94, "learning_rate": 1.76648728851094e-05, "loss": 0.5982, "step": 175215 }, { "epoch": 1.94, "learning_rate": 1.7663950157970888e-05, "loss": 0.5721, "step": 175220 }, { "epoch": 1.94, "learning_rate": 1.7663027430832372e-05, "loss": 0.6241, "step": 175225 }, { "epoch": 1.94, "learning_rate": 1.7662104703693864e-05, "loss": 0.6189, "step": 175230 }, { "epoch": 1.94, "learning_rate": 1.7661181976555348e-05, "loss": 0.5937, "step": 175235 }, { "epoch": 1.94, "learning_rate": 1.766025924941684e-05, "loss": 0.5774, "step": 175240 }, { "epoch": 1.94, "learning_rate": 1.7659336522278324e-05, "loss": 0.6162, "step": 175245 }, { "epoch": 1.94, "learning_rate": 1.7658413795139815e-05, "loss": 0.6756, "step": 175250 }, { "epoch": 1.94, "learning_rate": 1.76574910680013e-05, "loss": 0.6172, "step": 175255 }, { "epoch": 1.94, "learning_rate": 1.7656568340862787e-05, "loss": 0.5341, "step": 175260 }, { "epoch": 1.94, "learning_rate": 1.7655645613724275e-05, "loss": 0.6053, "step": 175265 }, { "epoch": 1.94, "learning_rate": 1.7654722886585763e-05, "loss": 0.6525, "step": 175270 }, { "epoch": 1.94, "learning_rate": 1.765380015944725e-05, "loss": 0.623, "step": 175275 }, { "epoch": 1.94, "learning_rate": 1.7652877432308735e-05, "loss": 0.6121, "step": 175280 }, { "epoch": 1.94, "learning_rate": 1.7651954705170227e-05, "loss": 0.5929, "step": 175285 }, { "epoch": 1.94, "learning_rate": 1.765103197803171e-05, "loss": 0.6296, "step": 175290 }, { "epoch": 1.94, "learning_rate": 1.7650109250893202e-05, "loss": 0.6546, "step": 175295 }, { "epoch": 1.94, "learning_rate": 1.7649186523754687e-05, "loss": 0.5937, "step": 175300 }, { "epoch": 1.94, "learning_rate": 1.7648263796616178e-05, "loss": 0.6184, "step": 175305 }, { "epoch": 1.94, "learning_rate": 1.7647341069477662e-05, "loss": 0.6239, "step": 175310 }, { "epoch": 1.94, "learning_rate": 1.7646418342339154e-05, "loss": 0.6336, "step": 175315 }, { "epoch": 1.94, "learning_rate": 1.7645495615200638e-05, "loss": 0.5811, "step": 175320 }, { "epoch": 1.94, "learning_rate": 1.7644572888062126e-05, "loss": 0.6158, "step": 175325 }, { "epoch": 1.94, "learning_rate": 1.7643650160923614e-05, "loss": 0.6174, "step": 175330 }, { "epoch": 1.94, "learning_rate": 1.76427274337851e-05, "loss": 0.5673, "step": 175335 }, { "epoch": 1.94, "learning_rate": 1.764180470664659e-05, "loss": 0.6746, "step": 175340 }, { "epoch": 1.94, "learning_rate": 1.7640881979508074e-05, "loss": 0.6586, "step": 175345 }, { "epoch": 1.94, "learning_rate": 1.7639959252369565e-05, "loss": 0.5832, "step": 175350 }, { "epoch": 1.94, "learning_rate": 1.763903652523105e-05, "loss": 0.6268, "step": 175355 }, { "epoch": 1.94, "learning_rate": 1.763811379809254e-05, "loss": 0.6019, "step": 175360 }, { "epoch": 1.94, "learning_rate": 1.7637191070954025e-05, "loss": 0.6931, "step": 175365 }, { "epoch": 1.94, "learning_rate": 1.7636268343815517e-05, "loss": 0.6647, "step": 175370 }, { "epoch": 1.94, "learning_rate": 1.7635345616677e-05, "loss": 0.6102, "step": 175375 }, { "epoch": 1.94, "learning_rate": 1.763442288953849e-05, "loss": 0.5758, "step": 175380 }, { "epoch": 1.94, "learning_rate": 1.7633500162399977e-05, "loss": 0.664, "step": 175385 }, { "epoch": 1.94, "learning_rate": 1.7632577435261465e-05, "loss": 0.5768, "step": 175390 }, { "epoch": 1.94, "learning_rate": 1.7631654708122952e-05, "loss": 0.6477, "step": 175395 }, { "epoch": 1.94, "learning_rate": 1.763073198098444e-05, "loss": 0.6138, "step": 175400 }, { "epoch": 1.94, "learning_rate": 1.7629809253845928e-05, "loss": 0.5831, "step": 175405 }, { "epoch": 1.94, "learning_rate": 1.7628886526707413e-05, "loss": 0.6093, "step": 175410 }, { "epoch": 1.94, "learning_rate": 1.7627963799568904e-05, "loss": 0.632, "step": 175415 }, { "epoch": 1.94, "learning_rate": 1.762704107243039e-05, "loss": 0.5552, "step": 175420 }, { "epoch": 1.94, "learning_rate": 1.762611834529188e-05, "loss": 0.6508, "step": 175425 }, { "epoch": 1.94, "learning_rate": 1.7625195618153364e-05, "loss": 0.5908, "step": 175430 }, { "epoch": 1.94, "learning_rate": 1.7624272891014855e-05, "loss": 0.5777, "step": 175435 }, { "epoch": 1.94, "learning_rate": 1.762335016387634e-05, "loss": 0.6176, "step": 175440 }, { "epoch": 1.94, "learning_rate": 1.7622427436737828e-05, "loss": 0.5959, "step": 175445 }, { "epoch": 1.94, "learning_rate": 1.7621504709599315e-05, "loss": 0.6415, "step": 175450 }, { "epoch": 1.94, "learning_rate": 1.7620581982460803e-05, "loss": 0.6233, "step": 175455 }, { "epoch": 1.94, "learning_rate": 1.761965925532229e-05, "loss": 0.5956, "step": 175460 }, { "epoch": 1.94, "learning_rate": 1.761873652818378e-05, "loss": 0.6096, "step": 175465 }, { "epoch": 1.94, "learning_rate": 1.7617813801045267e-05, "loss": 0.5928, "step": 175470 }, { "epoch": 1.94, "learning_rate": 1.7616891073906755e-05, "loss": 0.6496, "step": 175475 }, { "epoch": 1.94, "learning_rate": 1.7615968346768243e-05, "loss": 0.6428, "step": 175480 }, { "epoch": 1.94, "learning_rate": 1.7615045619629727e-05, "loss": 0.591, "step": 175485 }, { "epoch": 1.94, "learning_rate": 1.7614122892491218e-05, "loss": 0.6323, "step": 175490 }, { "epoch": 1.94, "learning_rate": 1.7613200165352703e-05, "loss": 0.5863, "step": 175495 }, { "epoch": 1.94, "learning_rate": 1.761227743821419e-05, "loss": 0.5881, "step": 175500 }, { "epoch": 1.94, "learning_rate": 1.761135471107568e-05, "loss": 0.5837, "step": 175505 }, { "epoch": 1.94, "learning_rate": 1.7610431983937166e-05, "loss": 0.5649, "step": 175510 }, { "epoch": 1.94, "learning_rate": 1.7609509256798654e-05, "loss": 0.6263, "step": 175515 }, { "epoch": 1.94, "learning_rate": 1.7608586529660142e-05, "loss": 0.5844, "step": 175520 }, { "epoch": 1.94, "learning_rate": 1.760766380252163e-05, "loss": 0.5836, "step": 175525 }, { "epoch": 1.94, "learning_rate": 1.7606741075383118e-05, "loss": 0.6127, "step": 175530 }, { "epoch": 1.94, "learning_rate": 1.7605818348244605e-05, "loss": 0.6514, "step": 175535 }, { "epoch": 1.94, "learning_rate": 1.7604895621106093e-05, "loss": 0.5768, "step": 175540 }, { "epoch": 1.94, "learning_rate": 1.760397289396758e-05, "loss": 0.6099, "step": 175545 }, { "epoch": 1.94, "learning_rate": 1.760305016682907e-05, "loss": 0.6198, "step": 175550 }, { "epoch": 1.94, "learning_rate": 1.7602127439690553e-05, "loss": 0.6297, "step": 175555 }, { "epoch": 1.94, "learning_rate": 1.760120471255204e-05, "loss": 0.5815, "step": 175560 }, { "epoch": 1.94, "learning_rate": 1.760028198541353e-05, "loss": 0.5727, "step": 175565 }, { "epoch": 1.94, "learning_rate": 1.7599359258275017e-05, "loss": 0.5771, "step": 175570 }, { "epoch": 1.94, "learning_rate": 1.7598436531136505e-05, "loss": 0.65, "step": 175575 }, { "epoch": 1.94, "learning_rate": 1.7597513803997993e-05, "loss": 0.5784, "step": 175580 }, { "epoch": 1.94, "learning_rate": 1.759659107685948e-05, "loss": 0.6023, "step": 175585 }, { "epoch": 1.94, "learning_rate": 1.759566834972097e-05, "loss": 0.6441, "step": 175590 }, { "epoch": 1.94, "learning_rate": 1.7594745622582456e-05, "loss": 0.6357, "step": 175595 }, { "epoch": 1.94, "learning_rate": 1.7593822895443944e-05, "loss": 0.6163, "step": 175600 }, { "epoch": 1.94, "learning_rate": 1.7592900168305432e-05, "loss": 0.5569, "step": 175605 }, { "epoch": 1.94, "learning_rate": 1.7591977441166916e-05, "loss": 0.5807, "step": 175610 }, { "epoch": 1.94, "learning_rate": 1.7591054714028408e-05, "loss": 0.6582, "step": 175615 }, { "epoch": 1.94, "learning_rate": 1.7590131986889892e-05, "loss": 0.5701, "step": 175620 }, { "epoch": 1.94, "learning_rate": 1.7589209259751383e-05, "loss": 0.6224, "step": 175625 }, { "epoch": 1.94, "learning_rate": 1.7588286532612868e-05, "loss": 0.6919, "step": 175630 }, { "epoch": 1.94, "learning_rate": 1.7587363805474356e-05, "loss": 0.6177, "step": 175635 }, { "epoch": 1.94, "learning_rate": 1.7586441078335844e-05, "loss": 0.587, "step": 175640 }, { "epoch": 1.94, "learning_rate": 1.758551835119733e-05, "loss": 0.6299, "step": 175645 }, { "epoch": 1.94, "learning_rate": 1.758459562405882e-05, "loss": 0.6426, "step": 175650 }, { "epoch": 1.94, "learning_rate": 1.7583672896920307e-05, "loss": 0.6389, "step": 175655 }, { "epoch": 1.95, "learning_rate": 1.7582750169781795e-05, "loss": 0.6055, "step": 175660 }, { "epoch": 1.95, "learning_rate": 1.7581827442643283e-05, "loss": 0.6264, "step": 175665 }, { "epoch": 1.95, "learning_rate": 1.758090471550477e-05, "loss": 0.6435, "step": 175670 }, { "epoch": 1.95, "learning_rate": 1.7579981988366255e-05, "loss": 0.6083, "step": 175675 }, { "epoch": 1.95, "learning_rate": 1.7579059261227746e-05, "loss": 0.6282, "step": 175680 }, { "epoch": 1.95, "learning_rate": 1.757813653408923e-05, "loss": 0.6019, "step": 175685 }, { "epoch": 1.95, "learning_rate": 1.7577213806950722e-05, "loss": 0.656, "step": 175690 }, { "epoch": 1.95, "learning_rate": 1.7576291079812206e-05, "loss": 0.6594, "step": 175695 }, { "epoch": 1.95, "learning_rate": 1.7575368352673698e-05, "loss": 0.5796, "step": 175700 }, { "epoch": 1.95, "learning_rate": 1.7574445625535182e-05, "loss": 0.5909, "step": 175705 }, { "epoch": 1.95, "learning_rate": 1.757352289839667e-05, "loss": 0.5997, "step": 175710 }, { "epoch": 1.95, "learning_rate": 1.7572600171258158e-05, "loss": 0.628, "step": 175715 }, { "epoch": 1.95, "learning_rate": 1.7571677444119646e-05, "loss": 0.6521, "step": 175720 }, { "epoch": 1.95, "learning_rate": 1.7570754716981134e-05, "loss": 0.6831, "step": 175725 }, { "epoch": 1.95, "learning_rate": 1.7569831989842618e-05, "loss": 0.6659, "step": 175730 }, { "epoch": 1.95, "learning_rate": 1.756890926270411e-05, "loss": 0.6758, "step": 175735 }, { "epoch": 1.95, "learning_rate": 1.7567986535565594e-05, "loss": 0.6546, "step": 175740 }, { "epoch": 1.95, "learning_rate": 1.7567063808427085e-05, "loss": 0.5528, "step": 175745 }, { "epoch": 1.95, "learning_rate": 1.756614108128857e-05, "loss": 0.5961, "step": 175750 }, { "epoch": 1.95, "learning_rate": 1.756521835415006e-05, "loss": 0.5657, "step": 175755 }, { "epoch": 1.95, "learning_rate": 1.7564295627011545e-05, "loss": 0.5557, "step": 175760 }, { "epoch": 1.95, "learning_rate": 1.7563372899873033e-05, "loss": 0.5825, "step": 175765 }, { "epoch": 1.95, "learning_rate": 1.756245017273452e-05, "loss": 0.6888, "step": 175770 }, { "epoch": 1.95, "learning_rate": 1.756152744559601e-05, "loss": 0.6415, "step": 175775 }, { "epoch": 1.95, "learning_rate": 1.7560604718457496e-05, "loss": 0.6423, "step": 175780 }, { "epoch": 1.95, "learning_rate": 1.7559681991318984e-05, "loss": 0.5823, "step": 175785 }, { "epoch": 1.95, "learning_rate": 1.7558759264180472e-05, "loss": 0.6692, "step": 175790 }, { "epoch": 1.95, "learning_rate": 1.7557836537041957e-05, "loss": 0.6502, "step": 175795 }, { "epoch": 1.95, "learning_rate": 1.7556913809903448e-05, "loss": 0.5465, "step": 175800 }, { "epoch": 1.95, "learning_rate": 1.7555991082764932e-05, "loss": 0.598, "step": 175805 }, { "epoch": 1.95, "learning_rate": 1.7555068355626424e-05, "loss": 0.6217, "step": 175810 }, { "epoch": 1.95, "learning_rate": 1.7554145628487908e-05, "loss": 0.571, "step": 175815 }, { "epoch": 1.95, "learning_rate": 1.75532229013494e-05, "loss": 0.6082, "step": 175820 }, { "epoch": 1.95, "learning_rate": 1.7552300174210884e-05, "loss": 0.6073, "step": 175825 }, { "epoch": 1.95, "learning_rate": 1.755137744707237e-05, "loss": 0.6337, "step": 175830 }, { "epoch": 1.95, "learning_rate": 1.755045471993386e-05, "loss": 0.578, "step": 175835 }, { "epoch": 1.95, "learning_rate": 1.7549531992795347e-05, "loss": 0.5984, "step": 175840 }, { "epoch": 1.95, "learning_rate": 1.7548609265656835e-05, "loss": 0.6188, "step": 175845 }, { "epoch": 1.95, "learning_rate": 1.7547686538518323e-05, "loss": 0.6151, "step": 175850 }, { "epoch": 1.95, "learning_rate": 1.754676381137981e-05, "loss": 0.5769, "step": 175855 }, { "epoch": 1.95, "learning_rate": 1.75458410842413e-05, "loss": 0.5725, "step": 175860 }, { "epoch": 1.95, "learning_rate": 1.7544918357102787e-05, "loss": 0.6257, "step": 175865 }, { "epoch": 1.95, "learning_rate": 1.754399562996427e-05, "loss": 0.6105, "step": 175870 }, { "epoch": 1.95, "learning_rate": 1.7543072902825762e-05, "loss": 0.5844, "step": 175875 }, { "epoch": 1.95, "learning_rate": 1.7542150175687247e-05, "loss": 0.6348, "step": 175880 }, { "epoch": 1.95, "learning_rate": 1.7541227448548735e-05, "loss": 0.625, "step": 175885 }, { "epoch": 1.95, "learning_rate": 1.7540304721410222e-05, "loss": 0.6657, "step": 175890 }, { "epoch": 1.95, "learning_rate": 1.753938199427171e-05, "loss": 0.6515, "step": 175895 }, { "epoch": 1.95, "learning_rate": 1.7538459267133198e-05, "loss": 0.613, "step": 175900 }, { "epoch": 1.95, "learning_rate": 1.7537536539994686e-05, "loss": 0.6413, "step": 175905 }, { "epoch": 1.95, "learning_rate": 1.7536613812856174e-05, "loss": 0.6685, "step": 175910 }, { "epoch": 1.95, "learning_rate": 1.753569108571766e-05, "loss": 0.6932, "step": 175915 }, { "epoch": 1.95, "learning_rate": 1.753476835857915e-05, "loss": 0.6197, "step": 175920 }, { "epoch": 1.95, "learning_rate": 1.7533845631440637e-05, "loss": 0.6142, "step": 175925 }, { "epoch": 1.95, "learning_rate": 1.7532922904302125e-05, "loss": 0.5702, "step": 175930 }, { "epoch": 1.95, "learning_rate": 1.7532000177163613e-05, "loss": 0.583, "step": 175935 }, { "epoch": 1.95, "learning_rate": 1.7531077450025097e-05, "loss": 0.6079, "step": 175940 }, { "epoch": 1.95, "learning_rate": 1.7530154722886585e-05, "loss": 0.5713, "step": 175945 }, { "epoch": 1.95, "learning_rate": 1.7529231995748073e-05, "loss": 0.6197, "step": 175950 }, { "epoch": 1.95, "learning_rate": 1.752830926860956e-05, "loss": 0.6077, "step": 175955 }, { "epoch": 1.95, "learning_rate": 1.752738654147105e-05, "loss": 0.6072, "step": 175960 }, { "epoch": 1.95, "learning_rate": 1.7526463814332537e-05, "loss": 0.5949, "step": 175965 }, { "epoch": 1.95, "learning_rate": 1.7525541087194025e-05, "loss": 0.641, "step": 175970 }, { "epoch": 1.95, "learning_rate": 1.7524618360055512e-05, "loss": 0.6515, "step": 175975 }, { "epoch": 1.95, "learning_rate": 1.7523695632917e-05, "loss": 0.5889, "step": 175980 }, { "epoch": 1.95, "learning_rate": 1.7522772905778488e-05, "loss": 0.6004, "step": 175985 }, { "epoch": 1.95, "learning_rate": 1.7521850178639976e-05, "loss": 0.6269, "step": 175990 }, { "epoch": 1.95, "learning_rate": 1.752092745150146e-05, "loss": 0.6198, "step": 175995 }, { "epoch": 1.95, "learning_rate": 1.752000472436295e-05, "loss": 0.6051, "step": 176000 }, { "epoch": 1.95, "eval_loss": 0.5834904909133911, "eval_runtime": 69.2146, "eval_samples_per_second": 28.896, "eval_steps_per_second": 14.448, "step": 176000 }, { "epoch": 1.95, "learning_rate": 1.7519081997224436e-05, "loss": 0.6151, "step": 176005 }, { "epoch": 1.95, "learning_rate": 1.7518159270085927e-05, "loss": 0.6008, "step": 176010 }, { "epoch": 1.95, "learning_rate": 1.7517236542947412e-05, "loss": 0.6489, "step": 176015 }, { "epoch": 1.95, "learning_rate": 1.75163138158089e-05, "loss": 0.5967, "step": 176020 }, { "epoch": 1.95, "learning_rate": 1.7515391088670388e-05, "loss": 0.6147, "step": 176025 }, { "epoch": 1.95, "learning_rate": 1.7514468361531875e-05, "loss": 0.6248, "step": 176030 }, { "epoch": 1.95, "learning_rate": 1.7513545634393363e-05, "loss": 0.6004, "step": 176035 }, { "epoch": 1.95, "learning_rate": 1.751262290725485e-05, "loss": 0.5736, "step": 176040 }, { "epoch": 1.95, "learning_rate": 1.751170018011634e-05, "loss": 0.5899, "step": 176045 }, { "epoch": 1.95, "learning_rate": 1.7510777452977827e-05, "loss": 0.6228, "step": 176050 }, { "epoch": 1.95, "learning_rate": 1.7509854725839315e-05, "loss": 0.6464, "step": 176055 }, { "epoch": 1.95, "learning_rate": 1.75089319987008e-05, "loss": 0.6574, "step": 176060 }, { "epoch": 1.95, "learning_rate": 1.750800927156229e-05, "loss": 0.6073, "step": 176065 }, { "epoch": 1.95, "learning_rate": 1.7507086544423775e-05, "loss": 0.5571, "step": 176070 }, { "epoch": 1.95, "learning_rate": 1.7506163817285266e-05, "loss": 0.6433, "step": 176075 }, { "epoch": 1.95, "learning_rate": 1.750524109014675e-05, "loss": 0.6032, "step": 176080 }, { "epoch": 1.95, "learning_rate": 1.7504318363008242e-05, "loss": 0.6387, "step": 176085 }, { "epoch": 1.95, "learning_rate": 1.7503395635869726e-05, "loss": 0.602, "step": 176090 }, { "epoch": 1.95, "learning_rate": 1.7502472908731214e-05, "loss": 0.5732, "step": 176095 }, { "epoch": 1.95, "learning_rate": 1.7501550181592702e-05, "loss": 0.6308, "step": 176100 }, { "epoch": 1.95, "learning_rate": 1.750062745445419e-05, "loss": 0.5513, "step": 176105 }, { "epoch": 1.95, "learning_rate": 1.7499704727315678e-05, "loss": 0.5985, "step": 176110 }, { "epoch": 1.95, "learning_rate": 1.7498782000177162e-05, "loss": 0.6403, "step": 176115 }, { "epoch": 1.95, "learning_rate": 1.7497859273038653e-05, "loss": 0.6515, "step": 176120 }, { "epoch": 1.95, "learning_rate": 1.7496936545900138e-05, "loss": 0.6115, "step": 176125 }, { "epoch": 1.95, "learning_rate": 1.749601381876163e-05, "loss": 0.6734, "step": 176130 }, { "epoch": 1.95, "learning_rate": 1.7495091091623113e-05, "loss": 0.5691, "step": 176135 }, { "epoch": 1.95, "learning_rate": 1.7494168364484605e-05, "loss": 0.5991, "step": 176140 }, { "epoch": 1.95, "learning_rate": 1.749324563734609e-05, "loss": 0.6023, "step": 176145 }, { "epoch": 1.95, "learning_rate": 1.7492322910207577e-05, "loss": 0.6084, "step": 176150 }, { "epoch": 1.95, "learning_rate": 1.7491400183069065e-05, "loss": 0.5729, "step": 176155 }, { "epoch": 1.95, "learning_rate": 1.7490477455930553e-05, "loss": 0.647, "step": 176160 }, { "epoch": 1.95, "learning_rate": 1.748955472879204e-05, "loss": 0.5899, "step": 176165 }, { "epoch": 1.95, "learning_rate": 1.7488632001653525e-05, "loss": 0.6027, "step": 176170 }, { "epoch": 1.95, "learning_rate": 1.7487709274515016e-05, "loss": 0.6103, "step": 176175 }, { "epoch": 1.95, "learning_rate": 1.74867865473765e-05, "loss": 0.6078, "step": 176180 }, { "epoch": 1.95, "learning_rate": 1.7485863820237992e-05, "loss": 0.6166, "step": 176185 }, { "epoch": 1.95, "learning_rate": 1.7484941093099476e-05, "loss": 0.6323, "step": 176190 }, { "epoch": 1.95, "learning_rate": 1.7484018365960968e-05, "loss": 0.6488, "step": 176195 }, { "epoch": 1.95, "learning_rate": 1.7483095638822452e-05, "loss": 0.5595, "step": 176200 }, { "epoch": 1.95, "learning_rate": 1.7482172911683943e-05, "loss": 0.5973, "step": 176205 }, { "epoch": 1.95, "learning_rate": 1.7481250184545428e-05, "loss": 0.6377, "step": 176210 }, { "epoch": 1.95, "learning_rate": 1.7480327457406916e-05, "loss": 0.58, "step": 176215 }, { "epoch": 1.95, "learning_rate": 1.7479404730268403e-05, "loss": 0.6069, "step": 176220 }, { "epoch": 1.95, "learning_rate": 1.747848200312989e-05, "loss": 0.5985, "step": 176225 }, { "epoch": 1.95, "learning_rate": 1.747755927599138e-05, "loss": 0.6467, "step": 176230 }, { "epoch": 1.95, "learning_rate": 1.7476636548852867e-05, "loss": 0.5826, "step": 176235 }, { "epoch": 1.95, "learning_rate": 1.7475713821714355e-05, "loss": 0.6426, "step": 176240 }, { "epoch": 1.95, "learning_rate": 1.747479109457584e-05, "loss": 0.581, "step": 176245 }, { "epoch": 1.95, "learning_rate": 1.747386836743733e-05, "loss": 0.6257, "step": 176250 }, { "epoch": 1.95, "learning_rate": 1.7472945640298815e-05, "loss": 0.6048, "step": 176255 }, { "epoch": 1.95, "learning_rate": 1.7472022913160306e-05, "loss": 0.5886, "step": 176260 }, { "epoch": 1.95, "learning_rate": 1.747110018602179e-05, "loss": 0.6088, "step": 176265 }, { "epoch": 1.95, "learning_rate": 1.747017745888328e-05, "loss": 0.5566, "step": 176270 }, { "epoch": 1.95, "learning_rate": 1.7469254731744766e-05, "loss": 0.6233, "step": 176275 }, { "epoch": 1.95, "learning_rate": 1.7468332004606254e-05, "loss": 0.6366, "step": 176280 }, { "epoch": 1.95, "learning_rate": 1.7467409277467742e-05, "loss": 0.6646, "step": 176285 }, { "epoch": 1.95, "learning_rate": 1.746648655032923e-05, "loss": 0.6546, "step": 176290 }, { "epoch": 1.95, "learning_rate": 1.7465563823190718e-05, "loss": 0.5763, "step": 176295 }, { "epoch": 1.95, "learning_rate": 1.7464641096052206e-05, "loss": 0.5827, "step": 176300 }, { "epoch": 1.95, "learning_rate": 1.7463718368913693e-05, "loss": 0.5986, "step": 176305 }, { "epoch": 1.95, "learning_rate": 1.746279564177518e-05, "loss": 0.6133, "step": 176310 }, { "epoch": 1.95, "learning_rate": 1.746187291463667e-05, "loss": 0.6275, "step": 176315 }, { "epoch": 1.95, "learning_rate": 1.7460950187498154e-05, "loss": 0.5454, "step": 176320 }, { "epoch": 1.95, "learning_rate": 1.746002746035964e-05, "loss": 0.6147, "step": 176325 }, { "epoch": 1.95, "learning_rate": 1.745910473322113e-05, "loss": 0.6049, "step": 176330 }, { "epoch": 1.95, "learning_rate": 1.7458182006082617e-05, "loss": 0.6608, "step": 176335 }, { "epoch": 1.95, "learning_rate": 1.7457259278944105e-05, "loss": 0.6254, "step": 176340 }, { "epoch": 1.95, "learning_rate": 1.7456336551805593e-05, "loss": 0.6548, "step": 176345 }, { "epoch": 1.95, "learning_rate": 1.745541382466708e-05, "loss": 0.6066, "step": 176350 }, { "epoch": 1.95, "learning_rate": 1.745449109752857e-05, "loss": 0.5808, "step": 176355 }, { "epoch": 1.95, "learning_rate": 1.7453568370390056e-05, "loss": 0.5517, "step": 176360 }, { "epoch": 1.95, "learning_rate": 1.7452645643251544e-05, "loss": 0.6243, "step": 176365 }, { "epoch": 1.95, "learning_rate": 1.7451722916113032e-05, "loss": 0.5497, "step": 176370 }, { "epoch": 1.95, "learning_rate": 1.745080018897452e-05, "loss": 0.6083, "step": 176375 }, { "epoch": 1.95, "learning_rate": 1.7449877461836004e-05, "loss": 0.6589, "step": 176380 }, { "epoch": 1.95, "learning_rate": 1.7448954734697496e-05, "loss": 0.6038, "step": 176385 }, { "epoch": 1.95, "learning_rate": 1.744803200755898e-05, "loss": 0.5924, "step": 176390 }, { "epoch": 1.95, "learning_rate": 1.7447109280420468e-05, "loss": 0.5998, "step": 176395 }, { "epoch": 1.95, "learning_rate": 1.7446186553281956e-05, "loss": 0.5955, "step": 176400 }, { "epoch": 1.95, "learning_rate": 1.7445263826143444e-05, "loss": 0.6141, "step": 176405 }, { "epoch": 1.95, "learning_rate": 1.744434109900493e-05, "loss": 0.572, "step": 176410 }, { "epoch": 1.95, "learning_rate": 1.744341837186642e-05, "loss": 0.6054, "step": 176415 }, { "epoch": 1.95, "learning_rate": 1.7442495644727907e-05, "loss": 0.5987, "step": 176420 }, { "epoch": 1.95, "learning_rate": 1.7441572917589395e-05, "loss": 0.6369, "step": 176425 }, { "epoch": 1.95, "learning_rate": 1.7440650190450883e-05, "loss": 0.5954, "step": 176430 }, { "epoch": 1.95, "learning_rate": 1.743972746331237e-05, "loss": 0.6323, "step": 176435 }, { "epoch": 1.95, "learning_rate": 1.743880473617386e-05, "loss": 0.6432, "step": 176440 }, { "epoch": 1.95, "learning_rate": 1.7437882009035343e-05, "loss": 0.6391, "step": 176445 }, { "epoch": 1.95, "learning_rate": 1.7436959281896834e-05, "loss": 0.581, "step": 176450 }, { "epoch": 1.95, "learning_rate": 1.743603655475832e-05, "loss": 0.6327, "step": 176455 }, { "epoch": 1.95, "learning_rate": 1.743511382761981e-05, "loss": 0.6479, "step": 176460 }, { "epoch": 1.95, "learning_rate": 1.7434191100481294e-05, "loss": 0.6108, "step": 176465 }, { "epoch": 1.95, "learning_rate": 1.7433268373342782e-05, "loss": 0.5723, "step": 176470 }, { "epoch": 1.95, "learning_rate": 1.743234564620427e-05, "loss": 0.5317, "step": 176475 }, { "epoch": 1.95, "learning_rate": 1.7431422919065758e-05, "loss": 0.6343, "step": 176480 }, { "epoch": 1.95, "learning_rate": 1.7430500191927246e-05, "loss": 0.6106, "step": 176485 }, { "epoch": 1.95, "learning_rate": 1.7429577464788734e-05, "loss": 0.6453, "step": 176490 }, { "epoch": 1.95, "learning_rate": 1.742865473765022e-05, "loss": 0.6413, "step": 176495 }, { "epoch": 1.95, "learning_rate": 1.7427732010511706e-05, "loss": 0.612, "step": 176500 }, { "epoch": 1.95, "learning_rate": 1.7426809283373197e-05, "loss": 0.589, "step": 176505 }, { "epoch": 1.95, "learning_rate": 1.7425886556234682e-05, "loss": 0.6419, "step": 176510 }, { "epoch": 1.95, "learning_rate": 1.7424963829096173e-05, "loss": 0.5449, "step": 176515 }, { "epoch": 1.95, "learning_rate": 1.7424041101957657e-05, "loss": 0.6132, "step": 176520 }, { "epoch": 1.95, "learning_rate": 1.742311837481915e-05, "loss": 0.613, "step": 176525 }, { "epoch": 1.95, "learning_rate": 1.7422195647680633e-05, "loss": 0.6164, "step": 176530 }, { "epoch": 1.95, "learning_rate": 1.7421272920542124e-05, "loss": 0.6183, "step": 176535 }, { "epoch": 1.95, "learning_rate": 1.742035019340361e-05, "loss": 0.5991, "step": 176540 }, { "epoch": 1.95, "learning_rate": 1.7419427466265097e-05, "loss": 0.6007, "step": 176545 }, { "epoch": 1.95, "learning_rate": 1.7418504739126585e-05, "loss": 0.6214, "step": 176550 }, { "epoch": 1.95, "learning_rate": 1.741758201198807e-05, "loss": 0.6343, "step": 176555 }, { "epoch": 1.95, "learning_rate": 1.741665928484956e-05, "loss": 0.6218, "step": 176560 }, { "epoch": 1.96, "learning_rate": 1.7415736557711045e-05, "loss": 0.6243, "step": 176565 }, { "epoch": 1.96, "learning_rate": 1.7414813830572536e-05, "loss": 0.6118, "step": 176570 }, { "epoch": 1.96, "learning_rate": 1.741389110343402e-05, "loss": 0.5867, "step": 176575 }, { "epoch": 1.96, "learning_rate": 1.741296837629551e-05, "loss": 0.5928, "step": 176580 }, { "epoch": 1.96, "learning_rate": 1.7412045649156996e-05, "loss": 0.6039, "step": 176585 }, { "epoch": 1.96, "learning_rate": 1.7411122922018487e-05, "loss": 0.6174, "step": 176590 }, { "epoch": 1.96, "learning_rate": 1.7410200194879972e-05, "loss": 0.5489, "step": 176595 }, { "epoch": 1.96, "learning_rate": 1.740927746774146e-05, "loss": 0.6235, "step": 176600 }, { "epoch": 1.96, "learning_rate": 1.7408354740602947e-05, "loss": 0.588, "step": 176605 }, { "epoch": 1.96, "learning_rate": 1.7407432013464435e-05, "loss": 0.634, "step": 176610 }, { "epoch": 1.96, "learning_rate": 1.7406509286325923e-05, "loss": 0.6801, "step": 176615 }, { "epoch": 1.96, "learning_rate": 1.740558655918741e-05, "loss": 0.654, "step": 176620 }, { "epoch": 1.96, "learning_rate": 1.74046638320489e-05, "loss": 0.6383, "step": 176625 }, { "epoch": 1.96, "learning_rate": 1.7403741104910383e-05, "loss": 0.6126, "step": 176630 }, { "epoch": 1.96, "learning_rate": 1.7402818377771875e-05, "loss": 0.6075, "step": 176635 }, { "epoch": 1.96, "learning_rate": 1.740189565063336e-05, "loss": 0.6614, "step": 176640 }, { "epoch": 1.96, "learning_rate": 1.740097292349485e-05, "loss": 0.5937, "step": 176645 }, { "epoch": 1.96, "learning_rate": 1.7400050196356335e-05, "loss": 0.6415, "step": 176650 }, { "epoch": 1.96, "learning_rate": 1.7399127469217823e-05, "loss": 0.6148, "step": 176655 }, { "epoch": 1.96, "learning_rate": 1.739820474207931e-05, "loss": 0.6268, "step": 176660 }, { "epoch": 1.96, "learning_rate": 1.7397282014940798e-05, "loss": 0.6104, "step": 176665 }, { "epoch": 1.96, "learning_rate": 1.7396359287802286e-05, "loss": 0.6283, "step": 176670 }, { "epoch": 1.96, "learning_rate": 1.7395436560663774e-05, "loss": 0.6147, "step": 176675 }, { "epoch": 1.96, "learning_rate": 1.7394513833525262e-05, "loss": 0.6066, "step": 176680 }, { "epoch": 1.96, "learning_rate": 1.739359110638675e-05, "loss": 0.625, "step": 176685 }, { "epoch": 1.96, "learning_rate": 1.7392668379248238e-05, "loss": 0.6751, "step": 176690 }, { "epoch": 1.96, "learning_rate": 1.7391745652109725e-05, "loss": 0.6411, "step": 176695 }, { "epoch": 1.96, "learning_rate": 1.7390822924971213e-05, "loss": 0.6051, "step": 176700 }, { "epoch": 1.96, "learning_rate": 1.7389900197832698e-05, "loss": 0.6515, "step": 176705 }, { "epoch": 1.96, "learning_rate": 1.7388977470694186e-05, "loss": 0.6654, "step": 176710 }, { "epoch": 1.96, "learning_rate": 1.7388054743555673e-05, "loss": 0.6506, "step": 176715 }, { "epoch": 1.96, "learning_rate": 1.738713201641716e-05, "loss": 0.5841, "step": 176720 }, { "epoch": 1.96, "learning_rate": 1.738620928927865e-05, "loss": 0.6027, "step": 176725 }, { "epoch": 1.96, "learning_rate": 1.7385286562140137e-05, "loss": 0.6299, "step": 176730 }, { "epoch": 1.96, "learning_rate": 1.7384363835001625e-05, "loss": 0.6524, "step": 176735 }, { "epoch": 1.96, "learning_rate": 1.7383441107863113e-05, "loss": 0.7081, "step": 176740 }, { "epoch": 1.96, "learning_rate": 1.73825183807246e-05, "loss": 0.5672, "step": 176745 }, { "epoch": 1.96, "learning_rate": 1.738159565358609e-05, "loss": 0.6147, "step": 176750 }, { "epoch": 1.96, "learning_rate": 1.7380672926447576e-05, "loss": 0.6007, "step": 176755 }, { "epoch": 1.96, "learning_rate": 1.7379750199309064e-05, "loss": 0.5848, "step": 176760 }, { "epoch": 1.96, "learning_rate": 1.7378827472170552e-05, "loss": 0.5779, "step": 176765 }, { "epoch": 1.96, "learning_rate": 1.737790474503204e-05, "loss": 0.5833, "step": 176770 }, { "epoch": 1.96, "learning_rate": 1.7376982017893524e-05, "loss": 0.6057, "step": 176775 }, { "epoch": 1.96, "learning_rate": 1.7376059290755012e-05, "loss": 0.6362, "step": 176780 }, { "epoch": 1.96, "learning_rate": 1.73751365636165e-05, "loss": 0.5913, "step": 176785 }, { "epoch": 1.96, "learning_rate": 1.7374213836477988e-05, "loss": 0.6306, "step": 176790 }, { "epoch": 1.96, "learning_rate": 1.7373291109339476e-05, "loss": 0.6412, "step": 176795 }, { "epoch": 1.96, "learning_rate": 1.7372368382200963e-05, "loss": 0.633, "step": 176800 }, { "epoch": 1.96, "learning_rate": 1.737144565506245e-05, "loss": 0.6104, "step": 176805 }, { "epoch": 1.96, "learning_rate": 1.737052292792394e-05, "loss": 0.6015, "step": 176810 }, { "epoch": 1.96, "learning_rate": 1.7369600200785427e-05, "loss": 0.643, "step": 176815 }, { "epoch": 1.96, "learning_rate": 1.7368677473646915e-05, "loss": 0.6013, "step": 176820 }, { "epoch": 1.96, "learning_rate": 1.7367754746508403e-05, "loss": 0.5811, "step": 176825 }, { "epoch": 1.96, "learning_rate": 1.7366832019369887e-05, "loss": 0.5953, "step": 176830 }, { "epoch": 1.96, "learning_rate": 1.736590929223138e-05, "loss": 0.6138, "step": 176835 }, { "epoch": 1.96, "learning_rate": 1.7364986565092863e-05, "loss": 0.6167, "step": 176840 }, { "epoch": 1.96, "learning_rate": 1.7364063837954354e-05, "loss": 0.6762, "step": 176845 }, { "epoch": 1.96, "learning_rate": 1.736314111081584e-05, "loss": 0.6335, "step": 176850 }, { "epoch": 1.96, "learning_rate": 1.7362218383677326e-05, "loss": 0.6029, "step": 176855 }, { "epoch": 1.96, "learning_rate": 1.7361295656538814e-05, "loss": 0.6136, "step": 176860 }, { "epoch": 1.96, "learning_rate": 1.7360372929400302e-05, "loss": 0.6673, "step": 176865 }, { "epoch": 1.96, "learning_rate": 1.735945020226179e-05, "loss": 0.5816, "step": 176870 }, { "epoch": 1.96, "learning_rate": 1.7358527475123278e-05, "loss": 0.6166, "step": 176875 }, { "epoch": 1.96, "learning_rate": 1.7357604747984766e-05, "loss": 0.6033, "step": 176880 }, { "epoch": 1.96, "learning_rate": 1.735668202084625e-05, "loss": 0.6195, "step": 176885 }, { "epoch": 1.96, "learning_rate": 1.735575929370774e-05, "loss": 0.6512, "step": 176890 }, { "epoch": 1.96, "learning_rate": 1.7354836566569226e-05, "loss": 0.6143, "step": 176895 }, { "epoch": 1.96, "learning_rate": 1.7353913839430717e-05, "loss": 0.6084, "step": 176900 }, { "epoch": 1.96, "learning_rate": 1.73529911122922e-05, "loss": 0.5832, "step": 176905 }, { "epoch": 1.96, "learning_rate": 1.7352068385153693e-05, "loss": 0.6408, "step": 176910 }, { "epoch": 1.96, "learning_rate": 1.7351145658015177e-05, "loss": 0.5918, "step": 176915 }, { "epoch": 1.96, "learning_rate": 1.735022293087667e-05, "loss": 0.6234, "step": 176920 }, { "epoch": 1.96, "learning_rate": 1.7349300203738153e-05, "loss": 0.5906, "step": 176925 }, { "epoch": 1.96, "learning_rate": 1.734837747659964e-05, "loss": 0.6109, "step": 176930 }, { "epoch": 1.96, "learning_rate": 1.734745474946113e-05, "loss": 0.6242, "step": 176935 }, { "epoch": 1.96, "learning_rate": 1.7346532022322613e-05, "loss": 0.6577, "step": 176940 }, { "epoch": 1.96, "learning_rate": 1.7345609295184104e-05, "loss": 0.5608, "step": 176945 }, { "epoch": 1.96, "learning_rate": 1.734468656804559e-05, "loss": 0.6926, "step": 176950 }, { "epoch": 1.96, "learning_rate": 1.734376384090708e-05, "loss": 0.5998, "step": 176955 }, { "epoch": 1.96, "learning_rate": 1.7342841113768564e-05, "loss": 0.6405, "step": 176960 }, { "epoch": 1.96, "learning_rate": 1.7341918386630056e-05, "loss": 0.6173, "step": 176965 }, { "epoch": 1.96, "learning_rate": 1.734099565949154e-05, "loss": 0.6396, "step": 176970 }, { "epoch": 1.96, "learning_rate": 1.734007293235303e-05, "loss": 0.6241, "step": 176975 }, { "epoch": 1.96, "learning_rate": 1.7339150205214516e-05, "loss": 0.5938, "step": 176980 }, { "epoch": 1.96, "learning_rate": 1.7338227478076004e-05, "loss": 0.6112, "step": 176985 }, { "epoch": 1.96, "learning_rate": 1.733730475093749e-05, "loss": 0.6384, "step": 176990 }, { "epoch": 1.96, "learning_rate": 1.733638202379898e-05, "loss": 0.6628, "step": 176995 }, { "epoch": 1.96, "learning_rate": 1.7335459296660467e-05, "loss": 0.6268, "step": 177000 }, { "epoch": 1.96, "eval_loss": 0.5999476909637451, "eval_runtime": 69.332, "eval_samples_per_second": 28.847, "eval_steps_per_second": 14.423, "step": 177000 }, { "epoch": 1.96, "learning_rate": 1.733453656952195e-05, "loss": 0.6335, "step": 177005 }, { "epoch": 1.96, "learning_rate": 1.7333613842383443e-05, "loss": 0.6396, "step": 177010 }, { "epoch": 1.96, "learning_rate": 1.7332691115244927e-05, "loss": 0.5685, "step": 177015 }, { "epoch": 1.96, "learning_rate": 1.733176838810642e-05, "loss": 0.6279, "step": 177020 }, { "epoch": 1.96, "learning_rate": 1.7330845660967903e-05, "loss": 0.581, "step": 177025 }, { "epoch": 1.96, "learning_rate": 1.7329922933829394e-05, "loss": 0.5933, "step": 177030 }, { "epoch": 1.96, "learning_rate": 1.732900020669088e-05, "loss": 0.6076, "step": 177035 }, { "epoch": 1.96, "learning_rate": 1.7328077479552367e-05, "loss": 0.6083, "step": 177040 }, { "epoch": 1.96, "learning_rate": 1.7327154752413854e-05, "loss": 0.6316, "step": 177045 }, { "epoch": 1.96, "learning_rate": 1.7326232025275342e-05, "loss": 0.6026, "step": 177050 }, { "epoch": 1.96, "learning_rate": 1.732530929813683e-05, "loss": 0.6337, "step": 177055 }, { "epoch": 1.96, "learning_rate": 1.7324386570998318e-05, "loss": 0.6571, "step": 177060 }, { "epoch": 1.96, "learning_rate": 1.7323463843859806e-05, "loss": 0.6086, "step": 177065 }, { "epoch": 1.96, "learning_rate": 1.7322541116721294e-05, "loss": 0.6425, "step": 177070 }, { "epoch": 1.96, "learning_rate": 1.732161838958278e-05, "loss": 0.6516, "step": 177075 }, { "epoch": 1.96, "learning_rate": 1.7320695662444266e-05, "loss": 0.5938, "step": 177080 }, { "epoch": 1.96, "learning_rate": 1.7319772935305757e-05, "loss": 0.5875, "step": 177085 }, { "epoch": 1.96, "learning_rate": 1.731885020816724e-05, "loss": 0.5821, "step": 177090 }, { "epoch": 1.96, "learning_rate": 1.731792748102873e-05, "loss": 0.5907, "step": 177095 }, { "epoch": 1.96, "learning_rate": 1.7317004753890217e-05, "loss": 0.6167, "step": 177100 }, { "epoch": 1.96, "learning_rate": 1.7316082026751705e-05, "loss": 0.6157, "step": 177105 }, { "epoch": 1.96, "learning_rate": 1.7315159299613193e-05, "loss": 0.574, "step": 177110 }, { "epoch": 1.96, "learning_rate": 1.731423657247468e-05, "loss": 0.5943, "step": 177115 }, { "epoch": 1.96, "learning_rate": 1.731331384533617e-05, "loss": 0.63, "step": 177120 }, { "epoch": 1.96, "learning_rate": 1.7312391118197657e-05, "loss": 0.6181, "step": 177125 }, { "epoch": 1.96, "learning_rate": 1.7311468391059144e-05, "loss": 0.6403, "step": 177130 }, { "epoch": 1.96, "learning_rate": 1.7310545663920632e-05, "loss": 0.5937, "step": 177135 }, { "epoch": 1.96, "learning_rate": 1.730962293678212e-05, "loss": 0.585, "step": 177140 }, { "epoch": 1.96, "learning_rate": 1.7308700209643608e-05, "loss": 0.6339, "step": 177145 }, { "epoch": 1.96, "learning_rate": 1.7307777482505096e-05, "loss": 0.6557, "step": 177150 }, { "epoch": 1.96, "learning_rate": 1.730685475536658e-05, "loss": 0.6344, "step": 177155 }, { "epoch": 1.96, "learning_rate": 1.7305932028228068e-05, "loss": 0.6011, "step": 177160 }, { "epoch": 1.96, "learning_rate": 1.7305009301089556e-05, "loss": 0.6405, "step": 177165 }, { "epoch": 1.96, "learning_rate": 1.7304086573951044e-05, "loss": 0.5961, "step": 177170 }, { "epoch": 1.96, "learning_rate": 1.7303163846812532e-05, "loss": 0.5573, "step": 177175 }, { "epoch": 1.96, "learning_rate": 1.730224111967402e-05, "loss": 0.6049, "step": 177180 }, { "epoch": 1.96, "learning_rate": 1.7301318392535507e-05, "loss": 0.6084, "step": 177185 }, { "epoch": 1.96, "learning_rate": 1.7300395665396995e-05, "loss": 0.5969, "step": 177190 }, { "epoch": 1.96, "learning_rate": 1.7299472938258483e-05, "loss": 0.5864, "step": 177195 }, { "epoch": 1.96, "learning_rate": 1.729855021111997e-05, "loss": 0.6408, "step": 177200 }, { "epoch": 1.96, "learning_rate": 1.729762748398146e-05, "loss": 0.595, "step": 177205 }, { "epoch": 1.96, "learning_rate": 1.7296704756842947e-05, "loss": 0.6753, "step": 177210 }, { "epoch": 1.96, "learning_rate": 1.729578202970443e-05, "loss": 0.6266, "step": 177215 }, { "epoch": 1.96, "learning_rate": 1.7294859302565922e-05, "loss": 0.625, "step": 177220 }, { "epoch": 1.96, "learning_rate": 1.7293936575427407e-05, "loss": 0.6285, "step": 177225 }, { "epoch": 1.96, "learning_rate": 1.7293013848288895e-05, "loss": 0.5325, "step": 177230 }, { "epoch": 1.96, "learning_rate": 1.7292091121150383e-05, "loss": 0.6226, "step": 177235 }, { "epoch": 1.96, "learning_rate": 1.729116839401187e-05, "loss": 0.6485, "step": 177240 }, { "epoch": 1.96, "learning_rate": 1.7290245666873358e-05, "loss": 0.6159, "step": 177245 }, { "epoch": 1.96, "learning_rate": 1.7289322939734846e-05, "loss": 0.6449, "step": 177250 }, { "epoch": 1.96, "learning_rate": 1.7288400212596334e-05, "loss": 0.593, "step": 177255 }, { "epoch": 1.96, "learning_rate": 1.7287477485457822e-05, "loss": 0.63, "step": 177260 }, { "epoch": 1.96, "learning_rate": 1.728655475831931e-05, "loss": 0.6236, "step": 177265 }, { "epoch": 1.96, "learning_rate": 1.7285632031180794e-05, "loss": 0.6209, "step": 177270 }, { "epoch": 1.96, "learning_rate": 1.7284709304042285e-05, "loss": 0.5982, "step": 177275 }, { "epoch": 1.96, "learning_rate": 1.728378657690377e-05, "loss": 0.5555, "step": 177280 }, { "epoch": 1.96, "learning_rate": 1.728286384976526e-05, "loss": 0.557, "step": 177285 }, { "epoch": 1.96, "learning_rate": 1.7281941122626745e-05, "loss": 0.6538, "step": 177290 }, { "epoch": 1.96, "learning_rate": 1.7281018395488237e-05, "loss": 0.599, "step": 177295 }, { "epoch": 1.96, "learning_rate": 1.728009566834972e-05, "loss": 0.5955, "step": 177300 }, { "epoch": 1.96, "learning_rate": 1.727917294121121e-05, "loss": 0.6375, "step": 177305 }, { "epoch": 1.96, "learning_rate": 1.7278250214072697e-05, "loss": 0.564, "step": 177310 }, { "epoch": 1.96, "learning_rate": 1.7277327486934185e-05, "loss": 0.6426, "step": 177315 }, { "epoch": 1.96, "learning_rate": 1.7276404759795673e-05, "loss": 0.6246, "step": 177320 }, { "epoch": 1.96, "learning_rate": 1.7275482032657157e-05, "loss": 0.5417, "step": 177325 }, { "epoch": 1.96, "learning_rate": 1.7274559305518648e-05, "loss": 0.6501, "step": 177330 }, { "epoch": 1.96, "learning_rate": 1.7273636578380133e-05, "loss": 0.646, "step": 177335 }, { "epoch": 1.96, "learning_rate": 1.7272713851241624e-05, "loss": 0.6338, "step": 177340 }, { "epoch": 1.96, "learning_rate": 1.727179112410311e-05, "loss": 0.676, "step": 177345 }, { "epoch": 1.96, "learning_rate": 1.72708683969646e-05, "loss": 0.5693, "step": 177350 }, { "epoch": 1.96, "learning_rate": 1.7269945669826084e-05, "loss": 0.5876, "step": 177355 }, { "epoch": 1.96, "learning_rate": 1.7269022942687575e-05, "loss": 0.6149, "step": 177360 }, { "epoch": 1.96, "learning_rate": 1.726810021554906e-05, "loss": 0.6854, "step": 177365 }, { "epoch": 1.96, "learning_rate": 1.7267177488410548e-05, "loss": 0.6361, "step": 177370 }, { "epoch": 1.96, "learning_rate": 1.7266254761272036e-05, "loss": 0.6248, "step": 177375 }, { "epoch": 1.96, "learning_rate": 1.7265332034133523e-05, "loss": 0.6397, "step": 177380 }, { "epoch": 1.96, "learning_rate": 1.726440930699501e-05, "loss": 0.6347, "step": 177385 }, { "epoch": 1.96, "learning_rate": 1.7263486579856496e-05, "loss": 0.6149, "step": 177390 }, { "epoch": 1.96, "learning_rate": 1.7262563852717987e-05, "loss": 0.5738, "step": 177395 }, { "epoch": 1.96, "learning_rate": 1.726164112557947e-05, "loss": 0.641, "step": 177400 }, { "epoch": 1.96, "learning_rate": 1.7260718398440963e-05, "loss": 0.5718, "step": 177405 }, { "epoch": 1.96, "learning_rate": 1.7259795671302447e-05, "loss": 0.5809, "step": 177410 }, { "epoch": 1.96, "learning_rate": 1.7258872944163938e-05, "loss": 0.5917, "step": 177415 }, { "epoch": 1.96, "learning_rate": 1.7257950217025423e-05, "loss": 0.6281, "step": 177420 }, { "epoch": 1.96, "learning_rate": 1.725702748988691e-05, "loss": 0.575, "step": 177425 }, { "epoch": 1.96, "learning_rate": 1.72561047627484e-05, "loss": 0.5834, "step": 177430 }, { "epoch": 1.96, "learning_rate": 1.7255182035609886e-05, "loss": 0.5771, "step": 177435 }, { "epoch": 1.96, "learning_rate": 1.7254259308471374e-05, "loss": 0.6669, "step": 177440 }, { "epoch": 1.96, "learning_rate": 1.7253336581332862e-05, "loss": 0.6096, "step": 177445 }, { "epoch": 1.96, "learning_rate": 1.725241385419435e-05, "loss": 0.6188, "step": 177450 }, { "epoch": 1.96, "learning_rate": 1.7251491127055838e-05, "loss": 0.652, "step": 177455 }, { "epoch": 1.96, "learning_rate": 1.7250568399917326e-05, "loss": 0.6094, "step": 177460 }, { "epoch": 1.97, "learning_rate": 1.724964567277881e-05, "loss": 0.6405, "step": 177465 }, { "epoch": 1.97, "learning_rate": 1.72487229456403e-05, "loss": 0.6485, "step": 177470 }, { "epoch": 1.97, "learning_rate": 1.7247800218501786e-05, "loss": 0.5742, "step": 177475 }, { "epoch": 1.97, "learning_rate": 1.7246877491363274e-05, "loss": 0.6191, "step": 177480 }, { "epoch": 1.97, "learning_rate": 1.724595476422476e-05, "loss": 0.6114, "step": 177485 }, { "epoch": 1.97, "learning_rate": 1.724503203708625e-05, "loss": 0.5948, "step": 177490 }, { "epoch": 1.97, "learning_rate": 1.7244109309947737e-05, "loss": 0.5826, "step": 177495 }, { "epoch": 1.97, "learning_rate": 1.7243186582809225e-05, "loss": 0.5401, "step": 177500 }, { "epoch": 1.97, "learning_rate": 1.7242263855670713e-05, "loss": 0.6315, "step": 177505 }, { "epoch": 1.97, "learning_rate": 1.72413411285322e-05, "loss": 0.6308, "step": 177510 }, { "epoch": 1.97, "learning_rate": 1.724041840139369e-05, "loss": 0.6444, "step": 177515 }, { "epoch": 1.97, "learning_rate": 1.7239495674255176e-05, "loss": 0.6552, "step": 177520 }, { "epoch": 1.97, "learning_rate": 1.7238572947116664e-05, "loss": 0.6226, "step": 177525 }, { "epoch": 1.97, "learning_rate": 1.7237650219978152e-05, "loss": 0.6314, "step": 177530 }, { "epoch": 1.97, "learning_rate": 1.723672749283964e-05, "loss": 0.6713, "step": 177535 }, { "epoch": 1.97, "learning_rate": 1.7235804765701124e-05, "loss": 0.5948, "step": 177540 }, { "epoch": 1.97, "learning_rate": 1.7234882038562612e-05, "loss": 0.5691, "step": 177545 }, { "epoch": 1.97, "learning_rate": 1.72339593114241e-05, "loss": 0.6148, "step": 177550 }, { "epoch": 1.97, "learning_rate": 1.7233036584285588e-05, "loss": 0.5998, "step": 177555 }, { "epoch": 1.97, "learning_rate": 1.7232113857147076e-05, "loss": 0.6102, "step": 177560 }, { "epoch": 1.97, "learning_rate": 1.7231191130008564e-05, "loss": 0.6274, "step": 177565 }, { "epoch": 1.97, "learning_rate": 1.723026840287005e-05, "loss": 0.6182, "step": 177570 }, { "epoch": 1.97, "learning_rate": 1.722934567573154e-05, "loss": 0.6213, "step": 177575 }, { "epoch": 1.97, "learning_rate": 1.7228422948593027e-05, "loss": 0.588, "step": 177580 }, { "epoch": 1.97, "learning_rate": 1.7227500221454515e-05, "loss": 0.6792, "step": 177585 }, { "epoch": 1.97, "learning_rate": 1.7226577494316003e-05, "loss": 0.614, "step": 177590 }, { "epoch": 1.97, "learning_rate": 1.722565476717749e-05, "loss": 0.5948, "step": 177595 }, { "epoch": 1.97, "learning_rate": 1.7224732040038975e-05, "loss": 0.5992, "step": 177600 }, { "epoch": 1.97, "learning_rate": 1.7223809312900466e-05, "loss": 0.645, "step": 177605 }, { "epoch": 1.97, "learning_rate": 1.722288658576195e-05, "loss": 0.5894, "step": 177610 }, { "epoch": 1.97, "learning_rate": 1.722196385862344e-05, "loss": 0.6228, "step": 177615 }, { "epoch": 1.97, "learning_rate": 1.7221041131484927e-05, "loss": 0.7046, "step": 177620 }, { "epoch": 1.97, "learning_rate": 1.7220118404346414e-05, "loss": 0.5954, "step": 177625 }, { "epoch": 1.97, "learning_rate": 1.7219195677207902e-05, "loss": 0.6339, "step": 177630 }, { "epoch": 1.97, "learning_rate": 1.721827295006939e-05, "loss": 0.5782, "step": 177635 }, { "epoch": 1.97, "learning_rate": 1.7217350222930878e-05, "loss": 0.5756, "step": 177640 }, { "epoch": 1.97, "learning_rate": 1.7216427495792366e-05, "loss": 0.6143, "step": 177645 }, { "epoch": 1.97, "learning_rate": 1.7215504768653854e-05, "loss": 0.6069, "step": 177650 }, { "epoch": 1.97, "learning_rate": 1.7214582041515338e-05, "loss": 0.6226, "step": 177655 }, { "epoch": 1.97, "learning_rate": 1.721365931437683e-05, "loss": 0.6005, "step": 177660 }, { "epoch": 1.97, "learning_rate": 1.7212736587238314e-05, "loss": 0.5914, "step": 177665 }, { "epoch": 1.97, "learning_rate": 1.7211813860099805e-05, "loss": 0.617, "step": 177670 }, { "epoch": 1.97, "learning_rate": 1.721089113296129e-05, "loss": 0.6449, "step": 177675 }, { "epoch": 1.97, "learning_rate": 1.720996840582278e-05, "loss": 0.5838, "step": 177680 }, { "epoch": 1.97, "learning_rate": 1.7209045678684265e-05, "loss": 0.6632, "step": 177685 }, { "epoch": 1.97, "learning_rate": 1.7208122951545753e-05, "loss": 0.6549, "step": 177690 }, { "epoch": 1.97, "learning_rate": 1.720720022440724e-05, "loss": 0.5688, "step": 177695 }, { "epoch": 1.97, "learning_rate": 1.720627749726873e-05, "loss": 0.6284, "step": 177700 }, { "epoch": 1.97, "learning_rate": 1.7205354770130217e-05, "loss": 0.6154, "step": 177705 }, { "epoch": 1.97, "learning_rate": 1.7204432042991704e-05, "loss": 0.5769, "step": 177710 }, { "epoch": 1.97, "learning_rate": 1.7203509315853192e-05, "loss": 0.5666, "step": 177715 }, { "epoch": 1.97, "learning_rate": 1.7202586588714677e-05, "loss": 0.5786, "step": 177720 }, { "epoch": 1.97, "learning_rate": 1.7201663861576168e-05, "loss": 0.6395, "step": 177725 }, { "epoch": 1.97, "learning_rate": 1.7200741134437652e-05, "loss": 0.6, "step": 177730 }, { "epoch": 1.97, "learning_rate": 1.7199818407299144e-05, "loss": 0.6156, "step": 177735 }, { "epoch": 1.97, "learning_rate": 1.7198895680160628e-05, "loss": 0.6153, "step": 177740 }, { "epoch": 1.97, "learning_rate": 1.719797295302212e-05, "loss": 0.5853, "step": 177745 }, { "epoch": 1.97, "learning_rate": 1.7197050225883604e-05, "loss": 0.6311, "step": 177750 }, { "epoch": 1.97, "learning_rate": 1.719612749874509e-05, "loss": 0.6545, "step": 177755 }, { "epoch": 1.97, "learning_rate": 1.719520477160658e-05, "loss": 0.6328, "step": 177760 }, { "epoch": 1.97, "learning_rate": 1.7194282044468067e-05, "loss": 0.5752, "step": 177765 }, { "epoch": 1.97, "learning_rate": 1.7193359317329555e-05, "loss": 0.6502, "step": 177770 }, { "epoch": 1.97, "learning_rate": 1.719243659019104e-05, "loss": 0.6331, "step": 177775 }, { "epoch": 1.97, "learning_rate": 1.719151386305253e-05, "loss": 0.6097, "step": 177780 }, { "epoch": 1.97, "learning_rate": 1.7190591135914015e-05, "loss": 0.5977, "step": 177785 }, { "epoch": 1.97, "learning_rate": 1.7189668408775507e-05, "loss": 0.5755, "step": 177790 }, { "epoch": 1.97, "learning_rate": 1.718874568163699e-05, "loss": 0.6248, "step": 177795 }, { "epoch": 1.97, "learning_rate": 1.7187822954498482e-05, "loss": 0.6242, "step": 177800 }, { "epoch": 1.97, "learning_rate": 1.7186900227359967e-05, "loss": 0.6515, "step": 177805 }, { "epoch": 1.97, "learning_rate": 1.7185977500221455e-05, "loss": 0.639, "step": 177810 }, { "epoch": 1.97, "learning_rate": 1.7185054773082942e-05, "loss": 0.5838, "step": 177815 }, { "epoch": 1.97, "learning_rate": 1.718413204594443e-05, "loss": 0.606, "step": 177820 }, { "epoch": 1.97, "learning_rate": 1.7183209318805918e-05, "loss": 0.6255, "step": 177825 }, { "epoch": 1.97, "learning_rate": 1.7182286591667406e-05, "loss": 0.5974, "step": 177830 }, { "epoch": 1.97, "learning_rate": 1.7181363864528894e-05, "loss": 0.5758, "step": 177835 }, { "epoch": 1.97, "learning_rate": 1.718044113739038e-05, "loss": 0.5788, "step": 177840 }, { "epoch": 1.97, "learning_rate": 1.717951841025187e-05, "loss": 0.6307, "step": 177845 }, { "epoch": 1.97, "learning_rate": 1.7178595683113354e-05, "loss": 0.6205, "step": 177850 }, { "epoch": 1.97, "learning_rate": 1.7177672955974845e-05, "loss": 0.6432, "step": 177855 }, { "epoch": 1.97, "learning_rate": 1.717675022883633e-05, "loss": 0.5846, "step": 177860 }, { "epoch": 1.97, "learning_rate": 1.717582750169782e-05, "loss": 0.6246, "step": 177865 }, { "epoch": 1.97, "learning_rate": 1.7174904774559305e-05, "loss": 0.6279, "step": 177870 }, { "epoch": 1.97, "learning_rate": 1.7173982047420793e-05, "loss": 0.5614, "step": 177875 }, { "epoch": 1.97, "learning_rate": 1.717305932028228e-05, "loss": 0.6337, "step": 177880 }, { "epoch": 1.97, "learning_rate": 1.717213659314377e-05, "loss": 0.6529, "step": 177885 }, { "epoch": 1.97, "learning_rate": 1.7171213866005257e-05, "loss": 0.5899, "step": 177890 }, { "epoch": 1.97, "learning_rate": 1.7170291138866745e-05, "loss": 0.6041, "step": 177895 }, { "epoch": 1.97, "learning_rate": 1.7169368411728233e-05, "loss": 0.5905, "step": 177900 }, { "epoch": 1.97, "learning_rate": 1.716844568458972e-05, "loss": 0.5761, "step": 177905 }, { "epoch": 1.97, "learning_rate": 1.7167522957451208e-05, "loss": 0.5808, "step": 177910 }, { "epoch": 1.97, "learning_rate": 1.7166600230312693e-05, "loss": 0.6025, "step": 177915 }, { "epoch": 1.97, "learning_rate": 1.7165677503174184e-05, "loss": 0.6627, "step": 177920 }, { "epoch": 1.97, "learning_rate": 1.716475477603567e-05, "loss": 0.6734, "step": 177925 }, { "epoch": 1.97, "learning_rate": 1.7163832048897156e-05, "loss": 0.643, "step": 177930 }, { "epoch": 1.97, "learning_rate": 1.7162909321758644e-05, "loss": 0.6202, "step": 177935 }, { "epoch": 1.97, "learning_rate": 1.7161986594620132e-05, "loss": 0.6296, "step": 177940 }, { "epoch": 1.97, "learning_rate": 1.716106386748162e-05, "loss": 0.5988, "step": 177945 }, { "epoch": 1.97, "learning_rate": 1.7160141140343108e-05, "loss": 0.6379, "step": 177950 }, { "epoch": 1.97, "learning_rate": 1.7159218413204595e-05, "loss": 0.6171, "step": 177955 }, { "epoch": 1.97, "learning_rate": 1.7158295686066083e-05, "loss": 0.6049, "step": 177960 }, { "epoch": 1.97, "learning_rate": 1.715737295892757e-05, "loss": 0.5965, "step": 177965 }, { "epoch": 1.97, "learning_rate": 1.715645023178906e-05, "loss": 0.5619, "step": 177970 }, { "epoch": 1.97, "learning_rate": 1.7155527504650547e-05, "loss": 0.6285, "step": 177975 }, { "epoch": 1.97, "learning_rate": 1.7154604777512035e-05, "loss": 0.6177, "step": 177980 }, { "epoch": 1.97, "learning_rate": 1.715368205037352e-05, "loss": 0.6346, "step": 177985 }, { "epoch": 1.97, "learning_rate": 1.7152759323235007e-05, "loss": 0.6405, "step": 177990 }, { "epoch": 1.97, "learning_rate": 1.7151836596096495e-05, "loss": 0.5991, "step": 177995 }, { "epoch": 1.97, "learning_rate": 1.7150913868957983e-05, "loss": 0.6436, "step": 178000 }, { "epoch": 1.97, "eval_loss": 0.5964639186859131, "eval_runtime": 69.265, "eval_samples_per_second": 28.875, "eval_steps_per_second": 14.437, "step": 178000 }, { "epoch": 1.97, "learning_rate": 1.714999114181947e-05, "loss": 0.6718, "step": 178005 }, { "epoch": 1.97, "learning_rate": 1.714906841468096e-05, "loss": 0.6307, "step": 178010 }, { "epoch": 1.97, "learning_rate": 1.7148145687542446e-05, "loss": 0.636, "step": 178015 }, { "epoch": 1.97, "learning_rate": 1.7147222960403934e-05, "loss": 0.6144, "step": 178020 }, { "epoch": 1.97, "learning_rate": 1.7146300233265422e-05, "loss": 0.5964, "step": 178025 }, { "epoch": 1.97, "learning_rate": 1.714537750612691e-05, "loss": 0.6712, "step": 178030 }, { "epoch": 1.97, "learning_rate": 1.7144454778988398e-05, "loss": 0.5766, "step": 178035 }, { "epoch": 1.97, "learning_rate": 1.7143532051849882e-05, "loss": 0.6168, "step": 178040 }, { "epoch": 1.97, "learning_rate": 1.7142609324711373e-05, "loss": 0.5956, "step": 178045 }, { "epoch": 1.97, "learning_rate": 1.7141686597572858e-05, "loss": 0.6029, "step": 178050 }, { "epoch": 1.97, "learning_rate": 1.714076387043435e-05, "loss": 0.6528, "step": 178055 }, { "epoch": 1.97, "learning_rate": 1.7139841143295834e-05, "loss": 0.5419, "step": 178060 }, { "epoch": 1.97, "learning_rate": 1.713891841615732e-05, "loss": 0.6016, "step": 178065 }, { "epoch": 1.97, "learning_rate": 1.713799568901881e-05, "loss": 0.6246, "step": 178070 }, { "epoch": 1.97, "learning_rate": 1.7137072961880297e-05, "loss": 0.6318, "step": 178075 }, { "epoch": 1.97, "learning_rate": 1.7136150234741785e-05, "loss": 0.6636, "step": 178080 }, { "epoch": 1.97, "learning_rate": 1.7135227507603273e-05, "loss": 0.5952, "step": 178085 }, { "epoch": 1.97, "learning_rate": 1.713430478046476e-05, "loss": 0.649, "step": 178090 }, { "epoch": 1.97, "learning_rate": 1.713338205332625e-05, "loss": 0.623, "step": 178095 }, { "epoch": 1.97, "learning_rate": 1.7132459326187736e-05, "loss": 0.6112, "step": 178100 }, { "epoch": 1.97, "learning_rate": 1.713153659904922e-05, "loss": 0.5609, "step": 178105 }, { "epoch": 1.97, "learning_rate": 1.7130613871910712e-05, "loss": 0.5663, "step": 178110 }, { "epoch": 1.97, "learning_rate": 1.7129691144772196e-05, "loss": 0.5922, "step": 178115 }, { "epoch": 1.97, "learning_rate": 1.7128768417633688e-05, "loss": 0.6306, "step": 178120 }, { "epoch": 1.97, "learning_rate": 1.7127845690495172e-05, "loss": 0.5927, "step": 178125 }, { "epoch": 1.97, "learning_rate": 1.7126922963356663e-05, "loss": 0.594, "step": 178130 }, { "epoch": 1.97, "learning_rate": 1.7126000236218148e-05, "loss": 0.5022, "step": 178135 }, { "epoch": 1.97, "learning_rate": 1.7125077509079636e-05, "loss": 0.6795, "step": 178140 }, { "epoch": 1.97, "learning_rate": 1.7124154781941124e-05, "loss": 0.6514, "step": 178145 }, { "epoch": 1.97, "learning_rate": 1.712323205480261e-05, "loss": 0.6323, "step": 178150 }, { "epoch": 1.97, "learning_rate": 1.71223093276641e-05, "loss": 0.6098, "step": 178155 }, { "epoch": 1.97, "learning_rate": 1.7121386600525584e-05, "loss": 0.6405, "step": 178160 }, { "epoch": 1.97, "learning_rate": 1.7120463873387075e-05, "loss": 0.5793, "step": 178165 }, { "epoch": 1.97, "learning_rate": 1.711954114624856e-05, "loss": 0.6352, "step": 178170 }, { "epoch": 1.97, "learning_rate": 1.711861841911005e-05, "loss": 0.6406, "step": 178175 }, { "epoch": 1.97, "learning_rate": 1.7117695691971535e-05, "loss": 0.6461, "step": 178180 }, { "epoch": 1.97, "learning_rate": 1.7116772964833026e-05, "loss": 0.6773, "step": 178185 }, { "epoch": 1.97, "learning_rate": 1.711585023769451e-05, "loss": 0.613, "step": 178190 }, { "epoch": 1.97, "learning_rate": 1.7114927510556e-05, "loss": 0.626, "step": 178195 }, { "epoch": 1.97, "learning_rate": 1.7114004783417486e-05, "loss": 0.6342, "step": 178200 }, { "epoch": 1.97, "learning_rate": 1.7113082056278974e-05, "loss": 0.6278, "step": 178205 }, { "epoch": 1.97, "learning_rate": 1.7112159329140462e-05, "loss": 0.5921, "step": 178210 }, { "epoch": 1.97, "learning_rate": 1.711123660200195e-05, "loss": 0.6277, "step": 178215 }, { "epoch": 1.97, "learning_rate": 1.7110313874863438e-05, "loss": 0.587, "step": 178220 }, { "epoch": 1.97, "learning_rate": 1.7109391147724922e-05, "loss": 0.5929, "step": 178225 }, { "epoch": 1.97, "learning_rate": 1.7108468420586414e-05, "loss": 0.6367, "step": 178230 }, { "epoch": 1.97, "learning_rate": 1.7107545693447898e-05, "loss": 0.6026, "step": 178235 }, { "epoch": 1.97, "learning_rate": 1.710662296630939e-05, "loss": 0.5827, "step": 178240 }, { "epoch": 1.97, "learning_rate": 1.7105700239170874e-05, "loss": 0.5918, "step": 178245 }, { "epoch": 1.97, "learning_rate": 1.7104777512032365e-05, "loss": 0.6016, "step": 178250 }, { "epoch": 1.97, "learning_rate": 1.710385478489385e-05, "loss": 0.5633, "step": 178255 }, { "epoch": 1.97, "learning_rate": 1.7102932057755337e-05, "loss": 0.5806, "step": 178260 }, { "epoch": 1.97, "learning_rate": 1.7102009330616825e-05, "loss": 0.6022, "step": 178265 }, { "epoch": 1.97, "learning_rate": 1.7101086603478313e-05, "loss": 0.6293, "step": 178270 }, { "epoch": 1.97, "learning_rate": 1.71001638763398e-05, "loss": 0.5934, "step": 178275 }, { "epoch": 1.97, "learning_rate": 1.709924114920129e-05, "loss": 0.6902, "step": 178280 }, { "epoch": 1.97, "learning_rate": 1.7098318422062777e-05, "loss": 0.5678, "step": 178285 }, { "epoch": 1.97, "learning_rate": 1.7097395694924264e-05, "loss": 0.6505, "step": 178290 }, { "epoch": 1.97, "learning_rate": 1.7096472967785752e-05, "loss": 0.65, "step": 178295 }, { "epoch": 1.97, "learning_rate": 1.7095550240647237e-05, "loss": 0.6215, "step": 178300 }, { "epoch": 1.97, "learning_rate": 1.7094627513508728e-05, "loss": 0.5807, "step": 178305 }, { "epoch": 1.97, "learning_rate": 1.7093704786370212e-05, "loss": 0.613, "step": 178310 }, { "epoch": 1.97, "learning_rate": 1.70927820592317e-05, "loss": 0.5913, "step": 178315 }, { "epoch": 1.97, "learning_rate": 1.7091859332093188e-05, "loss": 0.5895, "step": 178320 }, { "epoch": 1.97, "learning_rate": 1.7090936604954676e-05, "loss": 0.625, "step": 178325 }, { "epoch": 1.97, "learning_rate": 1.7090013877816164e-05, "loss": 0.6111, "step": 178330 }, { "epoch": 1.97, "learning_rate": 1.708909115067765e-05, "loss": 0.5941, "step": 178335 }, { "epoch": 1.97, "learning_rate": 1.708816842353914e-05, "loss": 0.5363, "step": 178340 }, { "epoch": 1.97, "learning_rate": 1.7087245696400627e-05, "loss": 0.6155, "step": 178345 }, { "epoch": 1.97, "learning_rate": 1.7086322969262115e-05, "loss": 0.5468, "step": 178350 }, { "epoch": 1.97, "learning_rate": 1.7085400242123603e-05, "loss": 0.6458, "step": 178355 }, { "epoch": 1.97, "learning_rate": 1.708447751498509e-05, "loss": 0.6616, "step": 178360 }, { "epoch": 1.97, "learning_rate": 1.708355478784658e-05, "loss": 0.6508, "step": 178365 }, { "epoch": 1.98, "learning_rate": 1.7082632060708063e-05, "loss": 0.6643, "step": 178370 }, { "epoch": 1.98, "learning_rate": 1.708170933356955e-05, "loss": 0.6619, "step": 178375 }, { "epoch": 1.98, "learning_rate": 1.708078660643104e-05, "loss": 0.6261, "step": 178380 }, { "epoch": 1.98, "learning_rate": 1.7079863879292527e-05, "loss": 0.6097, "step": 178385 }, { "epoch": 1.98, "learning_rate": 1.7078941152154015e-05, "loss": 0.6858, "step": 178390 }, { "epoch": 1.98, "learning_rate": 1.7078018425015502e-05, "loss": 0.6083, "step": 178395 }, { "epoch": 1.98, "learning_rate": 1.707709569787699e-05, "loss": 0.6104, "step": 178400 }, { "epoch": 1.98, "learning_rate": 1.7076172970738478e-05, "loss": 0.5991, "step": 178405 }, { "epoch": 1.98, "learning_rate": 1.7075250243599966e-05, "loss": 0.6035, "step": 178410 }, { "epoch": 1.98, "learning_rate": 1.7074327516461454e-05, "loss": 0.6454, "step": 178415 }, { "epoch": 1.98, "learning_rate": 1.707340478932294e-05, "loss": 0.6418, "step": 178420 }, { "epoch": 1.98, "learning_rate": 1.7072482062184426e-05, "loss": 0.6253, "step": 178425 }, { "epoch": 1.98, "learning_rate": 1.7071559335045917e-05, "loss": 0.5845, "step": 178430 }, { "epoch": 1.98, "learning_rate": 1.7070636607907402e-05, "loss": 0.5873, "step": 178435 }, { "epoch": 1.98, "learning_rate": 1.7069713880768893e-05, "loss": 0.6462, "step": 178440 }, { "epoch": 1.98, "learning_rate": 1.7068791153630378e-05, "loss": 0.613, "step": 178445 }, { "epoch": 1.98, "learning_rate": 1.7067868426491865e-05, "loss": 0.5964, "step": 178450 }, { "epoch": 1.98, "learning_rate": 1.7066945699353353e-05, "loss": 0.5636, "step": 178455 }, { "epoch": 1.98, "learning_rate": 1.706602297221484e-05, "loss": 0.6206, "step": 178460 }, { "epoch": 1.98, "learning_rate": 1.706510024507633e-05, "loss": 0.5807, "step": 178465 }, { "epoch": 1.98, "learning_rate": 1.7064177517937817e-05, "loss": 0.6101, "step": 178470 }, { "epoch": 1.98, "learning_rate": 1.7063254790799305e-05, "loss": 0.6375, "step": 178475 }, { "epoch": 1.98, "learning_rate": 1.7062332063660792e-05, "loss": 0.61, "step": 178480 }, { "epoch": 1.98, "learning_rate": 1.706140933652228e-05, "loss": 0.6618, "step": 178485 }, { "epoch": 1.98, "learning_rate": 1.7060486609383765e-05, "loss": 0.6164, "step": 178490 }, { "epoch": 1.98, "learning_rate": 1.7059563882245256e-05, "loss": 0.5724, "step": 178495 }, { "epoch": 1.98, "learning_rate": 1.705864115510674e-05, "loss": 0.5916, "step": 178500 }, { "epoch": 1.98, "learning_rate": 1.7057718427968232e-05, "loss": 0.594, "step": 178505 }, { "epoch": 1.98, "learning_rate": 1.7056795700829716e-05, "loss": 0.6752, "step": 178510 }, { "epoch": 1.98, "learning_rate": 1.7055872973691207e-05, "loss": 0.6028, "step": 178515 }, { "epoch": 1.98, "learning_rate": 1.7054950246552692e-05, "loss": 0.5788, "step": 178520 }, { "epoch": 1.98, "learning_rate": 1.705402751941418e-05, "loss": 0.6067, "step": 178525 }, { "epoch": 1.98, "learning_rate": 1.7053104792275668e-05, "loss": 0.6388, "step": 178530 }, { "epoch": 1.98, "learning_rate": 1.7052182065137155e-05, "loss": 0.5879, "step": 178535 }, { "epoch": 1.98, "learning_rate": 1.7051259337998643e-05, "loss": 0.5597, "step": 178540 }, { "epoch": 1.98, "learning_rate": 1.7050336610860128e-05, "loss": 0.5984, "step": 178545 }, { "epoch": 1.98, "learning_rate": 1.704941388372162e-05, "loss": 0.6379, "step": 178550 }, { "epoch": 1.98, "learning_rate": 1.7048491156583103e-05, "loss": 0.6362, "step": 178555 }, { "epoch": 1.98, "learning_rate": 1.7047568429444595e-05, "loss": 0.6417, "step": 178560 }, { "epoch": 1.98, "learning_rate": 1.704664570230608e-05, "loss": 0.6037, "step": 178565 }, { "epoch": 1.98, "learning_rate": 1.704572297516757e-05, "loss": 0.581, "step": 178570 }, { "epoch": 1.98, "learning_rate": 1.7044800248029055e-05, "loss": 0.6045, "step": 178575 }, { "epoch": 1.98, "learning_rate": 1.7043877520890543e-05, "loss": 0.5739, "step": 178580 }, { "epoch": 1.98, "learning_rate": 1.704295479375203e-05, "loss": 0.6085, "step": 178585 }, { "epoch": 1.98, "learning_rate": 1.704203206661352e-05, "loss": 0.6302, "step": 178590 }, { "epoch": 1.98, "learning_rate": 1.7041109339475006e-05, "loss": 0.6686, "step": 178595 }, { "epoch": 1.98, "learning_rate": 1.704018661233649e-05, "loss": 0.6436, "step": 178600 }, { "epoch": 1.98, "learning_rate": 1.7039263885197982e-05, "loss": 0.587, "step": 178605 }, { "epoch": 1.98, "learning_rate": 1.7038341158059466e-05, "loss": 0.5983, "step": 178610 }, { "epoch": 1.98, "learning_rate": 1.7037418430920958e-05, "loss": 0.6412, "step": 178615 }, { "epoch": 1.98, "learning_rate": 1.7036495703782442e-05, "loss": 0.5822, "step": 178620 }, { "epoch": 1.98, "learning_rate": 1.7035572976643933e-05, "loss": 0.5702, "step": 178625 }, { "epoch": 1.98, "learning_rate": 1.7034650249505418e-05, "loss": 0.5919, "step": 178630 }, { "epoch": 1.98, "learning_rate": 1.703372752236691e-05, "loss": 0.6206, "step": 178635 }, { "epoch": 1.98, "learning_rate": 1.7032804795228393e-05, "loss": 0.5958, "step": 178640 }, { "epoch": 1.98, "learning_rate": 1.703188206808988e-05, "loss": 0.6457, "step": 178645 }, { "epoch": 1.98, "learning_rate": 1.703095934095137e-05, "loss": 0.6202, "step": 178650 }, { "epoch": 1.98, "learning_rate": 1.7030036613812857e-05, "loss": 0.6343, "step": 178655 }, { "epoch": 1.98, "learning_rate": 1.7029113886674345e-05, "loss": 0.6204, "step": 178660 }, { "epoch": 1.98, "learning_rate": 1.7028191159535833e-05, "loss": 0.6451, "step": 178665 }, { "epoch": 1.98, "learning_rate": 1.702726843239732e-05, "loss": 0.6604, "step": 178670 }, { "epoch": 1.98, "learning_rate": 1.7026345705258805e-05, "loss": 0.5868, "step": 178675 }, { "epoch": 1.98, "learning_rate": 1.7025422978120296e-05, "loss": 0.6023, "step": 178680 }, { "epoch": 1.98, "learning_rate": 1.702450025098178e-05, "loss": 0.6209, "step": 178685 }, { "epoch": 1.98, "learning_rate": 1.7023577523843272e-05, "loss": 0.6499, "step": 178690 }, { "epoch": 1.98, "learning_rate": 1.7022654796704756e-05, "loss": 0.6192, "step": 178695 }, { "epoch": 1.98, "learning_rate": 1.7021732069566244e-05, "loss": 0.6254, "step": 178700 }, { "epoch": 1.98, "learning_rate": 1.7020809342427732e-05, "loss": 0.6112, "step": 178705 }, { "epoch": 1.98, "learning_rate": 1.701988661528922e-05, "loss": 0.6113, "step": 178710 }, { "epoch": 1.98, "learning_rate": 1.7018963888150708e-05, "loss": 0.5848, "step": 178715 }, { "epoch": 1.98, "learning_rate": 1.7018041161012196e-05, "loss": 0.6169, "step": 178720 }, { "epoch": 1.98, "learning_rate": 1.7017118433873684e-05, "loss": 0.6854, "step": 178725 }, { "epoch": 1.98, "learning_rate": 1.701619570673517e-05, "loss": 0.5773, "step": 178730 }, { "epoch": 1.98, "learning_rate": 1.701527297959666e-05, "loss": 0.6309, "step": 178735 }, { "epoch": 1.98, "learning_rate": 1.7014350252458147e-05, "loss": 0.6199, "step": 178740 }, { "epoch": 1.98, "learning_rate": 1.7013427525319635e-05, "loss": 0.6177, "step": 178745 }, { "epoch": 1.98, "learning_rate": 1.701250479818112e-05, "loss": 0.628, "step": 178750 }, { "epoch": 1.98, "learning_rate": 1.7011582071042607e-05, "loss": 0.5665, "step": 178755 }, { "epoch": 1.98, "learning_rate": 1.7010659343904095e-05, "loss": 0.6184, "step": 178760 }, { "epoch": 1.98, "learning_rate": 1.7009736616765583e-05, "loss": 0.5719, "step": 178765 }, { "epoch": 1.98, "learning_rate": 1.700881388962707e-05, "loss": 0.6042, "step": 178770 }, { "epoch": 1.98, "learning_rate": 1.700789116248856e-05, "loss": 0.6514, "step": 178775 }, { "epoch": 1.98, "learning_rate": 1.7006968435350046e-05, "loss": 0.6023, "step": 178780 }, { "epoch": 1.98, "learning_rate": 1.7006045708211534e-05, "loss": 0.5997, "step": 178785 }, { "epoch": 1.98, "learning_rate": 1.7005122981073022e-05, "loss": 0.6718, "step": 178790 }, { "epoch": 1.98, "learning_rate": 1.700420025393451e-05, "loss": 0.638, "step": 178795 }, { "epoch": 1.98, "learning_rate": 1.7003277526795998e-05, "loss": 0.6116, "step": 178800 }, { "epoch": 1.98, "learning_rate": 1.7002354799657486e-05, "loss": 0.6121, "step": 178805 }, { "epoch": 1.98, "learning_rate": 1.7001432072518974e-05, "loss": 0.5861, "step": 178810 }, { "epoch": 1.98, "learning_rate": 1.700050934538046e-05, "loss": 0.6126, "step": 178815 }, { "epoch": 1.98, "learning_rate": 1.6999586618241946e-05, "loss": 0.6111, "step": 178820 }, { "epoch": 1.98, "learning_rate": 1.6998663891103434e-05, "loss": 0.6172, "step": 178825 }, { "epoch": 1.98, "learning_rate": 1.699774116396492e-05, "loss": 0.5947, "step": 178830 }, { "epoch": 1.98, "learning_rate": 1.699681843682641e-05, "loss": 0.6901, "step": 178835 }, { "epoch": 1.98, "learning_rate": 1.6995895709687897e-05, "loss": 0.6205, "step": 178840 }, { "epoch": 1.98, "learning_rate": 1.6994972982549385e-05, "loss": 0.6629, "step": 178845 }, { "epoch": 1.98, "learning_rate": 1.6994050255410873e-05, "loss": 0.5863, "step": 178850 }, { "epoch": 1.98, "learning_rate": 1.699312752827236e-05, "loss": 0.5601, "step": 178855 }, { "epoch": 1.98, "learning_rate": 1.699220480113385e-05, "loss": 0.6501, "step": 178860 }, { "epoch": 1.98, "learning_rate": 1.6991282073995336e-05, "loss": 0.6039, "step": 178865 }, { "epoch": 1.98, "learning_rate": 1.6990359346856824e-05, "loss": 0.6043, "step": 178870 }, { "epoch": 1.98, "learning_rate": 1.698943661971831e-05, "loss": 0.6368, "step": 178875 }, { "epoch": 1.98, "learning_rate": 1.69885138925798e-05, "loss": 0.6249, "step": 178880 }, { "epoch": 1.98, "learning_rate": 1.6987591165441284e-05, "loss": 0.6074, "step": 178885 }, { "epoch": 1.98, "learning_rate": 1.6986668438302776e-05, "loss": 0.6253, "step": 178890 }, { "epoch": 1.98, "learning_rate": 1.698574571116426e-05, "loss": 0.593, "step": 178895 }, { "epoch": 1.98, "learning_rate": 1.6984822984025748e-05, "loss": 0.5882, "step": 178900 }, { "epoch": 1.98, "learning_rate": 1.6983900256887236e-05, "loss": 0.6019, "step": 178905 }, { "epoch": 1.98, "learning_rate": 1.6982977529748724e-05, "loss": 0.6099, "step": 178910 }, { "epoch": 1.98, "learning_rate": 1.698205480261021e-05, "loss": 0.5808, "step": 178915 }, { "epoch": 1.98, "learning_rate": 1.69811320754717e-05, "loss": 0.6049, "step": 178920 }, { "epoch": 1.98, "learning_rate": 1.6980209348333187e-05, "loss": 0.5901, "step": 178925 }, { "epoch": 1.98, "learning_rate": 1.6979286621194672e-05, "loss": 0.6078, "step": 178930 }, { "epoch": 1.98, "learning_rate": 1.6978363894056163e-05, "loss": 0.619, "step": 178935 }, { "epoch": 1.98, "learning_rate": 1.6977441166917647e-05, "loss": 0.6876, "step": 178940 }, { "epoch": 1.98, "learning_rate": 1.697651843977914e-05, "loss": 0.6281, "step": 178945 }, { "epoch": 1.98, "learning_rate": 1.6975595712640623e-05, "loss": 0.5813, "step": 178950 }, { "epoch": 1.98, "learning_rate": 1.6974672985502114e-05, "loss": 0.5795, "step": 178955 }, { "epoch": 1.98, "learning_rate": 1.69737502583636e-05, "loss": 0.6383, "step": 178960 }, { "epoch": 1.98, "learning_rate": 1.697282753122509e-05, "loss": 0.5951, "step": 178965 }, { "epoch": 1.98, "learning_rate": 1.6971904804086575e-05, "loss": 0.65, "step": 178970 }, { "epoch": 1.98, "learning_rate": 1.6970982076948062e-05, "loss": 0.5798, "step": 178975 }, { "epoch": 1.98, "learning_rate": 1.697005934980955e-05, "loss": 0.5895, "step": 178980 }, { "epoch": 1.98, "learning_rate": 1.6969136622671035e-05, "loss": 0.581, "step": 178985 }, { "epoch": 1.98, "learning_rate": 1.6968213895532526e-05, "loss": 0.6684, "step": 178990 }, { "epoch": 1.98, "learning_rate": 1.696729116839401e-05, "loss": 0.6119, "step": 178995 }, { "epoch": 1.98, "learning_rate": 1.69663684412555e-05, "loss": 0.6167, "step": 179000 }, { "epoch": 1.98, "eval_loss": 0.5788506865501404, "eval_runtime": 69.4395, "eval_samples_per_second": 28.802, "eval_steps_per_second": 14.401, "step": 179000 }, { "epoch": 1.98, "learning_rate": 1.6965445714116986e-05, "loss": 0.5813, "step": 179005 }, { "epoch": 1.98, "learning_rate": 1.6964522986978477e-05, "loss": 0.61, "step": 179010 }, { "epoch": 1.98, "learning_rate": 1.6963600259839962e-05, "loss": 0.62, "step": 179015 }, { "epoch": 1.98, "learning_rate": 1.6962677532701453e-05, "loss": 0.6049, "step": 179020 }, { "epoch": 1.98, "learning_rate": 1.6961754805562937e-05, "loss": 0.5854, "step": 179025 }, { "epoch": 1.98, "learning_rate": 1.6960832078424425e-05, "loss": 0.6176, "step": 179030 }, { "epoch": 1.98, "learning_rate": 1.6959909351285913e-05, "loss": 0.552, "step": 179035 }, { "epoch": 1.98, "learning_rate": 1.69589866241474e-05, "loss": 0.6355, "step": 179040 }, { "epoch": 1.98, "learning_rate": 1.695806389700889e-05, "loss": 0.5934, "step": 179045 }, { "epoch": 1.98, "learning_rate": 1.6957141169870377e-05, "loss": 0.5437, "step": 179050 }, { "epoch": 1.98, "learning_rate": 1.6956218442731865e-05, "loss": 0.6197, "step": 179055 }, { "epoch": 1.98, "learning_rate": 1.695529571559335e-05, "loss": 0.6024, "step": 179060 }, { "epoch": 1.98, "learning_rate": 1.695437298845484e-05, "loss": 0.6154, "step": 179065 }, { "epoch": 1.98, "learning_rate": 1.6953450261316325e-05, "loss": 0.645, "step": 179070 }, { "epoch": 1.98, "learning_rate": 1.6952527534177816e-05, "loss": 0.6564, "step": 179075 }, { "epoch": 1.98, "learning_rate": 1.69516048070393e-05, "loss": 0.5881, "step": 179080 }, { "epoch": 1.98, "learning_rate": 1.6950682079900788e-05, "loss": 0.6173, "step": 179085 }, { "epoch": 1.98, "learning_rate": 1.6949759352762276e-05, "loss": 0.6432, "step": 179090 }, { "epoch": 1.98, "learning_rate": 1.6948836625623764e-05, "loss": 0.6057, "step": 179095 }, { "epoch": 1.98, "learning_rate": 1.6947913898485252e-05, "loss": 0.6457, "step": 179100 }, { "epoch": 1.98, "learning_rate": 1.694699117134674e-05, "loss": 0.5926, "step": 179105 }, { "epoch": 1.98, "learning_rate": 1.6946068444208228e-05, "loss": 0.5874, "step": 179110 }, { "epoch": 1.98, "learning_rate": 1.6945145717069715e-05, "loss": 0.623, "step": 179115 }, { "epoch": 1.98, "learning_rate": 1.6944222989931203e-05, "loss": 0.6341, "step": 179120 }, { "epoch": 1.98, "learning_rate": 1.694330026279269e-05, "loss": 0.638, "step": 179125 }, { "epoch": 1.98, "learning_rate": 1.694237753565418e-05, "loss": 0.6243, "step": 179130 }, { "epoch": 1.98, "learning_rate": 1.6941454808515663e-05, "loss": 0.5578, "step": 179135 }, { "epoch": 1.98, "learning_rate": 1.694053208137715e-05, "loss": 0.5961, "step": 179140 }, { "epoch": 1.98, "learning_rate": 1.693960935423864e-05, "loss": 0.6083, "step": 179145 }, { "epoch": 1.98, "learning_rate": 1.6938686627100127e-05, "loss": 0.6595, "step": 179150 }, { "epoch": 1.98, "learning_rate": 1.6937763899961615e-05, "loss": 0.6442, "step": 179155 }, { "epoch": 1.98, "learning_rate": 1.6936841172823103e-05, "loss": 0.5767, "step": 179160 }, { "epoch": 1.98, "learning_rate": 1.693591844568459e-05, "loss": 0.5814, "step": 179165 }, { "epoch": 1.98, "learning_rate": 1.693499571854608e-05, "loss": 0.6008, "step": 179170 }, { "epoch": 1.98, "learning_rate": 1.6934072991407566e-05, "loss": 0.6239, "step": 179175 }, { "epoch": 1.98, "learning_rate": 1.6933150264269054e-05, "loss": 0.5923, "step": 179180 }, { "epoch": 1.98, "learning_rate": 1.6932227537130542e-05, "loss": 0.6431, "step": 179185 }, { "epoch": 1.98, "learning_rate": 1.693130480999203e-05, "loss": 0.5758, "step": 179190 }, { "epoch": 1.98, "learning_rate": 1.6930382082853518e-05, "loss": 0.6331, "step": 179195 }, { "epoch": 1.98, "learning_rate": 1.6929459355715005e-05, "loss": 0.6513, "step": 179200 }, { "epoch": 1.98, "learning_rate": 1.692853662857649e-05, "loss": 0.5964, "step": 179205 }, { "epoch": 1.98, "learning_rate": 1.6927613901437978e-05, "loss": 0.6142, "step": 179210 }, { "epoch": 1.98, "learning_rate": 1.6926691174299466e-05, "loss": 0.6229, "step": 179215 }, { "epoch": 1.98, "learning_rate": 1.6925768447160953e-05, "loss": 0.59, "step": 179220 }, { "epoch": 1.98, "learning_rate": 1.692484572002244e-05, "loss": 0.6405, "step": 179225 }, { "epoch": 1.98, "learning_rate": 1.692392299288393e-05, "loss": 0.5865, "step": 179230 }, { "epoch": 1.98, "learning_rate": 1.6923000265745417e-05, "loss": 0.6062, "step": 179235 }, { "epoch": 1.98, "learning_rate": 1.6922077538606905e-05, "loss": 0.6389, "step": 179240 }, { "epoch": 1.98, "learning_rate": 1.6921154811468393e-05, "loss": 0.6293, "step": 179245 }, { "epoch": 1.98, "learning_rate": 1.692023208432988e-05, "loss": 0.6123, "step": 179250 }, { "epoch": 1.98, "learning_rate": 1.691930935719137e-05, "loss": 0.5913, "step": 179255 }, { "epoch": 1.98, "learning_rate": 1.6918386630052853e-05, "loss": 0.6472, "step": 179260 }, { "epoch": 1.98, "learning_rate": 1.6917463902914344e-05, "loss": 0.5791, "step": 179265 }, { "epoch": 1.99, "learning_rate": 1.691654117577583e-05, "loss": 0.6222, "step": 179270 }, { "epoch": 1.99, "learning_rate": 1.691561844863732e-05, "loss": 0.5834, "step": 179275 }, { "epoch": 1.99, "learning_rate": 1.6914695721498804e-05, "loss": 0.5543, "step": 179280 }, { "epoch": 1.99, "learning_rate": 1.6913772994360292e-05, "loss": 0.6004, "step": 179285 }, { "epoch": 1.99, "learning_rate": 1.691285026722178e-05, "loss": 0.629, "step": 179290 }, { "epoch": 1.99, "learning_rate": 1.6911927540083268e-05, "loss": 0.6246, "step": 179295 }, { "epoch": 1.99, "learning_rate": 1.6911004812944756e-05, "loss": 0.6565, "step": 179300 }, { "epoch": 1.99, "learning_rate": 1.6910082085806243e-05, "loss": 0.6688, "step": 179305 }, { "epoch": 1.99, "learning_rate": 1.690915935866773e-05, "loss": 0.6117, "step": 179310 }, { "epoch": 1.99, "learning_rate": 1.6908236631529216e-05, "loss": 0.6319, "step": 179315 }, { "epoch": 1.99, "learning_rate": 1.6907313904390707e-05, "loss": 0.6494, "step": 179320 }, { "epoch": 1.99, "learning_rate": 1.690639117725219e-05, "loss": 0.5756, "step": 179325 }, { "epoch": 1.99, "learning_rate": 1.6905468450113683e-05, "loss": 0.5966, "step": 179330 }, { "epoch": 1.99, "learning_rate": 1.6904545722975167e-05, "loss": 0.6334, "step": 179335 }, { "epoch": 1.99, "learning_rate": 1.690362299583666e-05, "loss": 0.6038, "step": 179340 }, { "epoch": 1.99, "learning_rate": 1.6902700268698143e-05, "loss": 0.688, "step": 179345 }, { "epoch": 1.99, "learning_rate": 1.6901777541559634e-05, "loss": 0.6611, "step": 179350 }, { "epoch": 1.99, "learning_rate": 1.690085481442112e-05, "loss": 0.6414, "step": 179355 }, { "epoch": 1.99, "learning_rate": 1.6899932087282606e-05, "loss": 0.5944, "step": 179360 }, { "epoch": 1.99, "learning_rate": 1.6899009360144094e-05, "loss": 0.6472, "step": 179365 }, { "epoch": 1.99, "learning_rate": 1.689808663300558e-05, "loss": 0.6054, "step": 179370 }, { "epoch": 1.99, "learning_rate": 1.689716390586707e-05, "loss": 0.6012, "step": 179375 }, { "epoch": 1.99, "learning_rate": 1.6896241178728554e-05, "loss": 0.6245, "step": 179380 }, { "epoch": 1.99, "learning_rate": 1.6895318451590046e-05, "loss": 0.6076, "step": 179385 }, { "epoch": 1.99, "learning_rate": 1.689439572445153e-05, "loss": 0.6425, "step": 179390 }, { "epoch": 1.99, "learning_rate": 1.689347299731302e-05, "loss": 0.533, "step": 179395 }, { "epoch": 1.99, "learning_rate": 1.6892550270174506e-05, "loss": 0.6011, "step": 179400 }, { "epoch": 1.99, "learning_rate": 1.6891627543035997e-05, "loss": 0.6632, "step": 179405 }, { "epoch": 1.99, "learning_rate": 1.689070481589748e-05, "loss": 0.5966, "step": 179410 }, { "epoch": 1.99, "learning_rate": 1.688978208875897e-05, "loss": 0.6468, "step": 179415 }, { "epoch": 1.99, "learning_rate": 1.6888859361620457e-05, "loss": 0.5706, "step": 179420 }, { "epoch": 1.99, "learning_rate": 1.6887936634481945e-05, "loss": 0.6037, "step": 179425 }, { "epoch": 1.99, "learning_rate": 1.6887013907343433e-05, "loss": 0.6219, "step": 179430 }, { "epoch": 1.99, "learning_rate": 1.6886091180204917e-05, "loss": 0.5921, "step": 179435 }, { "epoch": 1.99, "learning_rate": 1.688516845306641e-05, "loss": 0.6052, "step": 179440 }, { "epoch": 1.99, "learning_rate": 1.6884245725927893e-05, "loss": 0.5852, "step": 179445 }, { "epoch": 1.99, "learning_rate": 1.6883322998789384e-05, "loss": 0.6344, "step": 179450 }, { "epoch": 1.99, "learning_rate": 1.688240027165087e-05, "loss": 0.629, "step": 179455 }, { "epoch": 1.99, "learning_rate": 1.688147754451236e-05, "loss": 0.6049, "step": 179460 }, { "epoch": 1.99, "learning_rate": 1.6880554817373844e-05, "loss": 0.6025, "step": 179465 }, { "epoch": 1.99, "learning_rate": 1.6879632090235332e-05, "loss": 0.551, "step": 179470 }, { "epoch": 1.99, "learning_rate": 1.687870936309682e-05, "loss": 0.6236, "step": 179475 }, { "epoch": 1.99, "learning_rate": 1.6877786635958308e-05, "loss": 0.606, "step": 179480 }, { "epoch": 1.99, "learning_rate": 1.6876863908819796e-05, "loss": 0.6027, "step": 179485 }, { "epoch": 1.99, "learning_rate": 1.6875941181681284e-05, "loss": 0.6699, "step": 179490 }, { "epoch": 1.99, "learning_rate": 1.687501845454277e-05, "loss": 0.5942, "step": 179495 }, { "epoch": 1.99, "learning_rate": 1.687409572740426e-05, "loss": 0.6668, "step": 179500 }, { "epoch": 1.99, "learning_rate": 1.6873173000265747e-05, "loss": 0.6403, "step": 179505 }, { "epoch": 1.99, "learning_rate": 1.6872250273127232e-05, "loss": 0.628, "step": 179510 }, { "epoch": 1.99, "learning_rate": 1.6871327545988723e-05, "loss": 0.6156, "step": 179515 }, { "epoch": 1.99, "learning_rate": 1.6870404818850207e-05, "loss": 0.6253, "step": 179520 }, { "epoch": 1.99, "learning_rate": 1.6869482091711695e-05, "loss": 0.5708, "step": 179525 }, { "epoch": 1.99, "learning_rate": 1.6868559364573183e-05, "loss": 0.6228, "step": 179530 }, { "epoch": 1.99, "learning_rate": 1.686763663743467e-05, "loss": 0.6067, "step": 179535 }, { "epoch": 1.99, "learning_rate": 1.686671391029616e-05, "loss": 0.6124, "step": 179540 }, { "epoch": 1.99, "learning_rate": 1.6865791183157647e-05, "loss": 0.6596, "step": 179545 }, { "epoch": 1.99, "learning_rate": 1.6864868456019134e-05, "loss": 0.6244, "step": 179550 }, { "epoch": 1.99, "learning_rate": 1.6863945728880622e-05, "loss": 0.6472, "step": 179555 }, { "epoch": 1.99, "learning_rate": 1.686302300174211e-05, "loss": 0.6015, "step": 179560 }, { "epoch": 1.99, "learning_rate": 1.6862100274603598e-05, "loss": 0.5978, "step": 179565 }, { "epoch": 1.99, "learning_rate": 1.6861177547465086e-05, "loss": 0.6348, "step": 179570 }, { "epoch": 1.99, "learning_rate": 1.6860254820326574e-05, "loss": 0.576, "step": 179575 }, { "epoch": 1.99, "learning_rate": 1.685933209318806e-05, "loss": 0.5368, "step": 179580 }, { "epoch": 1.99, "learning_rate": 1.6858409366049546e-05, "loss": 0.5901, "step": 179585 }, { "epoch": 1.99, "learning_rate": 1.6857486638911034e-05, "loss": 0.6096, "step": 179590 }, { "epoch": 1.99, "learning_rate": 1.6856563911772522e-05, "loss": 0.6332, "step": 179595 }, { "epoch": 1.99, "learning_rate": 1.685564118463401e-05, "loss": 0.6499, "step": 179600 }, { "epoch": 1.99, "learning_rate": 1.6854718457495497e-05, "loss": 0.5819, "step": 179605 }, { "epoch": 1.99, "learning_rate": 1.6853795730356985e-05, "loss": 0.6524, "step": 179610 }, { "epoch": 1.99, "learning_rate": 1.6852873003218473e-05, "loss": 0.6161, "step": 179615 }, { "epoch": 1.99, "learning_rate": 1.685195027607996e-05, "loss": 0.5866, "step": 179620 }, { "epoch": 1.99, "learning_rate": 1.685102754894145e-05, "loss": 0.6553, "step": 179625 }, { "epoch": 1.99, "learning_rate": 1.6850104821802937e-05, "loss": 0.6399, "step": 179630 }, { "epoch": 1.99, "learning_rate": 1.6849182094664425e-05, "loss": 0.6162, "step": 179635 }, { "epoch": 1.99, "learning_rate": 1.6848259367525912e-05, "loss": 0.6323, "step": 179640 }, { "epoch": 1.99, "learning_rate": 1.6847336640387397e-05, "loss": 0.6351, "step": 179645 }, { "epoch": 1.99, "learning_rate": 1.6846413913248888e-05, "loss": 0.6149, "step": 179650 }, { "epoch": 1.99, "learning_rate": 1.6845491186110373e-05, "loss": 0.6605, "step": 179655 }, { "epoch": 1.99, "learning_rate": 1.684456845897186e-05, "loss": 0.5637, "step": 179660 }, { "epoch": 1.99, "learning_rate": 1.6843645731833348e-05, "loss": 0.619, "step": 179665 }, { "epoch": 1.99, "learning_rate": 1.6842723004694836e-05, "loss": 0.5963, "step": 179670 }, { "epoch": 1.99, "learning_rate": 1.6841800277556324e-05, "loss": 0.6174, "step": 179675 }, { "epoch": 1.99, "learning_rate": 1.6840877550417812e-05, "loss": 0.6349, "step": 179680 }, { "epoch": 1.99, "learning_rate": 1.68399548232793e-05, "loss": 0.5668, "step": 179685 }, { "epoch": 1.99, "learning_rate": 1.6839032096140787e-05, "loss": 0.6134, "step": 179690 }, { "epoch": 1.99, "learning_rate": 1.6838109369002275e-05, "loss": 0.5923, "step": 179695 }, { "epoch": 1.99, "learning_rate": 1.683718664186376e-05, "loss": 0.6607, "step": 179700 }, { "epoch": 1.99, "learning_rate": 1.683626391472525e-05, "loss": 0.561, "step": 179705 }, { "epoch": 1.99, "learning_rate": 1.6835341187586735e-05, "loss": 0.6208, "step": 179710 }, { "epoch": 1.99, "learning_rate": 1.6834418460448227e-05, "loss": 0.6337, "step": 179715 }, { "epoch": 1.99, "learning_rate": 1.683349573330971e-05, "loss": 0.68, "step": 179720 }, { "epoch": 1.99, "learning_rate": 1.6832573006171202e-05, "loss": 0.6123, "step": 179725 }, { "epoch": 1.99, "learning_rate": 1.6831650279032687e-05, "loss": 0.5703, "step": 179730 }, { "epoch": 1.99, "learning_rate": 1.6830727551894175e-05, "loss": 0.5898, "step": 179735 }, { "epoch": 1.99, "learning_rate": 1.6829804824755663e-05, "loss": 0.6478, "step": 179740 }, { "epoch": 1.99, "learning_rate": 1.682888209761715e-05, "loss": 0.6371, "step": 179745 }, { "epoch": 1.99, "learning_rate": 1.6827959370478638e-05, "loss": 0.5853, "step": 179750 }, { "epoch": 1.99, "learning_rate": 1.6827036643340123e-05, "loss": 0.5793, "step": 179755 }, { "epoch": 1.99, "learning_rate": 1.6826113916201614e-05, "loss": 0.6252, "step": 179760 }, { "epoch": 1.99, "learning_rate": 1.68251911890631e-05, "loss": 0.5952, "step": 179765 }, { "epoch": 1.99, "learning_rate": 1.682426846192459e-05, "loss": 0.6521, "step": 179770 }, { "epoch": 1.99, "learning_rate": 1.6823345734786074e-05, "loss": 0.6004, "step": 179775 }, { "epoch": 1.99, "learning_rate": 1.6822423007647565e-05, "loss": 0.6129, "step": 179780 }, { "epoch": 1.99, "learning_rate": 1.682150028050905e-05, "loss": 0.5973, "step": 179785 }, { "epoch": 1.99, "learning_rate": 1.682057755337054e-05, "loss": 0.6157, "step": 179790 }, { "epoch": 1.99, "learning_rate": 1.6819654826232026e-05, "loss": 0.6187, "step": 179795 }, { "epoch": 1.99, "learning_rate": 1.6818732099093513e-05, "loss": 0.5942, "step": 179800 }, { "epoch": 1.99, "learning_rate": 1.6817809371955e-05, "loss": 0.6271, "step": 179805 }, { "epoch": 1.99, "learning_rate": 1.681688664481649e-05, "loss": 0.5902, "step": 179810 }, { "epoch": 1.99, "learning_rate": 1.6815963917677977e-05, "loss": 0.6493, "step": 179815 }, { "epoch": 1.99, "learning_rate": 1.681504119053946e-05, "loss": 0.5835, "step": 179820 }, { "epoch": 1.99, "learning_rate": 1.6814118463400953e-05, "loss": 0.6049, "step": 179825 }, { "epoch": 1.99, "learning_rate": 1.6813195736262437e-05, "loss": 0.6145, "step": 179830 }, { "epoch": 1.99, "learning_rate": 1.681227300912393e-05, "loss": 0.6462, "step": 179835 }, { "epoch": 1.99, "learning_rate": 1.6811350281985413e-05, "loss": 0.6108, "step": 179840 }, { "epoch": 1.99, "learning_rate": 1.6810427554846904e-05, "loss": 0.6493, "step": 179845 }, { "epoch": 1.99, "learning_rate": 1.680950482770839e-05, "loss": 0.622, "step": 179850 }, { "epoch": 1.99, "learning_rate": 1.6808582100569876e-05, "loss": 0.5966, "step": 179855 }, { "epoch": 1.99, "learning_rate": 1.6807659373431364e-05, "loss": 0.5783, "step": 179860 }, { "epoch": 1.99, "learning_rate": 1.6806736646292852e-05, "loss": 0.6543, "step": 179865 }, { "epoch": 1.99, "learning_rate": 1.680581391915434e-05, "loss": 0.5911, "step": 179870 }, { "epoch": 1.99, "learning_rate": 1.6804891192015828e-05, "loss": 0.6472, "step": 179875 }, { "epoch": 1.99, "learning_rate": 1.6803968464877316e-05, "loss": 0.5908, "step": 179880 }, { "epoch": 1.99, "learning_rate": 1.6803045737738803e-05, "loss": 0.6021, "step": 179885 }, { "epoch": 1.99, "learning_rate": 1.680212301060029e-05, "loss": 0.5859, "step": 179890 }, { "epoch": 1.99, "learning_rate": 1.6801200283461776e-05, "loss": 0.5902, "step": 179895 }, { "epoch": 1.99, "learning_rate": 1.6800277556323267e-05, "loss": 0.5813, "step": 179900 }, { "epoch": 1.99, "learning_rate": 1.679935482918475e-05, "loss": 0.5774, "step": 179905 }, { "epoch": 1.99, "learning_rate": 1.679843210204624e-05, "loss": 0.5683, "step": 179910 }, { "epoch": 1.99, "learning_rate": 1.6797509374907727e-05, "loss": 0.5669, "step": 179915 }, { "epoch": 1.99, "learning_rate": 1.6796586647769215e-05, "loss": 0.6635, "step": 179920 }, { "epoch": 1.99, "learning_rate": 1.6795663920630703e-05, "loss": 0.6436, "step": 179925 }, { "epoch": 1.99, "learning_rate": 1.679474119349219e-05, "loss": 0.5727, "step": 179930 }, { "epoch": 1.99, "learning_rate": 1.679381846635368e-05, "loss": 0.575, "step": 179935 }, { "epoch": 1.99, "learning_rate": 1.6792895739215166e-05, "loss": 0.6184, "step": 179940 }, { "epoch": 1.99, "learning_rate": 1.6791973012076654e-05, "loss": 0.636, "step": 179945 }, { "epoch": 1.99, "learning_rate": 1.6791050284938142e-05, "loss": 0.6273, "step": 179950 }, { "epoch": 1.99, "learning_rate": 1.679012755779963e-05, "loss": 0.5492, "step": 179955 }, { "epoch": 1.99, "learning_rate": 1.6789204830661118e-05, "loss": 0.706, "step": 179960 }, { "epoch": 1.99, "learning_rate": 1.6788282103522606e-05, "loss": 0.6139, "step": 179965 }, { "epoch": 1.99, "learning_rate": 1.678735937638409e-05, "loss": 0.6296, "step": 179970 }, { "epoch": 1.99, "learning_rate": 1.6786436649245578e-05, "loss": 0.6389, "step": 179975 }, { "epoch": 1.99, "learning_rate": 1.6785513922107066e-05, "loss": 0.5905, "step": 179980 }, { "epoch": 1.99, "learning_rate": 1.6784591194968554e-05, "loss": 0.6143, "step": 179985 }, { "epoch": 1.99, "learning_rate": 1.678366846783004e-05, "loss": 0.6014, "step": 179990 }, { "epoch": 1.99, "learning_rate": 1.678274574069153e-05, "loss": 0.5925, "step": 179995 }, { "epoch": 1.99, "learning_rate": 1.6781823013553017e-05, "loss": 0.5647, "step": 180000 }, { "epoch": 1.99, "eval_loss": 0.5669443607330322, "eval_runtime": 69.4233, "eval_samples_per_second": 28.809, "eval_steps_per_second": 14.404, "step": 180000 }, { "epoch": 1.99, "learning_rate": 1.6780900286414505e-05, "loss": 0.5648, "step": 180005 }, { "epoch": 1.99, "learning_rate": 1.6779977559275993e-05, "loss": 0.6154, "step": 180010 }, { "epoch": 1.99, "learning_rate": 1.677905483213748e-05, "loss": 0.6058, "step": 180015 }, { "epoch": 1.99, "learning_rate": 1.677813210499897e-05, "loss": 0.6237, "step": 180020 }, { "epoch": 1.99, "learning_rate": 1.6777209377860456e-05, "loss": 0.6106, "step": 180025 }, { "epoch": 1.99, "learning_rate": 1.677628665072194e-05, "loss": 0.6155, "step": 180030 }, { "epoch": 1.99, "learning_rate": 1.6775363923583432e-05, "loss": 0.5497, "step": 180035 }, { "epoch": 1.99, "learning_rate": 1.6774441196444917e-05, "loss": 0.6255, "step": 180040 }, { "epoch": 1.99, "learning_rate": 1.6773518469306404e-05, "loss": 0.5596, "step": 180045 }, { "epoch": 1.99, "learning_rate": 1.6772595742167892e-05, "loss": 0.5884, "step": 180050 }, { "epoch": 1.99, "learning_rate": 1.677167301502938e-05, "loss": 0.5703, "step": 180055 }, { "epoch": 1.99, "learning_rate": 1.6770750287890868e-05, "loss": 0.6135, "step": 180060 }, { "epoch": 1.99, "learning_rate": 1.6769827560752356e-05, "loss": 0.6061, "step": 180065 }, { "epoch": 1.99, "learning_rate": 1.6768904833613844e-05, "loss": 0.6213, "step": 180070 }, { "epoch": 1.99, "learning_rate": 1.676798210647533e-05, "loss": 0.6221, "step": 180075 }, { "epoch": 1.99, "learning_rate": 1.676705937933682e-05, "loss": 0.6187, "step": 180080 }, { "epoch": 1.99, "learning_rate": 1.6766136652198304e-05, "loss": 0.6485, "step": 180085 }, { "epoch": 1.99, "learning_rate": 1.6765213925059795e-05, "loss": 0.6047, "step": 180090 }, { "epoch": 1.99, "learning_rate": 1.676429119792128e-05, "loss": 0.6368, "step": 180095 }, { "epoch": 1.99, "learning_rate": 1.676336847078277e-05, "loss": 0.6314, "step": 180100 }, { "epoch": 1.99, "learning_rate": 1.6762445743644255e-05, "loss": 0.5673, "step": 180105 }, { "epoch": 1.99, "learning_rate": 1.6761523016505746e-05, "loss": 0.5656, "step": 180110 }, { "epoch": 1.99, "learning_rate": 1.676060028936723e-05, "loss": 0.654, "step": 180115 }, { "epoch": 1.99, "learning_rate": 1.675967756222872e-05, "loss": 0.5887, "step": 180120 }, { "epoch": 1.99, "learning_rate": 1.6758754835090207e-05, "loss": 0.6131, "step": 180125 }, { "epoch": 1.99, "learning_rate": 1.6757832107951694e-05, "loss": 0.5977, "step": 180130 }, { "epoch": 1.99, "learning_rate": 1.6756909380813182e-05, "loss": 0.6389, "step": 180135 }, { "epoch": 1.99, "learning_rate": 1.675598665367467e-05, "loss": 0.6004, "step": 180140 }, { "epoch": 1.99, "learning_rate": 1.6755063926536158e-05, "loss": 0.6546, "step": 180145 }, { "epoch": 1.99, "learning_rate": 1.6754141199397642e-05, "loss": 0.5806, "step": 180150 }, { "epoch": 1.99, "learning_rate": 1.6753218472259134e-05, "loss": 0.6722, "step": 180155 }, { "epoch": 1.99, "learning_rate": 1.6752295745120618e-05, "loss": 0.6076, "step": 180160 }, { "epoch": 1.99, "learning_rate": 1.675137301798211e-05, "loss": 0.5936, "step": 180165 }, { "epoch": 1.99, "learning_rate": 1.6750450290843594e-05, "loss": 0.6229, "step": 180170 }, { "epoch": 2.0, "learning_rate": 1.6749527563705085e-05, "loss": 0.6178, "step": 180175 }, { "epoch": 2.0, "learning_rate": 1.674860483656657e-05, "loss": 0.6192, "step": 180180 }, { "epoch": 2.0, "learning_rate": 1.6747682109428057e-05, "loss": 0.5797, "step": 180185 }, { "epoch": 2.0, "learning_rate": 1.6746759382289545e-05, "loss": 0.6455, "step": 180190 }, { "epoch": 2.0, "learning_rate": 1.6745836655151033e-05, "loss": 0.6011, "step": 180195 }, { "epoch": 2.0, "learning_rate": 1.674491392801252e-05, "loss": 0.6463, "step": 180200 }, { "epoch": 2.0, "learning_rate": 1.6743991200874005e-05, "loss": 0.6074, "step": 180205 }, { "epoch": 2.0, "learning_rate": 1.6743068473735497e-05, "loss": 0.6323, "step": 180210 }, { "epoch": 2.0, "learning_rate": 1.674214574659698e-05, "loss": 0.5859, "step": 180215 }, { "epoch": 2.0, "learning_rate": 1.6741223019458472e-05, "loss": 0.5683, "step": 180220 }, { "epoch": 2.0, "learning_rate": 1.6740300292319957e-05, "loss": 0.6295, "step": 180225 }, { "epoch": 2.0, "learning_rate": 1.6739377565181448e-05, "loss": 0.5837, "step": 180230 }, { "epoch": 2.0, "learning_rate": 1.6738454838042932e-05, "loss": 0.6157, "step": 180235 }, { "epoch": 2.0, "learning_rate": 1.673753211090442e-05, "loss": 0.5816, "step": 180240 }, { "epoch": 2.0, "learning_rate": 1.6736609383765908e-05, "loss": 0.6228, "step": 180245 }, { "epoch": 2.0, "learning_rate": 1.6735686656627396e-05, "loss": 0.5808, "step": 180250 }, { "epoch": 2.0, "learning_rate": 1.6734763929488884e-05, "loss": 0.5896, "step": 180255 }, { "epoch": 2.0, "learning_rate": 1.6733841202350372e-05, "loss": 0.6185, "step": 180260 }, { "epoch": 2.0, "learning_rate": 1.673291847521186e-05, "loss": 0.6346, "step": 180265 }, { "epoch": 2.0, "learning_rate": 1.6731995748073344e-05, "loss": 0.6776, "step": 180270 }, { "epoch": 2.0, "learning_rate": 1.6731073020934835e-05, "loss": 0.6515, "step": 180275 }, { "epoch": 2.0, "learning_rate": 1.673015029379632e-05, "loss": 0.6276, "step": 180280 }, { "epoch": 2.0, "learning_rate": 1.672922756665781e-05, "loss": 0.5842, "step": 180285 }, { "epoch": 2.0, "learning_rate": 1.6728304839519295e-05, "loss": 0.6359, "step": 180290 }, { "epoch": 2.0, "learning_rate": 1.6727382112380787e-05, "loss": 0.5823, "step": 180295 }, { "epoch": 2.0, "learning_rate": 1.672645938524227e-05, "loss": 0.5896, "step": 180300 }, { "epoch": 2.0, "learning_rate": 1.672553665810376e-05, "loss": 0.5975, "step": 180305 }, { "epoch": 2.0, "learning_rate": 1.6724613930965247e-05, "loss": 0.5812, "step": 180310 }, { "epoch": 2.0, "learning_rate": 1.6723691203826735e-05, "loss": 0.5782, "step": 180315 }, { "epoch": 2.0, "learning_rate": 1.6722768476688223e-05, "loss": 0.6085, "step": 180320 }, { "epoch": 2.0, "learning_rate": 1.672184574954971e-05, "loss": 0.6178, "step": 180325 }, { "epoch": 2.0, "learning_rate": 1.6720923022411198e-05, "loss": 0.5508, "step": 180330 }, { "epoch": 2.0, "learning_rate": 1.6720000295272686e-05, "loss": 0.657, "step": 180335 }, { "epoch": 2.0, "learning_rate": 1.6719077568134174e-05, "loss": 0.6451, "step": 180340 }, { "epoch": 2.0, "learning_rate": 1.671815484099566e-05, "loss": 0.6353, "step": 180345 }, { "epoch": 2.0, "learning_rate": 1.671723211385715e-05, "loss": 0.6226, "step": 180350 }, { "epoch": 2.0, "learning_rate": 1.6716309386718634e-05, "loss": 0.6151, "step": 180355 }, { "epoch": 2.0, "learning_rate": 1.6715386659580122e-05, "loss": 0.6023, "step": 180360 }, { "epoch": 2.0, "learning_rate": 1.671446393244161e-05, "loss": 0.5551, "step": 180365 }, { "epoch": 2.0, "learning_rate": 1.6713541205303098e-05, "loss": 0.5761, "step": 180370 }, { "epoch": 2.0, "learning_rate": 1.6712618478164585e-05, "loss": 0.6223, "step": 180375 }, { "epoch": 2.0, "learning_rate": 1.6711695751026073e-05, "loss": 0.6359, "step": 180380 }, { "epoch": 2.0, "learning_rate": 1.671077302388756e-05, "loss": 0.5638, "step": 180385 }, { "epoch": 2.0, "learning_rate": 1.670985029674905e-05, "loss": 0.5899, "step": 180390 }, { "epoch": 2.0, "learning_rate": 1.6708927569610537e-05, "loss": 0.6664, "step": 180395 }, { "epoch": 2.0, "learning_rate": 1.6708004842472025e-05, "loss": 0.5898, "step": 180400 }, { "epoch": 2.0, "learning_rate": 1.6707082115333513e-05, "loss": 0.6095, "step": 180405 }, { "epoch": 2.0, "learning_rate": 1.6706159388195e-05, "loss": 0.6528, "step": 180410 }, { "epoch": 2.0, "learning_rate": 1.6705236661056485e-05, "loss": 0.6236, "step": 180415 }, { "epoch": 2.0, "learning_rate": 1.6704313933917973e-05, "loss": 0.5888, "step": 180420 }, { "epoch": 2.0, "learning_rate": 1.670339120677946e-05, "loss": 0.6123, "step": 180425 }, { "epoch": 2.0, "learning_rate": 1.670246847964095e-05, "loss": 0.6376, "step": 180430 }, { "epoch": 2.0, "learning_rate": 1.6701545752502436e-05, "loss": 0.6435, "step": 180435 }, { "epoch": 2.0, "learning_rate": 1.6700623025363924e-05, "loss": 0.6342, "step": 180440 }, { "epoch": 2.0, "learning_rate": 1.6699700298225412e-05, "loss": 0.6172, "step": 180445 }, { "epoch": 2.0, "learning_rate": 1.66987775710869e-05, "loss": 0.5778, "step": 180450 }, { "epoch": 2.0, "learning_rate": 1.6697854843948388e-05, "loss": 0.5702, "step": 180455 }, { "epoch": 2.0, "learning_rate": 1.6696932116809876e-05, "loss": 0.6484, "step": 180460 }, { "epoch": 2.0, "learning_rate": 1.6696009389671363e-05, "loss": 0.6019, "step": 180465 }, { "epoch": 2.0, "learning_rate": 1.6695086662532848e-05, "loss": 0.6168, "step": 180470 }, { "epoch": 2.0, "learning_rate": 1.669416393539434e-05, "loss": 0.6509, "step": 180475 }, { "epoch": 2.0, "learning_rate": 1.6693241208255824e-05, "loss": 0.6125, "step": 180480 }, { "epoch": 2.0, "learning_rate": 1.6692318481117315e-05, "loss": 0.5613, "step": 180485 }, { "epoch": 2.0, "learning_rate": 1.66913957539788e-05, "loss": 0.5954, "step": 180490 }, { "epoch": 2.0, "learning_rate": 1.6690473026840287e-05, "loss": 0.674, "step": 180495 }, { "epoch": 2.0, "learning_rate": 1.6689550299701775e-05, "loss": 0.5703, "step": 180500 }, { "epoch": 2.0, "learning_rate": 1.6688627572563263e-05, "loss": 0.5901, "step": 180505 }, { "epoch": 2.0, "learning_rate": 1.668770484542475e-05, "loss": 0.6302, "step": 180510 }, { "epoch": 2.0, "learning_rate": 1.668678211828624e-05, "loss": 0.6051, "step": 180515 }, { "epoch": 2.0, "learning_rate": 1.6685859391147726e-05, "loss": 0.5515, "step": 180520 }, { "epoch": 2.0, "learning_rate": 1.6684936664009214e-05, "loss": 0.5921, "step": 180525 }, { "epoch": 2.0, "learning_rate": 1.6684013936870702e-05, "loss": 0.6365, "step": 180530 }, { "epoch": 2.0, "learning_rate": 1.6683091209732186e-05, "loss": 0.5872, "step": 180535 }, { "epoch": 2.0, "learning_rate": 1.6682168482593678e-05, "loss": 0.5861, "step": 180540 }, { "epoch": 2.0, "learning_rate": 1.6681245755455162e-05, "loss": 0.6118, "step": 180545 }, { "epoch": 2.0, "learning_rate": 1.6680323028316653e-05, "loss": 0.6325, "step": 180550 }, { "epoch": 2.0, "learning_rate": 1.6679400301178138e-05, "loss": 0.6459, "step": 180555 }, { "epoch": 2.0, "learning_rate": 1.667847757403963e-05, "loss": 0.5969, "step": 180560 }, { "epoch": 2.0, "learning_rate": 1.6677554846901114e-05, "loss": 0.625, "step": 180565 }, { "epoch": 2.0, "learning_rate": 1.66766321197626e-05, "loss": 0.6376, "step": 180570 }, { "epoch": 2.0, "learning_rate": 1.667570939262409e-05, "loss": 0.6004, "step": 180575 }, { "epoch": 2.0, "learning_rate": 1.6674786665485577e-05, "loss": 0.618, "step": 180580 }, { "epoch": 2.0, "learning_rate": 1.6673863938347065e-05, "loss": 0.6064, "step": 180585 }, { "epoch": 2.0, "learning_rate": 1.667294121120855e-05, "loss": 0.6455, "step": 180590 }, { "epoch": 2.0, "learning_rate": 1.667201848407004e-05, "loss": 0.5723, "step": 180595 }, { "epoch": 2.0, "learning_rate": 1.6671095756931525e-05, "loss": 0.5994, "step": 180600 }, { "epoch": 2.0, "learning_rate": 1.6670173029793016e-05, "loss": 0.6083, "step": 180605 }, { "epoch": 2.0, "learning_rate": 1.66692503026545e-05, "loss": 0.6063, "step": 180610 }, { "epoch": 2.0, "learning_rate": 1.6668327575515992e-05, "loss": 0.6517, "step": 180615 }, { "epoch": 2.0, "learning_rate": 1.6667404848377477e-05, "loss": 0.5714, "step": 180620 }, { "epoch": 2.0, "learning_rate": 1.6666482121238964e-05, "loss": 0.5762, "step": 180625 }, { "epoch": 2.0, "learning_rate": 1.6665559394100452e-05, "loss": 0.6078, "step": 180630 }, { "epoch": 2.0, "learning_rate": 1.666463666696194e-05, "loss": 0.6374, "step": 180635 }, { "epoch": 2.0, "learning_rate": 1.6663713939823428e-05, "loss": 0.6191, "step": 180640 }, { "epoch": 2.0, "learning_rate": 1.6662791212684912e-05, "loss": 0.615, "step": 180645 }, { "epoch": 2.0, "learning_rate": 1.6661868485546404e-05, "loss": 0.5843, "step": 180650 }, { "epoch": 2.0, "learning_rate": 1.6660945758407888e-05, "loss": 0.6196, "step": 180655 }, { "epoch": 2.0, "learning_rate": 1.666002303126938e-05, "loss": 0.629, "step": 180660 }, { "epoch": 2.0, "learning_rate": 1.6659100304130864e-05, "loss": 0.5478, "step": 180665 }, { "epoch": 2.0, "learning_rate": 1.6658177576992355e-05, "loss": 0.6454, "step": 180670 }, { "epoch": 2.0, "learning_rate": 1.665725484985384e-05, "loss": 0.5976, "step": 180675 }, { "epoch": 2.0, "learning_rate": 1.665633212271533e-05, "loss": 0.6169, "step": 180680 }, { "epoch": 2.0, "learning_rate": 1.6655409395576815e-05, "loss": 0.5592, "step": 180685 }, { "epoch": 2.0, "learning_rate": 1.6654486668438303e-05, "loss": 0.6251, "step": 180690 }, { "epoch": 2.0, "learning_rate": 1.665356394129979e-05, "loss": 0.5856, "step": 180695 }, { "epoch": 2.0, "learning_rate": 1.665264121416128e-05, "loss": 0.6346, "step": 180700 }, { "epoch": 2.0, "learning_rate": 1.6651718487022767e-05, "loss": 0.603, "step": 180705 }, { "epoch": 2.0, "learning_rate": 1.6650795759884254e-05, "loss": 0.5521, "step": 180710 }, { "epoch": 2.0, "learning_rate": 1.6649873032745742e-05, "loss": 0.6529, "step": 180715 }, { "epoch": 2.0, "learning_rate": 1.664895030560723e-05, "loss": 0.6073, "step": 180720 }, { "epoch": 2.0, "learning_rate": 1.6648027578468718e-05, "loss": 0.5988, "step": 180725 }, { "epoch": 2.0, "learning_rate": 1.6647104851330202e-05, "loss": 0.5856, "step": 180730 }, { "epoch": 2.0, "learning_rate": 1.6646182124191694e-05, "loss": 0.5625, "step": 180735 }, { "epoch": 2.0, "learning_rate": 1.6645259397053178e-05, "loss": 0.6029, "step": 180740 }, { "epoch": 2.0, "learning_rate": 1.6644336669914666e-05, "loss": 0.6122, "step": 180745 }, { "epoch": 2.0, "learning_rate": 1.6643413942776154e-05, "loss": 0.6172, "step": 180750 }, { "epoch": 2.0, "learning_rate": 1.664249121563764e-05, "loss": 0.6162, "step": 180755 }, { "epoch": 2.0, "learning_rate": 1.664156848849913e-05, "loss": 0.5482, "step": 180760 }, { "epoch": 2.0, "learning_rate": 1.6640645761360617e-05, "loss": 0.6209, "step": 180765 }, { "epoch": 2.0, "learning_rate": 1.6639723034222105e-05, "loss": 0.5345, "step": 180770 }, { "epoch": 2.0, "learning_rate": 1.6638800307083593e-05, "loss": 0.6048, "step": 180775 }, { "epoch": 2.0, "learning_rate": 1.663787757994508e-05, "loss": 0.6011, "step": 180780 }, { "epoch": 2.0, "learning_rate": 1.663695485280657e-05, "loss": 0.6352, "step": 180785 }, { "epoch": 2.0, "learning_rate": 1.6636032125668057e-05, "loss": 0.5537, "step": 180790 }, { "epoch": 2.0, "learning_rate": 1.6635109398529544e-05, "loss": 0.6292, "step": 180795 }, { "epoch": 2.0, "learning_rate": 1.663418667139103e-05, "loss": 0.6351, "step": 180800 }, { "epoch": 2.0, "learning_rate": 1.6633263944252517e-05, "loss": 0.6276, "step": 180805 }, { "epoch": 2.0, "learning_rate": 1.6632341217114005e-05, "loss": 0.6005, "step": 180810 }, { "epoch": 2.0, "learning_rate": 1.6631418489975492e-05, "loss": 0.6066, "step": 180815 }, { "epoch": 2.0, "learning_rate": 1.663049576283698e-05, "loss": 0.6038, "step": 180820 }, { "epoch": 2.0, "learning_rate": 1.6629573035698468e-05, "loss": 0.6732, "step": 180825 }, { "epoch": 2.0, "learning_rate": 1.6628650308559956e-05, "loss": 0.6196, "step": 180830 }, { "epoch": 2.0, "learning_rate": 1.6627727581421444e-05, "loss": 0.6068, "step": 180835 }, { "epoch": 2.0, "learning_rate": 1.662680485428293e-05, "loss": 0.6071, "step": 180840 }, { "epoch": 2.0, "learning_rate": 1.662588212714442e-05, "loss": 0.5612, "step": 180845 }, { "epoch": 2.0, "learning_rate": 1.6624959400005907e-05, "loss": 0.6085, "step": 180850 }, { "epoch": 2.0, "learning_rate": 1.6624036672867392e-05, "loss": 0.6304, "step": 180855 }, { "epoch": 2.0, "learning_rate": 1.6623113945728883e-05, "loss": 0.5942, "step": 180860 }, { "epoch": 2.0, "learning_rate": 1.6622191218590368e-05, "loss": 0.5859, "step": 180865 }, { "epoch": 2.0, "learning_rate": 1.662126849145186e-05, "loss": 0.6007, "step": 180870 }, { "epoch": 2.0, "learning_rate": 1.6620345764313343e-05, "loss": 0.6105, "step": 180875 }, { "epoch": 2.0, "learning_rate": 1.661942303717483e-05, "loss": 0.5951, "step": 180880 }, { "epoch": 2.0, "learning_rate": 1.661850031003632e-05, "loss": 0.5741, "step": 180885 }, { "epoch": 2.0, "learning_rate": 1.6617577582897807e-05, "loss": 0.5749, "step": 180890 }, { "epoch": 2.0, "learning_rate": 1.6616654855759295e-05, "loss": 0.5745, "step": 180895 }, { "epoch": 2.0, "learning_rate": 1.6615732128620782e-05, "loss": 0.6067, "step": 180900 }, { "epoch": 2.0, "learning_rate": 1.661480940148227e-05, "loss": 0.6103, "step": 180905 }, { "epoch": 2.0, "learning_rate": 1.6613886674343758e-05, "loss": 0.5799, "step": 180910 }, { "epoch": 2.0, "learning_rate": 1.6612963947205246e-05, "loss": 0.6257, "step": 180915 }, { "epoch": 2.0, "learning_rate": 1.661204122006673e-05, "loss": 0.6263, "step": 180920 }, { "epoch": 2.0, "learning_rate": 1.6611118492928222e-05, "loss": 0.5685, "step": 180925 }, { "epoch": 2.0, "learning_rate": 1.6610195765789706e-05, "loss": 0.5983, "step": 180930 }, { "epoch": 2.0, "learning_rate": 1.6609273038651197e-05, "loss": 0.6509, "step": 180935 }, { "epoch": 2.0, "learning_rate": 1.6608350311512682e-05, "loss": 0.6145, "step": 180940 }, { "epoch": 2.0, "learning_rate": 1.6607427584374173e-05, "loss": 0.6261, "step": 180945 }, { "epoch": 2.0, "learning_rate": 1.6606504857235658e-05, "loss": 0.5989, "step": 180950 }, { "epoch": 2.0, "learning_rate": 1.6605582130097145e-05, "loss": 0.5952, "step": 180955 }, { "epoch": 2.0, "learning_rate": 1.6604659402958633e-05, "loss": 0.5622, "step": 180960 }, { "epoch": 2.0, "learning_rate": 1.660373667582012e-05, "loss": 0.5788, "step": 180965 }, { "epoch": 2.0, "learning_rate": 1.660281394868161e-05, "loss": 0.573, "step": 180970 }, { "epoch": 2.0, "learning_rate": 1.6601891221543093e-05, "loss": 0.6388, "step": 180975 }, { "epoch": 2.0, "learning_rate": 1.6600968494404585e-05, "loss": 0.5986, "step": 180980 }, { "epoch": 2.0, "learning_rate": 1.660004576726607e-05, "loss": 0.6195, "step": 180985 }, { "epoch": 2.0, "learning_rate": 1.659912304012756e-05, "loss": 0.6074, "step": 180990 }, { "epoch": 2.0, "learning_rate": 1.6598200312989045e-05, "loss": 0.6011, "step": 180995 }, { "epoch": 2.0, "learning_rate": 1.6597277585850536e-05, "loss": 0.6038, "step": 181000 }, { "epoch": 2.0, "eval_loss": 0.6008647680282593, "eval_runtime": 69.3154, "eval_samples_per_second": 28.854, "eval_steps_per_second": 14.427, "step": 181000 }, { "epoch": 2.0, "learning_rate": 1.659635485871202e-05, "loss": 0.5998, "step": 181005 }, { "epoch": 2.0, "learning_rate": 1.659543213157351e-05, "loss": 0.6277, "step": 181010 }, { "epoch": 2.0, "learning_rate": 1.6594509404434996e-05, "loss": 0.6855, "step": 181015 }, { "epoch": 2.0, "learning_rate": 1.6593586677296484e-05, "loss": 0.5904, "step": 181020 }, { "epoch": 2.0, "learning_rate": 1.6592663950157972e-05, "loss": 0.6921, "step": 181025 }, { "epoch": 2.0, "learning_rate": 1.6591741223019456e-05, "loss": 0.5998, "step": 181030 }, { "epoch": 2.0, "learning_rate": 1.6590818495880948e-05, "loss": 0.5858, "step": 181035 }, { "epoch": 2.0, "learning_rate": 1.6589895768742432e-05, "loss": 0.6081, "step": 181040 }, { "epoch": 2.0, "learning_rate": 1.6588973041603923e-05, "loss": 0.5869, "step": 181045 }, { "epoch": 2.0, "learning_rate": 1.6588050314465408e-05, "loss": 0.6009, "step": 181050 }, { "epoch": 2.0, "learning_rate": 1.65871275873269e-05, "loss": 0.6739, "step": 181055 }, { "epoch": 2.0, "learning_rate": 1.6586204860188383e-05, "loss": 0.5798, "step": 181060 }, { "epoch": 2.0, "learning_rate": 1.6585282133049875e-05, "loss": 0.6148, "step": 181065 }, { "epoch": 2.0, "learning_rate": 1.658435940591136e-05, "loss": 0.5413, "step": 181070 }, { "epoch": 2.0, "learning_rate": 1.6583436678772847e-05, "loss": 0.599, "step": 181075 }, { "epoch": 2.01, "learning_rate": 1.6582513951634335e-05, "loss": 0.56, "step": 181080 }, { "epoch": 2.01, "learning_rate": 1.6581591224495823e-05, "loss": 0.6106, "step": 181085 }, { "epoch": 2.01, "learning_rate": 1.658066849735731e-05, "loss": 0.6777, "step": 181090 }, { "epoch": 2.01, "learning_rate": 1.65797457702188e-05, "loss": 0.6606, "step": 181095 }, { "epoch": 2.01, "learning_rate": 1.6578823043080286e-05, "loss": 0.6281, "step": 181100 }, { "epoch": 2.01, "learning_rate": 1.657790031594177e-05, "loss": 0.641, "step": 181105 }, { "epoch": 2.01, "learning_rate": 1.6576977588803262e-05, "loss": 0.6371, "step": 181110 }, { "epoch": 2.01, "learning_rate": 1.6576054861664746e-05, "loss": 0.5994, "step": 181115 }, { "epoch": 2.01, "learning_rate": 1.6575132134526238e-05, "loss": 0.6073, "step": 181120 }, { "epoch": 2.01, "learning_rate": 1.6574209407387722e-05, "loss": 0.5363, "step": 181125 }, { "epoch": 2.01, "learning_rate": 1.657328668024921e-05, "loss": 0.6394, "step": 181130 }, { "epoch": 2.01, "learning_rate": 1.6572363953110698e-05, "loss": 0.572, "step": 181135 }, { "epoch": 2.01, "learning_rate": 1.6571441225972186e-05, "loss": 0.594, "step": 181140 }, { "epoch": 2.01, "learning_rate": 1.6570518498833674e-05, "loss": 0.6102, "step": 181145 }, { "epoch": 2.01, "learning_rate": 1.656959577169516e-05, "loss": 0.6004, "step": 181150 }, { "epoch": 2.01, "learning_rate": 1.656867304455665e-05, "loss": 0.5976, "step": 181155 }, { "epoch": 2.01, "learning_rate": 1.6567750317418137e-05, "loss": 0.6365, "step": 181160 }, { "epoch": 2.01, "learning_rate": 1.6566827590279625e-05, "loss": 0.5996, "step": 181165 }, { "epoch": 2.01, "learning_rate": 1.6565904863141113e-05, "loss": 0.6286, "step": 181170 }, { "epoch": 2.01, "learning_rate": 1.65649821360026e-05, "loss": 0.6312, "step": 181175 }, { "epoch": 2.01, "learning_rate": 1.6564059408864085e-05, "loss": 0.5955, "step": 181180 }, { "epoch": 2.01, "learning_rate": 1.6563136681725573e-05, "loss": 0.606, "step": 181185 }, { "epoch": 2.01, "learning_rate": 1.656221395458706e-05, "loss": 0.6101, "step": 181190 }, { "epoch": 2.01, "learning_rate": 1.656129122744855e-05, "loss": 0.6151, "step": 181195 }, { "epoch": 2.01, "learning_rate": 1.6560368500310036e-05, "loss": 0.6156, "step": 181200 }, { "epoch": 2.01, "learning_rate": 1.6559445773171524e-05, "loss": 0.5954, "step": 181205 }, { "epoch": 2.01, "learning_rate": 1.6558523046033012e-05, "loss": 0.6089, "step": 181210 }, { "epoch": 2.01, "learning_rate": 1.65576003188945e-05, "loss": 0.5629, "step": 181215 }, { "epoch": 2.01, "learning_rate": 1.6556677591755988e-05, "loss": 0.5883, "step": 181220 }, { "epoch": 2.01, "learning_rate": 1.6555754864617476e-05, "loss": 0.6205, "step": 181225 }, { "epoch": 2.01, "learning_rate": 1.6554832137478964e-05, "loss": 0.6067, "step": 181230 }, { "epoch": 2.01, "learning_rate": 1.655390941034045e-05, "loss": 0.6481, "step": 181235 }, { "epoch": 2.01, "learning_rate": 1.655298668320194e-05, "loss": 0.5694, "step": 181240 }, { "epoch": 2.01, "learning_rate": 1.6552063956063427e-05, "loss": 0.5734, "step": 181245 }, { "epoch": 2.01, "learning_rate": 1.655114122892491e-05, "loss": 0.5793, "step": 181250 }, { "epoch": 2.01, "learning_rate": 1.65502185017864e-05, "loss": 0.588, "step": 181255 }, { "epoch": 2.01, "learning_rate": 1.6549295774647887e-05, "loss": 0.6332, "step": 181260 }, { "epoch": 2.01, "learning_rate": 1.6548373047509375e-05, "loss": 0.6189, "step": 181265 }, { "epoch": 2.01, "learning_rate": 1.6547450320370863e-05, "loss": 0.6121, "step": 181270 }, { "epoch": 2.01, "learning_rate": 1.654652759323235e-05, "loss": 0.6651, "step": 181275 }, { "epoch": 2.01, "learning_rate": 1.654560486609384e-05, "loss": 0.5753, "step": 181280 }, { "epoch": 2.01, "learning_rate": 1.6544682138955327e-05, "loss": 0.602, "step": 181285 }, { "epoch": 2.01, "learning_rate": 1.6543759411816814e-05, "loss": 0.5872, "step": 181290 }, { "epoch": 2.01, "learning_rate": 1.6542836684678302e-05, "loss": 0.5977, "step": 181295 }, { "epoch": 2.01, "learning_rate": 1.654191395753979e-05, "loss": 0.554, "step": 181300 }, { "epoch": 2.01, "learning_rate": 1.6540991230401275e-05, "loss": 0.5759, "step": 181305 }, { "epoch": 2.01, "learning_rate": 1.6540068503262766e-05, "loss": 0.5995, "step": 181310 }, { "epoch": 2.01, "learning_rate": 1.653914577612425e-05, "loss": 0.5484, "step": 181315 }, { "epoch": 2.01, "learning_rate": 1.653822304898574e-05, "loss": 0.5898, "step": 181320 }, { "epoch": 2.01, "learning_rate": 1.6537300321847226e-05, "loss": 0.5988, "step": 181325 }, { "epoch": 2.01, "learning_rate": 1.6536377594708714e-05, "loss": 0.5865, "step": 181330 }, { "epoch": 2.01, "learning_rate": 1.65354548675702e-05, "loss": 0.5526, "step": 181335 }, { "epoch": 2.01, "learning_rate": 1.653453214043169e-05, "loss": 0.5732, "step": 181340 }, { "epoch": 2.01, "learning_rate": 1.6533609413293177e-05, "loss": 0.6206, "step": 181345 }, { "epoch": 2.01, "learning_rate": 1.6532686686154665e-05, "loss": 0.6401, "step": 181350 }, { "epoch": 2.01, "learning_rate": 1.6531763959016153e-05, "loss": 0.6593, "step": 181355 }, { "epoch": 2.01, "learning_rate": 1.6530841231877637e-05, "loss": 0.583, "step": 181360 }, { "epoch": 2.01, "learning_rate": 1.652991850473913e-05, "loss": 0.6423, "step": 181365 }, { "epoch": 2.01, "learning_rate": 1.6528995777600613e-05, "loss": 0.6826, "step": 181370 }, { "epoch": 2.01, "learning_rate": 1.6528073050462104e-05, "loss": 0.5662, "step": 181375 }, { "epoch": 2.01, "learning_rate": 1.652715032332359e-05, "loss": 0.605, "step": 181380 }, { "epoch": 2.01, "learning_rate": 1.652622759618508e-05, "loss": 0.5949, "step": 181385 }, { "epoch": 2.01, "learning_rate": 1.6525304869046565e-05, "loss": 0.6106, "step": 181390 }, { "epoch": 2.01, "learning_rate": 1.6524382141908056e-05, "loss": 0.6442, "step": 181395 }, { "epoch": 2.01, "learning_rate": 1.652345941476954e-05, "loss": 0.6278, "step": 181400 }, { "epoch": 2.01, "learning_rate": 1.6522536687631028e-05, "loss": 0.6074, "step": 181405 }, { "epoch": 2.01, "learning_rate": 1.6521613960492516e-05, "loss": 0.5785, "step": 181410 }, { "epoch": 2.01, "learning_rate": 1.6520691233354e-05, "loss": 0.6133, "step": 181415 }, { "epoch": 2.01, "learning_rate": 1.651976850621549e-05, "loss": 0.5938, "step": 181420 }, { "epoch": 2.01, "learning_rate": 1.6518845779076976e-05, "loss": 0.587, "step": 181425 }, { "epoch": 2.01, "learning_rate": 1.6517923051938467e-05, "loss": 0.6091, "step": 181430 }, { "epoch": 2.01, "learning_rate": 1.6517000324799952e-05, "loss": 0.6115, "step": 181435 }, { "epoch": 2.01, "learning_rate": 1.6516077597661443e-05, "loss": 0.6343, "step": 181440 }, { "epoch": 2.01, "learning_rate": 1.6515154870522927e-05, "loss": 0.6008, "step": 181445 }, { "epoch": 2.01, "learning_rate": 1.651423214338442e-05, "loss": 0.652, "step": 181450 }, { "epoch": 2.01, "learning_rate": 1.6513309416245903e-05, "loss": 0.676, "step": 181455 }, { "epoch": 2.01, "learning_rate": 1.651238668910739e-05, "loss": 0.6057, "step": 181460 }, { "epoch": 2.01, "learning_rate": 1.651146396196888e-05, "loss": 0.6489, "step": 181465 }, { "epoch": 2.01, "learning_rate": 1.6510541234830367e-05, "loss": 0.5939, "step": 181470 }, { "epoch": 2.01, "learning_rate": 1.6509618507691855e-05, "loss": 0.6084, "step": 181475 }, { "epoch": 2.01, "learning_rate": 1.650869578055334e-05, "loss": 0.6346, "step": 181480 }, { "epoch": 2.01, "learning_rate": 1.650777305341483e-05, "loss": 0.5555, "step": 181485 }, { "epoch": 2.01, "learning_rate": 1.6506850326276315e-05, "loss": 0.5606, "step": 181490 }, { "epoch": 2.01, "learning_rate": 1.6505927599137806e-05, "loss": 0.6162, "step": 181495 }, { "epoch": 2.01, "learning_rate": 1.650500487199929e-05, "loss": 0.6496, "step": 181500 }, { "epoch": 2.01, "learning_rate": 1.650408214486078e-05, "loss": 0.6543, "step": 181505 }, { "epoch": 2.01, "learning_rate": 1.6503159417722266e-05, "loss": 0.5866, "step": 181510 }, { "epoch": 2.01, "learning_rate": 1.6502236690583754e-05, "loss": 0.6283, "step": 181515 }, { "epoch": 2.01, "learning_rate": 1.6501313963445242e-05, "loss": 0.6004, "step": 181520 }, { "epoch": 2.01, "learning_rate": 1.650039123630673e-05, "loss": 0.6341, "step": 181525 }, { "epoch": 2.01, "learning_rate": 1.6499468509168218e-05, "loss": 0.6172, "step": 181530 }, { "epoch": 2.01, "learning_rate": 1.6498545782029705e-05, "loss": 0.6019, "step": 181535 }, { "epoch": 2.01, "learning_rate": 1.6497623054891193e-05, "loss": 0.6134, "step": 181540 }, { "epoch": 2.01, "learning_rate": 1.649670032775268e-05, "loss": 0.6224, "step": 181545 }, { "epoch": 2.01, "learning_rate": 1.649577760061417e-05, "loss": 0.5928, "step": 181550 }, { "epoch": 2.01, "learning_rate": 1.6494854873475657e-05, "loss": 0.6299, "step": 181555 }, { "epoch": 2.01, "learning_rate": 1.6493932146337145e-05, "loss": 0.6536, "step": 181560 }, { "epoch": 2.01, "learning_rate": 1.649300941919863e-05, "loss": 0.6216, "step": 181565 }, { "epoch": 2.01, "learning_rate": 1.6492086692060117e-05, "loss": 0.6507, "step": 181570 }, { "epoch": 2.01, "learning_rate": 1.6491163964921605e-05, "loss": 0.5947, "step": 181575 }, { "epoch": 2.01, "learning_rate": 1.6490241237783093e-05, "loss": 0.5674, "step": 181580 }, { "epoch": 2.01, "learning_rate": 1.648931851064458e-05, "loss": 0.6146, "step": 181585 }, { "epoch": 2.01, "learning_rate": 1.648839578350607e-05, "loss": 0.5585, "step": 181590 }, { "epoch": 2.01, "learning_rate": 1.6487473056367556e-05, "loss": 0.6058, "step": 181595 }, { "epoch": 2.01, "learning_rate": 1.6486550329229044e-05, "loss": 0.6129, "step": 181600 }, { "epoch": 2.01, "learning_rate": 1.6485627602090532e-05, "loss": 0.6485, "step": 181605 }, { "epoch": 2.01, "learning_rate": 1.648470487495202e-05, "loss": 0.6079, "step": 181610 }, { "epoch": 2.01, "learning_rate": 1.6483782147813508e-05, "loss": 0.623, "step": 181615 }, { "epoch": 2.01, "learning_rate": 1.6482859420674995e-05, "loss": 0.6375, "step": 181620 }, { "epoch": 2.01, "learning_rate": 1.6481936693536483e-05, "loss": 0.5958, "step": 181625 }, { "epoch": 2.01, "learning_rate": 1.648101396639797e-05, "loss": 0.6236, "step": 181630 }, { "epoch": 2.01, "learning_rate": 1.6480091239259456e-05, "loss": 0.6213, "step": 181635 }, { "epoch": 2.01, "learning_rate": 1.6479168512120943e-05, "loss": 0.5384, "step": 181640 }, { "epoch": 2.01, "learning_rate": 1.647824578498243e-05, "loss": 0.6242, "step": 181645 }, { "epoch": 2.01, "learning_rate": 1.647732305784392e-05, "loss": 0.5333, "step": 181650 }, { "epoch": 2.01, "learning_rate": 1.6476400330705407e-05, "loss": 0.6033, "step": 181655 }, { "epoch": 2.01, "learning_rate": 1.6475477603566895e-05, "loss": 0.616, "step": 181660 }, { "epoch": 2.01, "learning_rate": 1.6474554876428383e-05, "loss": 0.6636, "step": 181665 }, { "epoch": 2.01, "learning_rate": 1.647363214928987e-05, "loss": 0.6046, "step": 181670 }, { "epoch": 2.01, "learning_rate": 1.647270942215136e-05, "loss": 0.6526, "step": 181675 }, { "epoch": 2.01, "learning_rate": 1.6471786695012846e-05, "loss": 0.6328, "step": 181680 }, { "epoch": 2.01, "learning_rate": 1.6470863967874334e-05, "loss": 0.5814, "step": 181685 }, { "epoch": 2.01, "learning_rate": 1.646994124073582e-05, "loss": 0.6288, "step": 181690 }, { "epoch": 2.01, "learning_rate": 1.646901851359731e-05, "loss": 0.5745, "step": 181695 }, { "epoch": 2.01, "learning_rate": 1.6468095786458794e-05, "loss": 0.6241, "step": 181700 }, { "epoch": 2.01, "learning_rate": 1.6467173059320285e-05, "loss": 0.5726, "step": 181705 }, { "epoch": 2.01, "learning_rate": 1.646625033218177e-05, "loss": 0.6309, "step": 181710 }, { "epoch": 2.01, "learning_rate": 1.6465327605043258e-05, "loss": 0.6225, "step": 181715 }, { "epoch": 2.01, "learning_rate": 1.6464404877904746e-05, "loss": 0.592, "step": 181720 }, { "epoch": 2.01, "learning_rate": 1.6463482150766233e-05, "loss": 0.5989, "step": 181725 }, { "epoch": 2.01, "learning_rate": 1.646255942362772e-05, "loss": 0.5662, "step": 181730 }, { "epoch": 2.01, "learning_rate": 1.646163669648921e-05, "loss": 0.5892, "step": 181735 }, { "epoch": 2.01, "learning_rate": 1.6460713969350697e-05, "loss": 0.6677, "step": 181740 }, { "epoch": 2.01, "learning_rate": 1.645979124221218e-05, "loss": 0.6255, "step": 181745 }, { "epoch": 2.01, "learning_rate": 1.6458868515073673e-05, "loss": 0.5999, "step": 181750 }, { "epoch": 2.01, "learning_rate": 1.6457945787935157e-05, "loss": 0.6546, "step": 181755 }, { "epoch": 2.01, "learning_rate": 1.645702306079665e-05, "loss": 0.5962, "step": 181760 }, { "epoch": 2.01, "learning_rate": 1.6456100333658133e-05, "loss": 0.5992, "step": 181765 }, { "epoch": 2.01, "learning_rate": 1.6455177606519624e-05, "loss": 0.5517, "step": 181770 }, { "epoch": 2.01, "learning_rate": 1.645425487938111e-05, "loss": 0.6276, "step": 181775 }, { "epoch": 2.01, "learning_rate": 1.64533321522426e-05, "loss": 0.6034, "step": 181780 }, { "epoch": 2.01, "learning_rate": 1.6452409425104084e-05, "loss": 0.6286, "step": 181785 }, { "epoch": 2.01, "learning_rate": 1.6451486697965572e-05, "loss": 0.6823, "step": 181790 }, { "epoch": 2.01, "learning_rate": 1.645056397082706e-05, "loss": 0.611, "step": 181795 }, { "epoch": 2.01, "learning_rate": 1.6449641243688544e-05, "loss": 0.6007, "step": 181800 }, { "epoch": 2.01, "learning_rate": 1.6448718516550036e-05, "loss": 0.5922, "step": 181805 }, { "epoch": 2.01, "learning_rate": 1.644779578941152e-05, "loss": 0.594, "step": 181810 }, { "epoch": 2.01, "learning_rate": 1.644687306227301e-05, "loss": 0.5771, "step": 181815 }, { "epoch": 2.01, "learning_rate": 1.6445950335134496e-05, "loss": 0.5638, "step": 181820 }, { "epoch": 2.01, "learning_rate": 1.6445027607995987e-05, "loss": 0.5965, "step": 181825 }, { "epoch": 2.01, "learning_rate": 1.644410488085747e-05, "loss": 0.6259, "step": 181830 }, { "epoch": 2.01, "learning_rate": 1.6443182153718963e-05, "loss": 0.5959, "step": 181835 }, { "epoch": 2.01, "learning_rate": 1.6442259426580447e-05, "loss": 0.5991, "step": 181840 }, { "epoch": 2.01, "learning_rate": 1.6441336699441935e-05, "loss": 0.5787, "step": 181845 }, { "epoch": 2.01, "learning_rate": 1.6440413972303423e-05, "loss": 0.6161, "step": 181850 }, { "epoch": 2.01, "learning_rate": 1.643949124516491e-05, "loss": 0.5844, "step": 181855 }, { "epoch": 2.01, "learning_rate": 1.64385685180264e-05, "loss": 0.6165, "step": 181860 }, { "epoch": 2.01, "learning_rate": 1.6437645790887883e-05, "loss": 0.6039, "step": 181865 }, { "epoch": 2.01, "learning_rate": 1.6436723063749374e-05, "loss": 0.6092, "step": 181870 }, { "epoch": 2.01, "learning_rate": 1.643580033661086e-05, "loss": 0.6, "step": 181875 }, { "epoch": 2.01, "learning_rate": 1.643487760947235e-05, "loss": 0.6032, "step": 181880 }, { "epoch": 2.01, "learning_rate": 1.6433954882333834e-05, "loss": 0.5603, "step": 181885 }, { "epoch": 2.01, "learning_rate": 1.6433032155195326e-05, "loss": 0.6136, "step": 181890 }, { "epoch": 2.01, "learning_rate": 1.643210942805681e-05, "loss": 0.5691, "step": 181895 }, { "epoch": 2.01, "learning_rate": 1.6431186700918298e-05, "loss": 0.6478, "step": 181900 }, { "epoch": 2.01, "learning_rate": 1.6430263973779786e-05, "loss": 0.6342, "step": 181905 }, { "epoch": 2.01, "learning_rate": 1.6429341246641274e-05, "loss": 0.5946, "step": 181910 }, { "epoch": 2.01, "learning_rate": 1.642841851950276e-05, "loss": 0.6727, "step": 181915 }, { "epoch": 2.01, "learning_rate": 1.642749579236425e-05, "loss": 0.5551, "step": 181920 }, { "epoch": 2.01, "learning_rate": 1.6426573065225737e-05, "loss": 0.6342, "step": 181925 }, { "epoch": 2.01, "learning_rate": 1.6425650338087225e-05, "loss": 0.6292, "step": 181930 }, { "epoch": 2.01, "learning_rate": 1.6424727610948713e-05, "loss": 0.5899, "step": 181935 }, { "epoch": 2.01, "learning_rate": 1.6423804883810197e-05, "loss": 0.5653, "step": 181940 }, { "epoch": 2.01, "learning_rate": 1.642288215667169e-05, "loss": 0.5714, "step": 181945 }, { "epoch": 2.01, "learning_rate": 1.6421959429533173e-05, "loss": 0.6338, "step": 181950 }, { "epoch": 2.01, "learning_rate": 1.642103670239466e-05, "loss": 0.6461, "step": 181955 }, { "epoch": 2.01, "learning_rate": 1.642011397525615e-05, "loss": 0.6152, "step": 181960 }, { "epoch": 2.01, "learning_rate": 1.6419191248117637e-05, "loss": 0.6309, "step": 181965 }, { "epoch": 2.01, "learning_rate": 1.6418268520979125e-05, "loss": 0.5877, "step": 181970 }, { "epoch": 2.01, "learning_rate": 1.6417345793840612e-05, "loss": 0.6682, "step": 181975 }, { "epoch": 2.02, "learning_rate": 1.64164230667021e-05, "loss": 0.6118, "step": 181980 }, { "epoch": 2.02, "learning_rate": 1.6415500339563588e-05, "loss": 0.5915, "step": 181985 }, { "epoch": 2.02, "learning_rate": 1.6414577612425076e-05, "loss": 0.6468, "step": 181990 }, { "epoch": 2.02, "learning_rate": 1.6413654885286564e-05, "loss": 0.6075, "step": 181995 }, { "epoch": 2.02, "learning_rate": 1.641273215814805e-05, "loss": 0.6082, "step": 182000 }, { "epoch": 2.02, "eval_loss": 0.5799068808555603, "eval_runtime": 69.216, "eval_samples_per_second": 28.895, "eval_steps_per_second": 14.448, "step": 182000 }, { "epoch": 2.02, "learning_rate": 1.641180943100954e-05, "loss": 0.5913, "step": 182005 }, { "epoch": 2.02, "learning_rate": 1.6410886703871027e-05, "loss": 0.539, "step": 182010 }, { "epoch": 2.02, "learning_rate": 1.6409963976732512e-05, "loss": 0.6164, "step": 182015 }, { "epoch": 2.02, "learning_rate": 1.6409041249594e-05, "loss": 0.6047, "step": 182020 }, { "epoch": 2.02, "learning_rate": 1.6408118522455487e-05, "loss": 0.609, "step": 182025 }, { "epoch": 2.02, "learning_rate": 1.6407195795316975e-05, "loss": 0.5964, "step": 182030 }, { "epoch": 2.02, "learning_rate": 1.6406273068178463e-05, "loss": 0.5558, "step": 182035 }, { "epoch": 2.02, "learning_rate": 1.640535034103995e-05, "loss": 0.5696, "step": 182040 }, { "epoch": 2.02, "learning_rate": 1.640442761390144e-05, "loss": 0.5958, "step": 182045 }, { "epoch": 2.02, "learning_rate": 1.6403504886762927e-05, "loss": 0.6093, "step": 182050 }, { "epoch": 2.02, "learning_rate": 1.6402582159624415e-05, "loss": 0.6303, "step": 182055 }, { "epoch": 2.02, "learning_rate": 1.6401659432485902e-05, "loss": 0.6038, "step": 182060 }, { "epoch": 2.02, "learning_rate": 1.640073670534739e-05, "loss": 0.6229, "step": 182065 }, { "epoch": 2.02, "learning_rate": 1.6399813978208878e-05, "loss": 0.5738, "step": 182070 }, { "epoch": 2.02, "learning_rate": 1.6398891251070363e-05, "loss": 0.5703, "step": 182075 }, { "epoch": 2.02, "learning_rate": 1.6397968523931854e-05, "loss": 0.6314, "step": 182080 }, { "epoch": 2.02, "learning_rate": 1.6397045796793338e-05, "loss": 0.5914, "step": 182085 }, { "epoch": 2.02, "learning_rate": 1.6396123069654826e-05, "loss": 0.6256, "step": 182090 }, { "epoch": 2.02, "learning_rate": 1.6395200342516314e-05, "loss": 0.6143, "step": 182095 }, { "epoch": 2.02, "learning_rate": 1.6394277615377802e-05, "loss": 0.6102, "step": 182100 }, { "epoch": 2.02, "learning_rate": 1.639335488823929e-05, "loss": 0.6215, "step": 182105 }, { "epoch": 2.02, "learning_rate": 1.6392432161100777e-05, "loss": 0.5613, "step": 182110 }, { "epoch": 2.02, "learning_rate": 1.6391509433962265e-05, "loss": 0.6086, "step": 182115 }, { "epoch": 2.02, "learning_rate": 1.6390586706823753e-05, "loss": 0.6031, "step": 182120 }, { "epoch": 2.02, "learning_rate": 1.638966397968524e-05, "loss": 0.5927, "step": 182125 }, { "epoch": 2.02, "learning_rate": 1.6388741252546725e-05, "loss": 0.6459, "step": 182130 }, { "epoch": 2.02, "learning_rate": 1.6387818525408217e-05, "loss": 0.6139, "step": 182135 }, { "epoch": 2.02, "learning_rate": 1.63868957982697e-05, "loss": 0.6022, "step": 182140 }, { "epoch": 2.02, "learning_rate": 1.6385973071131192e-05, "loss": 0.5587, "step": 182145 }, { "epoch": 2.02, "learning_rate": 1.6385050343992677e-05, "loss": 0.6211, "step": 182150 }, { "epoch": 2.02, "learning_rate": 1.6384127616854168e-05, "loss": 0.6152, "step": 182155 }, { "epoch": 2.02, "learning_rate": 1.6383204889715653e-05, "loss": 0.6299, "step": 182160 }, { "epoch": 2.02, "learning_rate": 1.638228216257714e-05, "loss": 0.5697, "step": 182165 }, { "epoch": 2.02, "learning_rate": 1.6381359435438628e-05, "loss": 0.5426, "step": 182170 }, { "epoch": 2.02, "learning_rate": 1.6380436708300116e-05, "loss": 0.5749, "step": 182175 }, { "epoch": 2.02, "learning_rate": 1.6379513981161604e-05, "loss": 0.6282, "step": 182180 }, { "epoch": 2.02, "learning_rate": 1.6378591254023092e-05, "loss": 0.549, "step": 182185 }, { "epoch": 2.02, "learning_rate": 1.637766852688458e-05, "loss": 0.6249, "step": 182190 }, { "epoch": 2.02, "learning_rate": 1.6376745799746064e-05, "loss": 0.5804, "step": 182195 }, { "epoch": 2.02, "learning_rate": 1.6375823072607555e-05, "loss": 0.6284, "step": 182200 }, { "epoch": 2.02, "learning_rate": 1.637490034546904e-05, "loss": 0.5899, "step": 182205 }, { "epoch": 2.02, "learning_rate": 1.637397761833053e-05, "loss": 0.5904, "step": 182210 }, { "epoch": 2.02, "learning_rate": 1.6373054891192016e-05, "loss": 0.6243, "step": 182215 }, { "epoch": 2.02, "learning_rate": 1.6372132164053507e-05, "loss": 0.6382, "step": 182220 }, { "epoch": 2.02, "learning_rate": 1.637120943691499e-05, "loss": 0.6061, "step": 182225 }, { "epoch": 2.02, "learning_rate": 1.637028670977648e-05, "loss": 0.5575, "step": 182230 }, { "epoch": 2.02, "learning_rate": 1.6369363982637967e-05, "loss": 0.6012, "step": 182235 }, { "epoch": 2.02, "learning_rate": 1.6368441255499455e-05, "loss": 0.6503, "step": 182240 }, { "epoch": 2.02, "learning_rate": 1.6367518528360943e-05, "loss": 0.6158, "step": 182245 }, { "epoch": 2.02, "learning_rate": 1.6366595801222427e-05, "loss": 0.6211, "step": 182250 }, { "epoch": 2.02, "learning_rate": 1.636567307408392e-05, "loss": 0.6505, "step": 182255 }, { "epoch": 2.02, "learning_rate": 1.6364750346945403e-05, "loss": 0.6041, "step": 182260 }, { "epoch": 2.02, "learning_rate": 1.6363827619806894e-05, "loss": 0.6346, "step": 182265 }, { "epoch": 2.02, "learning_rate": 1.636290489266838e-05, "loss": 0.6033, "step": 182270 }, { "epoch": 2.02, "learning_rate": 1.636198216552987e-05, "loss": 0.5951, "step": 182275 }, { "epoch": 2.02, "learning_rate": 1.6361059438391354e-05, "loss": 0.582, "step": 182280 }, { "epoch": 2.02, "learning_rate": 1.6360136711252842e-05, "loss": 0.644, "step": 182285 }, { "epoch": 2.02, "learning_rate": 1.635921398411433e-05, "loss": 0.5885, "step": 182290 }, { "epoch": 2.02, "learning_rate": 1.6358291256975818e-05, "loss": 0.6071, "step": 182295 }, { "epoch": 2.02, "learning_rate": 1.6357368529837306e-05, "loss": 0.5886, "step": 182300 }, { "epoch": 2.02, "learning_rate": 1.6356445802698793e-05, "loss": 0.5728, "step": 182305 }, { "epoch": 2.02, "learning_rate": 1.635552307556028e-05, "loss": 0.6989, "step": 182310 }, { "epoch": 2.02, "learning_rate": 1.6354600348421766e-05, "loss": 0.5989, "step": 182315 }, { "epoch": 2.02, "learning_rate": 1.6353677621283257e-05, "loss": 0.6258, "step": 182320 }, { "epoch": 2.02, "learning_rate": 1.635275489414474e-05, "loss": 0.5946, "step": 182325 }, { "epoch": 2.02, "learning_rate": 1.6351832167006233e-05, "loss": 0.5621, "step": 182330 }, { "epoch": 2.02, "learning_rate": 1.6350909439867717e-05, "loss": 0.5882, "step": 182335 }, { "epoch": 2.02, "learning_rate": 1.634998671272921e-05, "loss": 0.5696, "step": 182340 }, { "epoch": 2.02, "learning_rate": 1.6349063985590693e-05, "loss": 0.5875, "step": 182345 }, { "epoch": 2.02, "learning_rate": 1.634814125845218e-05, "loss": 0.6315, "step": 182350 }, { "epoch": 2.02, "learning_rate": 1.634721853131367e-05, "loss": 0.6086, "step": 182355 }, { "epoch": 2.02, "learning_rate": 1.6346295804175156e-05, "loss": 0.6251, "step": 182360 }, { "epoch": 2.02, "learning_rate": 1.6345373077036644e-05, "loss": 0.6554, "step": 182365 }, { "epoch": 2.02, "learning_rate": 1.6344450349898132e-05, "loss": 0.5755, "step": 182370 }, { "epoch": 2.02, "learning_rate": 1.634352762275962e-05, "loss": 0.6374, "step": 182375 }, { "epoch": 2.02, "learning_rate": 1.6342604895621108e-05, "loss": 0.5786, "step": 182380 }, { "epoch": 2.02, "learning_rate": 1.6341682168482596e-05, "loss": 0.5927, "step": 182385 }, { "epoch": 2.02, "learning_rate": 1.634075944134408e-05, "loss": 0.5809, "step": 182390 }, { "epoch": 2.02, "learning_rate": 1.633983671420557e-05, "loss": 0.6228, "step": 182395 }, { "epoch": 2.02, "learning_rate": 1.6338913987067056e-05, "loss": 0.5838, "step": 182400 }, { "epoch": 2.02, "learning_rate": 1.6337991259928544e-05, "loss": 0.6137, "step": 182405 }, { "epoch": 2.02, "learning_rate": 1.633706853279003e-05, "loss": 0.5895, "step": 182410 }, { "epoch": 2.02, "learning_rate": 1.633614580565152e-05, "loss": 0.6425, "step": 182415 }, { "epoch": 2.02, "learning_rate": 1.6335223078513007e-05, "loss": 0.5956, "step": 182420 }, { "epoch": 2.02, "learning_rate": 1.6334300351374495e-05, "loss": 0.6012, "step": 182425 }, { "epoch": 2.02, "learning_rate": 1.6333377624235983e-05, "loss": 0.584, "step": 182430 }, { "epoch": 2.02, "learning_rate": 1.633245489709747e-05, "loss": 0.5537, "step": 182435 }, { "epoch": 2.02, "learning_rate": 1.633153216995896e-05, "loss": 0.5872, "step": 182440 }, { "epoch": 2.02, "learning_rate": 1.6330609442820446e-05, "loss": 0.5923, "step": 182445 }, { "epoch": 2.02, "learning_rate": 1.6329686715681934e-05, "loss": 0.6119, "step": 182450 }, { "epoch": 2.02, "learning_rate": 1.6328763988543422e-05, "loss": 0.5818, "step": 182455 }, { "epoch": 2.02, "learning_rate": 1.6327841261404907e-05, "loss": 0.6108, "step": 182460 }, { "epoch": 2.02, "learning_rate": 1.6326918534266398e-05, "loss": 0.6087, "step": 182465 }, { "epoch": 2.02, "learning_rate": 1.6325995807127882e-05, "loss": 0.6201, "step": 182470 }, { "epoch": 2.02, "learning_rate": 1.632507307998937e-05, "loss": 0.5795, "step": 182475 }, { "epoch": 2.02, "learning_rate": 1.6324150352850858e-05, "loss": 0.6014, "step": 182480 }, { "epoch": 2.02, "learning_rate": 1.6323227625712346e-05, "loss": 0.6037, "step": 182485 }, { "epoch": 2.02, "learning_rate": 1.6322304898573834e-05, "loss": 0.6322, "step": 182490 }, { "epoch": 2.02, "learning_rate": 1.632138217143532e-05, "loss": 0.6048, "step": 182495 }, { "epoch": 2.02, "learning_rate": 1.632045944429681e-05, "loss": 0.6286, "step": 182500 }, { "epoch": 2.02, "learning_rate": 1.6319536717158297e-05, "loss": 0.6145, "step": 182505 }, { "epoch": 2.02, "learning_rate": 1.6318613990019785e-05, "loss": 0.5823, "step": 182510 }, { "epoch": 2.02, "learning_rate": 1.631769126288127e-05, "loss": 0.6308, "step": 182515 }, { "epoch": 2.02, "learning_rate": 1.631676853574276e-05, "loss": 0.5678, "step": 182520 }, { "epoch": 2.02, "learning_rate": 1.6315845808604245e-05, "loss": 0.6117, "step": 182525 }, { "epoch": 2.02, "learning_rate": 1.6314923081465736e-05, "loss": 0.6751, "step": 182530 }, { "epoch": 2.02, "learning_rate": 1.631400035432722e-05, "loss": 0.6515, "step": 182535 }, { "epoch": 2.02, "learning_rate": 1.6313077627188712e-05, "loss": 0.6331, "step": 182540 }, { "epoch": 2.02, "learning_rate": 1.6312154900050197e-05, "loss": 0.6096, "step": 182545 }, { "epoch": 2.02, "learning_rate": 1.6311232172911684e-05, "loss": 0.6427, "step": 182550 }, { "epoch": 2.02, "learning_rate": 1.6310309445773172e-05, "loss": 0.5843, "step": 182555 }, { "epoch": 2.02, "learning_rate": 1.630938671863466e-05, "loss": 0.6153, "step": 182560 }, { "epoch": 2.02, "learning_rate": 1.6308463991496148e-05, "loss": 0.5969, "step": 182565 }, { "epoch": 2.02, "learning_rate": 1.6307541264357636e-05, "loss": 0.6413, "step": 182570 }, { "epoch": 2.02, "learning_rate": 1.6306618537219124e-05, "loss": 0.6287, "step": 182575 }, { "epoch": 2.02, "learning_rate": 1.6305695810080608e-05, "loss": 0.6362, "step": 182580 }, { "epoch": 2.02, "learning_rate": 1.63047730829421e-05, "loss": 0.5467, "step": 182585 }, { "epoch": 2.02, "learning_rate": 1.6303850355803584e-05, "loss": 0.6186, "step": 182590 }, { "epoch": 2.02, "learning_rate": 1.6302927628665075e-05, "loss": 0.5729, "step": 182595 }, { "epoch": 2.02, "learning_rate": 1.630200490152656e-05, "loss": 0.5965, "step": 182600 }, { "epoch": 2.02, "learning_rate": 1.630108217438805e-05, "loss": 0.5996, "step": 182605 }, { "epoch": 2.02, "learning_rate": 1.6300159447249535e-05, "loss": 0.6275, "step": 182610 }, { "epoch": 2.02, "learning_rate": 1.6299236720111023e-05, "loss": 0.6937, "step": 182615 }, { "epoch": 2.02, "learning_rate": 1.629831399297251e-05, "loss": 0.5762, "step": 182620 }, { "epoch": 2.02, "learning_rate": 1.6297391265834e-05, "loss": 0.6206, "step": 182625 }, { "epoch": 2.02, "learning_rate": 1.6296468538695487e-05, "loss": 0.6093, "step": 182630 }, { "epoch": 2.02, "learning_rate": 1.629554581155697e-05, "loss": 0.6636, "step": 182635 }, { "epoch": 2.02, "learning_rate": 1.6294623084418462e-05, "loss": 0.6525, "step": 182640 }, { "epoch": 2.02, "learning_rate": 1.6293700357279947e-05, "loss": 0.5761, "step": 182645 }, { "epoch": 2.02, "learning_rate": 1.6292777630141438e-05, "loss": 0.6102, "step": 182650 }, { "epoch": 2.02, "learning_rate": 1.6291854903002922e-05, "loss": 0.6453, "step": 182655 }, { "epoch": 2.02, "learning_rate": 1.6290932175864414e-05, "loss": 0.6312, "step": 182660 }, { "epoch": 2.02, "learning_rate": 1.6290009448725898e-05, "loss": 0.5657, "step": 182665 }, { "epoch": 2.02, "learning_rate": 1.6289086721587386e-05, "loss": 0.6331, "step": 182670 }, { "epoch": 2.02, "learning_rate": 1.6288163994448874e-05, "loss": 0.6733, "step": 182675 }, { "epoch": 2.02, "learning_rate": 1.6287241267310362e-05, "loss": 0.6053, "step": 182680 }, { "epoch": 2.02, "learning_rate": 1.628631854017185e-05, "loss": 0.6075, "step": 182685 }, { "epoch": 2.02, "learning_rate": 1.6285395813033337e-05, "loss": 0.6511, "step": 182690 }, { "epoch": 2.02, "learning_rate": 1.6284473085894825e-05, "loss": 0.6487, "step": 182695 }, { "epoch": 2.02, "learning_rate": 1.628355035875631e-05, "loss": 0.5374, "step": 182700 }, { "epoch": 2.02, "learning_rate": 1.62826276316178e-05, "loss": 0.5274, "step": 182705 }, { "epoch": 2.02, "learning_rate": 1.6281704904479285e-05, "loss": 0.5936, "step": 182710 }, { "epoch": 2.02, "learning_rate": 1.6280782177340777e-05, "loss": 0.5907, "step": 182715 }, { "epoch": 2.02, "learning_rate": 1.627985945020226e-05, "loss": 0.6213, "step": 182720 }, { "epoch": 2.02, "learning_rate": 1.6278936723063752e-05, "loss": 0.5997, "step": 182725 }, { "epoch": 2.02, "learning_rate": 1.6278013995925237e-05, "loss": 0.6339, "step": 182730 }, { "epoch": 2.02, "learning_rate": 1.6277091268786725e-05, "loss": 0.5407, "step": 182735 }, { "epoch": 2.02, "learning_rate": 1.6276168541648213e-05, "loss": 0.6048, "step": 182740 }, { "epoch": 2.02, "learning_rate": 1.62752458145097e-05, "loss": 0.6603, "step": 182745 }, { "epoch": 2.02, "learning_rate": 1.6274323087371188e-05, "loss": 0.6118, "step": 182750 }, { "epoch": 2.02, "learning_rate": 1.6273400360232676e-05, "loss": 0.5967, "step": 182755 }, { "epoch": 2.02, "learning_rate": 1.6272477633094164e-05, "loss": 0.5565, "step": 182760 }, { "epoch": 2.02, "learning_rate": 1.6271554905955652e-05, "loss": 0.5758, "step": 182765 }, { "epoch": 2.02, "learning_rate": 1.627063217881714e-05, "loss": 0.5661, "step": 182770 }, { "epoch": 2.02, "learning_rate": 1.6269709451678624e-05, "loss": 0.5567, "step": 182775 }, { "epoch": 2.02, "learning_rate": 1.6268786724540115e-05, "loss": 0.6244, "step": 182780 }, { "epoch": 2.02, "learning_rate": 1.62678639974016e-05, "loss": 0.5879, "step": 182785 }, { "epoch": 2.02, "learning_rate": 1.6266941270263088e-05, "loss": 0.6074, "step": 182790 }, { "epoch": 2.02, "learning_rate": 1.6266018543124575e-05, "loss": 0.6084, "step": 182795 }, { "epoch": 2.02, "learning_rate": 1.6265095815986063e-05, "loss": 0.6606, "step": 182800 }, { "epoch": 2.02, "learning_rate": 1.626417308884755e-05, "loss": 0.5491, "step": 182805 }, { "epoch": 2.02, "learning_rate": 1.626325036170904e-05, "loss": 0.6161, "step": 182810 }, { "epoch": 2.02, "learning_rate": 1.6262327634570527e-05, "loss": 0.6373, "step": 182815 }, { "epoch": 2.02, "learning_rate": 1.6261404907432015e-05, "loss": 0.5881, "step": 182820 }, { "epoch": 2.02, "learning_rate": 1.6260482180293503e-05, "loss": 0.6925, "step": 182825 }, { "epoch": 2.02, "learning_rate": 1.625955945315499e-05, "loss": 0.6133, "step": 182830 }, { "epoch": 2.02, "learning_rate": 1.6258636726016478e-05, "loss": 0.5648, "step": 182835 }, { "epoch": 2.02, "learning_rate": 1.6257713998877966e-05, "loss": 0.5699, "step": 182840 }, { "epoch": 2.02, "learning_rate": 1.625679127173945e-05, "loss": 0.634, "step": 182845 }, { "epoch": 2.02, "learning_rate": 1.625586854460094e-05, "loss": 0.5982, "step": 182850 }, { "epoch": 2.02, "learning_rate": 1.6254945817462426e-05, "loss": 0.6331, "step": 182855 }, { "epoch": 2.02, "learning_rate": 1.6254023090323914e-05, "loss": 0.6449, "step": 182860 }, { "epoch": 2.02, "learning_rate": 1.6253100363185402e-05, "loss": 0.6497, "step": 182865 }, { "epoch": 2.02, "learning_rate": 1.625217763604689e-05, "loss": 0.6742, "step": 182870 }, { "epoch": 2.02, "learning_rate": 1.6251254908908378e-05, "loss": 0.5776, "step": 182875 }, { "epoch": 2.02, "learning_rate": 1.6250332181769866e-05, "loss": 0.6159, "step": 182880 }, { "epoch": 2.03, "learning_rate": 1.6249409454631353e-05, "loss": 0.6006, "step": 182885 }, { "epoch": 2.03, "learning_rate": 1.624848672749284e-05, "loss": 0.649, "step": 182890 }, { "epoch": 2.03, "learning_rate": 1.624756400035433e-05, "loss": 0.5513, "step": 182895 }, { "epoch": 2.03, "learning_rate": 1.6246641273215814e-05, "loss": 0.6607, "step": 182900 }, { "epoch": 2.03, "learning_rate": 1.6245718546077305e-05, "loss": 0.5953, "step": 182905 }, { "epoch": 2.03, "learning_rate": 1.624479581893879e-05, "loss": 0.6427, "step": 182910 }, { "epoch": 2.03, "learning_rate": 1.624387309180028e-05, "loss": 0.6207, "step": 182915 }, { "epoch": 2.03, "learning_rate": 1.6242950364661765e-05, "loss": 0.6216, "step": 182920 }, { "epoch": 2.03, "learning_rate": 1.6242027637523253e-05, "loss": 0.6293, "step": 182925 }, { "epoch": 2.03, "learning_rate": 1.624110491038474e-05, "loss": 0.6482, "step": 182930 }, { "epoch": 2.03, "learning_rate": 1.624018218324623e-05, "loss": 0.5842, "step": 182935 }, { "epoch": 2.03, "learning_rate": 1.6239259456107716e-05, "loss": 0.5804, "step": 182940 }, { "epoch": 2.03, "learning_rate": 1.6238336728969204e-05, "loss": 0.6478, "step": 182945 }, { "epoch": 2.03, "learning_rate": 1.6237414001830692e-05, "loss": 0.622, "step": 182950 }, { "epoch": 2.03, "learning_rate": 1.623649127469218e-05, "loss": 0.6093, "step": 182955 }, { "epoch": 2.03, "learning_rate": 1.6235568547553668e-05, "loss": 0.5925, "step": 182960 }, { "epoch": 2.03, "learning_rate": 1.6234645820415152e-05, "loss": 0.6055, "step": 182965 }, { "epoch": 2.03, "learning_rate": 1.6233723093276643e-05, "loss": 0.6445, "step": 182970 }, { "epoch": 2.03, "learning_rate": 1.6232800366138128e-05, "loss": 0.5521, "step": 182975 }, { "epoch": 2.03, "learning_rate": 1.623187763899962e-05, "loss": 0.5912, "step": 182980 }, { "epoch": 2.03, "learning_rate": 1.6230954911861104e-05, "loss": 0.6146, "step": 182985 }, { "epoch": 2.03, "learning_rate": 1.6230032184722595e-05, "loss": 0.6448, "step": 182990 }, { "epoch": 2.03, "learning_rate": 1.622910945758408e-05, "loss": 0.5652, "step": 182995 }, { "epoch": 2.03, "learning_rate": 1.6228186730445567e-05, "loss": 0.6483, "step": 183000 }, { "epoch": 2.03, "eval_loss": 0.571638822555542, "eval_runtime": 69.1709, "eval_samples_per_second": 28.914, "eval_steps_per_second": 14.457, "step": 183000 }, { "epoch": 2.03, "learning_rate": 1.6227264003307055e-05, "loss": 0.5784, "step": 183005 }, { "epoch": 2.03, "learning_rate": 1.6226341276168543e-05, "loss": 0.6094, "step": 183010 }, { "epoch": 2.03, "learning_rate": 1.622541854903003e-05, "loss": 0.6207, "step": 183015 }, { "epoch": 2.03, "learning_rate": 1.6224495821891515e-05, "loss": 0.6087, "step": 183020 }, { "epoch": 2.03, "learning_rate": 1.6223573094753006e-05, "loss": 0.6034, "step": 183025 }, { "epoch": 2.03, "learning_rate": 1.622265036761449e-05, "loss": 0.6027, "step": 183030 }, { "epoch": 2.03, "learning_rate": 1.6221727640475982e-05, "loss": 0.6224, "step": 183035 }, { "epoch": 2.03, "learning_rate": 1.6220804913337467e-05, "loss": 0.6227, "step": 183040 }, { "epoch": 2.03, "learning_rate": 1.6219882186198958e-05, "loss": 0.5946, "step": 183045 }, { "epoch": 2.03, "learning_rate": 1.6218959459060442e-05, "loss": 0.609, "step": 183050 }, { "epoch": 2.03, "learning_rate": 1.621803673192193e-05, "loss": 0.5909, "step": 183055 }, { "epoch": 2.03, "learning_rate": 1.6217114004783418e-05, "loss": 0.5869, "step": 183060 }, { "epoch": 2.03, "learning_rate": 1.6216191277644906e-05, "loss": 0.626, "step": 183065 }, { "epoch": 2.03, "learning_rate": 1.6215268550506394e-05, "loss": 0.5646, "step": 183070 }, { "epoch": 2.03, "learning_rate": 1.6214345823367878e-05, "loss": 0.5833, "step": 183075 }, { "epoch": 2.03, "learning_rate": 1.621342309622937e-05, "loss": 0.6415, "step": 183080 }, { "epoch": 2.03, "learning_rate": 1.6212500369090854e-05, "loss": 0.6994, "step": 183085 }, { "epoch": 2.03, "learning_rate": 1.6211577641952345e-05, "loss": 0.575, "step": 183090 }, { "epoch": 2.03, "learning_rate": 1.621065491481383e-05, "loss": 0.6193, "step": 183095 }, { "epoch": 2.03, "learning_rate": 1.620973218767532e-05, "loss": 0.5794, "step": 183100 }, { "epoch": 2.03, "learning_rate": 1.6208809460536805e-05, "loss": 0.5932, "step": 183105 }, { "epoch": 2.03, "learning_rate": 1.6207886733398296e-05, "loss": 0.5844, "step": 183110 }, { "epoch": 2.03, "learning_rate": 1.620696400625978e-05, "loss": 0.6405, "step": 183115 }, { "epoch": 2.03, "learning_rate": 1.620604127912127e-05, "loss": 0.6077, "step": 183120 }, { "epoch": 2.03, "learning_rate": 1.6205118551982757e-05, "loss": 0.6876, "step": 183125 }, { "epoch": 2.03, "learning_rate": 1.6204195824844244e-05, "loss": 0.643, "step": 183130 }, { "epoch": 2.03, "learning_rate": 1.6203273097705732e-05, "loss": 0.6486, "step": 183135 }, { "epoch": 2.03, "learning_rate": 1.620235037056722e-05, "loss": 0.6341, "step": 183140 }, { "epoch": 2.03, "learning_rate": 1.6201427643428708e-05, "loss": 0.6383, "step": 183145 }, { "epoch": 2.03, "learning_rate": 1.6200504916290192e-05, "loss": 0.55, "step": 183150 }, { "epoch": 2.03, "learning_rate": 1.6199582189151684e-05, "loss": 0.6176, "step": 183155 }, { "epoch": 2.03, "learning_rate": 1.6198659462013168e-05, "loss": 0.6355, "step": 183160 }, { "epoch": 2.03, "learning_rate": 1.619773673487466e-05, "loss": 0.618, "step": 183165 }, { "epoch": 2.03, "learning_rate": 1.6196814007736144e-05, "loss": 0.5788, "step": 183170 }, { "epoch": 2.03, "learning_rate": 1.619589128059763e-05, "loss": 0.5593, "step": 183175 }, { "epoch": 2.03, "learning_rate": 1.619496855345912e-05, "loss": 0.5914, "step": 183180 }, { "epoch": 2.03, "learning_rate": 1.6194045826320607e-05, "loss": 0.6287, "step": 183185 }, { "epoch": 2.03, "learning_rate": 1.6193123099182095e-05, "loss": 0.5909, "step": 183190 }, { "epoch": 2.03, "learning_rate": 1.6192200372043583e-05, "loss": 0.585, "step": 183195 }, { "epoch": 2.03, "learning_rate": 1.619127764490507e-05, "loss": 0.6388, "step": 183200 }, { "epoch": 2.03, "learning_rate": 1.619035491776656e-05, "loss": 0.621, "step": 183205 }, { "epoch": 2.03, "learning_rate": 1.6189432190628047e-05, "loss": 0.5831, "step": 183210 }, { "epoch": 2.03, "learning_rate": 1.6188509463489534e-05, "loss": 0.5517, "step": 183215 }, { "epoch": 2.03, "learning_rate": 1.6187586736351022e-05, "loss": 0.6168, "step": 183220 }, { "epoch": 2.03, "learning_rate": 1.6186664009212507e-05, "loss": 0.5748, "step": 183225 }, { "epoch": 2.03, "learning_rate": 1.6185741282073995e-05, "loss": 0.5998, "step": 183230 }, { "epoch": 2.03, "learning_rate": 1.6184818554935482e-05, "loss": 0.5718, "step": 183235 }, { "epoch": 2.03, "learning_rate": 1.618389582779697e-05, "loss": 0.6266, "step": 183240 }, { "epoch": 2.03, "learning_rate": 1.6182973100658458e-05, "loss": 0.5996, "step": 183245 }, { "epoch": 2.03, "learning_rate": 1.6182050373519946e-05, "loss": 0.6471, "step": 183250 }, { "epoch": 2.03, "learning_rate": 1.6181127646381434e-05, "loss": 0.6377, "step": 183255 }, { "epoch": 2.03, "learning_rate": 1.6180204919242922e-05, "loss": 0.6188, "step": 183260 }, { "epoch": 2.03, "learning_rate": 1.617928219210441e-05, "loss": 0.5811, "step": 183265 }, { "epoch": 2.03, "learning_rate": 1.6178359464965897e-05, "loss": 0.609, "step": 183270 }, { "epoch": 2.03, "learning_rate": 1.6177436737827385e-05, "loss": 0.5618, "step": 183275 }, { "epoch": 2.03, "learning_rate": 1.6176514010688873e-05, "loss": 0.6464, "step": 183280 }, { "epoch": 2.03, "learning_rate": 1.6175591283550358e-05, "loss": 0.5706, "step": 183285 }, { "epoch": 2.03, "learning_rate": 1.617466855641185e-05, "loss": 0.6072, "step": 183290 }, { "epoch": 2.03, "learning_rate": 1.6173745829273333e-05, "loss": 0.6283, "step": 183295 }, { "epoch": 2.03, "learning_rate": 1.6172823102134824e-05, "loss": 0.6287, "step": 183300 }, { "epoch": 2.03, "learning_rate": 1.617190037499631e-05, "loss": 0.6363, "step": 183305 }, { "epoch": 2.03, "learning_rate": 1.6170977647857797e-05, "loss": 0.5925, "step": 183310 }, { "epoch": 2.03, "learning_rate": 1.6170054920719285e-05, "loss": 0.6068, "step": 183315 }, { "epoch": 2.03, "learning_rate": 1.6169132193580772e-05, "loss": 0.6183, "step": 183320 }, { "epoch": 2.03, "learning_rate": 1.616820946644226e-05, "loss": 0.6251, "step": 183325 }, { "epoch": 2.03, "learning_rate": 1.6167286739303748e-05, "loss": 0.6407, "step": 183330 }, { "epoch": 2.03, "learning_rate": 1.6166364012165236e-05, "loss": 0.646, "step": 183335 }, { "epoch": 2.03, "learning_rate": 1.6165441285026724e-05, "loss": 0.6482, "step": 183340 }, { "epoch": 2.03, "learning_rate": 1.6164518557888212e-05, "loss": 0.6242, "step": 183345 }, { "epoch": 2.03, "learning_rate": 1.6163595830749696e-05, "loss": 0.5541, "step": 183350 }, { "epoch": 2.03, "learning_rate": 1.6162673103611187e-05, "loss": 0.6531, "step": 183355 }, { "epoch": 2.03, "learning_rate": 1.6161750376472672e-05, "loss": 0.5895, "step": 183360 }, { "epoch": 2.03, "learning_rate": 1.6160827649334163e-05, "loss": 0.5836, "step": 183365 }, { "epoch": 2.03, "learning_rate": 1.6159904922195648e-05, "loss": 0.6071, "step": 183370 }, { "epoch": 2.03, "learning_rate": 1.615898219505714e-05, "loss": 0.608, "step": 183375 }, { "epoch": 2.03, "learning_rate": 1.6158059467918623e-05, "loss": 0.583, "step": 183380 }, { "epoch": 2.03, "learning_rate": 1.615713674078011e-05, "loss": 0.5772, "step": 183385 }, { "epoch": 2.03, "learning_rate": 1.61562140136416e-05, "loss": 0.6169, "step": 183390 }, { "epoch": 2.03, "learning_rate": 1.6155291286503087e-05, "loss": 0.6069, "step": 183395 }, { "epoch": 2.03, "learning_rate": 1.6154368559364575e-05, "loss": 0.6442, "step": 183400 }, { "epoch": 2.03, "learning_rate": 1.615344583222606e-05, "loss": 0.5772, "step": 183405 }, { "epoch": 2.03, "learning_rate": 1.615252310508755e-05, "loss": 0.555, "step": 183410 }, { "epoch": 2.03, "learning_rate": 1.6151600377949035e-05, "loss": 0.582, "step": 183415 }, { "epoch": 2.03, "learning_rate": 1.6150677650810526e-05, "loss": 0.5705, "step": 183420 }, { "epoch": 2.03, "learning_rate": 1.614975492367201e-05, "loss": 0.5846, "step": 183425 }, { "epoch": 2.03, "learning_rate": 1.6148832196533502e-05, "loss": 0.627, "step": 183430 }, { "epoch": 2.03, "learning_rate": 1.6147909469394986e-05, "loss": 0.613, "step": 183435 }, { "epoch": 2.03, "learning_rate": 1.6146986742256477e-05, "loss": 0.5841, "step": 183440 }, { "epoch": 2.03, "learning_rate": 1.6146064015117962e-05, "loss": 0.5667, "step": 183445 }, { "epoch": 2.03, "learning_rate": 1.614514128797945e-05, "loss": 0.6041, "step": 183450 }, { "epoch": 2.03, "learning_rate": 1.6144218560840938e-05, "loss": 0.589, "step": 183455 }, { "epoch": 2.03, "learning_rate": 1.6143295833702422e-05, "loss": 0.583, "step": 183460 }, { "epoch": 2.03, "learning_rate": 1.6142373106563913e-05, "loss": 0.6827, "step": 183465 }, { "epoch": 2.03, "learning_rate": 1.6141450379425398e-05, "loss": 0.6003, "step": 183470 }, { "epoch": 2.03, "learning_rate": 1.614052765228689e-05, "loss": 0.5909, "step": 183475 }, { "epoch": 2.03, "learning_rate": 1.6139604925148373e-05, "loss": 0.6267, "step": 183480 }, { "epoch": 2.03, "learning_rate": 1.6138682198009865e-05, "loss": 0.6203, "step": 183485 }, { "epoch": 2.03, "learning_rate": 1.613775947087135e-05, "loss": 0.619, "step": 183490 }, { "epoch": 2.03, "learning_rate": 1.613683674373284e-05, "loss": 0.6111, "step": 183495 }, { "epoch": 2.03, "learning_rate": 1.6135914016594325e-05, "loss": 0.5496, "step": 183500 }, { "epoch": 2.03, "learning_rate": 1.6134991289455813e-05, "loss": 0.6076, "step": 183505 }, { "epoch": 2.03, "learning_rate": 1.61340685623173e-05, "loss": 0.5908, "step": 183510 }, { "epoch": 2.03, "learning_rate": 1.613314583517879e-05, "loss": 0.6017, "step": 183515 }, { "epoch": 2.03, "learning_rate": 1.6132223108040276e-05, "loss": 0.5732, "step": 183520 }, { "epoch": 2.03, "learning_rate": 1.6131300380901764e-05, "loss": 0.6129, "step": 183525 }, { "epoch": 2.03, "learning_rate": 1.6130377653763252e-05, "loss": 0.5822, "step": 183530 }, { "epoch": 2.03, "learning_rate": 1.6129454926624736e-05, "loss": 0.5996, "step": 183535 }, { "epoch": 2.03, "learning_rate": 1.6128532199486228e-05, "loss": 0.6101, "step": 183540 }, { "epoch": 2.03, "learning_rate": 1.6127609472347712e-05, "loss": 0.5726, "step": 183545 }, { "epoch": 2.03, "learning_rate": 1.6126686745209203e-05, "loss": 0.6409, "step": 183550 }, { "epoch": 2.03, "learning_rate": 1.6125764018070688e-05, "loss": 0.5802, "step": 183555 }, { "epoch": 2.03, "learning_rate": 1.6124841290932176e-05, "loss": 0.538, "step": 183560 }, { "epoch": 2.03, "learning_rate": 1.6123918563793664e-05, "loss": 0.6475, "step": 183565 }, { "epoch": 2.03, "learning_rate": 1.612299583665515e-05, "loss": 0.5691, "step": 183570 }, { "epoch": 2.03, "learning_rate": 1.612207310951664e-05, "loss": 0.6386, "step": 183575 }, { "epoch": 2.03, "learning_rate": 1.6121150382378127e-05, "loss": 0.6225, "step": 183580 }, { "epoch": 2.03, "learning_rate": 1.6120227655239615e-05, "loss": 0.5684, "step": 183585 }, { "epoch": 2.03, "learning_rate": 1.6119304928101103e-05, "loss": 0.6071, "step": 183590 }, { "epoch": 2.03, "learning_rate": 1.611838220096259e-05, "loss": 0.5595, "step": 183595 }, { "epoch": 2.03, "learning_rate": 1.611745947382408e-05, "loss": 0.6309, "step": 183600 }, { "epoch": 2.03, "learning_rate": 1.6116536746685566e-05, "loss": 0.6208, "step": 183605 }, { "epoch": 2.03, "learning_rate": 1.611561401954705e-05, "loss": 0.6196, "step": 183610 }, { "epoch": 2.03, "learning_rate": 1.611469129240854e-05, "loss": 0.6135, "step": 183615 }, { "epoch": 2.03, "learning_rate": 1.6113768565270026e-05, "loss": 0.6168, "step": 183620 }, { "epoch": 2.03, "learning_rate": 1.6112845838131514e-05, "loss": 0.6419, "step": 183625 }, { "epoch": 2.03, "learning_rate": 1.6111923110993002e-05, "loss": 0.6359, "step": 183630 }, { "epoch": 2.03, "learning_rate": 1.611100038385449e-05, "loss": 0.5647, "step": 183635 }, { "epoch": 2.03, "learning_rate": 1.6110077656715978e-05, "loss": 0.5974, "step": 183640 }, { "epoch": 2.03, "learning_rate": 1.6109154929577466e-05, "loss": 0.6406, "step": 183645 }, { "epoch": 2.03, "learning_rate": 1.6108232202438954e-05, "loss": 0.6198, "step": 183650 }, { "epoch": 2.03, "learning_rate": 1.610730947530044e-05, "loss": 0.6023, "step": 183655 }, { "epoch": 2.03, "learning_rate": 1.610638674816193e-05, "loss": 0.6713, "step": 183660 }, { "epoch": 2.03, "learning_rate": 1.6105464021023417e-05, "loss": 0.5953, "step": 183665 }, { "epoch": 2.03, "learning_rate": 1.6104541293884905e-05, "loss": 0.6256, "step": 183670 }, { "epoch": 2.03, "learning_rate": 1.6103618566746393e-05, "loss": 0.6074, "step": 183675 }, { "epoch": 2.03, "learning_rate": 1.6102695839607877e-05, "loss": 0.6063, "step": 183680 }, { "epoch": 2.03, "learning_rate": 1.6101773112469365e-05, "loss": 0.6336, "step": 183685 }, { "epoch": 2.03, "learning_rate": 1.6100850385330853e-05, "loss": 0.5776, "step": 183690 }, { "epoch": 2.03, "learning_rate": 1.609992765819234e-05, "loss": 0.6199, "step": 183695 }, { "epoch": 2.03, "learning_rate": 1.609900493105383e-05, "loss": 0.6055, "step": 183700 }, { "epoch": 2.03, "learning_rate": 1.6098082203915317e-05, "loss": 0.5921, "step": 183705 }, { "epoch": 2.03, "learning_rate": 1.6097159476776804e-05, "loss": 0.593, "step": 183710 }, { "epoch": 2.03, "learning_rate": 1.6096236749638292e-05, "loss": 0.6301, "step": 183715 }, { "epoch": 2.03, "learning_rate": 1.609531402249978e-05, "loss": 0.5996, "step": 183720 }, { "epoch": 2.03, "learning_rate": 1.6094391295361268e-05, "loss": 0.6268, "step": 183725 }, { "epoch": 2.03, "learning_rate": 1.6093468568222756e-05, "loss": 0.6184, "step": 183730 }, { "epoch": 2.03, "learning_rate": 1.609254584108424e-05, "loss": 0.6192, "step": 183735 }, { "epoch": 2.03, "learning_rate": 1.609162311394573e-05, "loss": 0.6421, "step": 183740 }, { "epoch": 2.03, "learning_rate": 1.6090700386807216e-05, "loss": 0.6194, "step": 183745 }, { "epoch": 2.03, "learning_rate": 1.6089777659668707e-05, "loss": 0.6525, "step": 183750 }, { "epoch": 2.03, "learning_rate": 1.608885493253019e-05, "loss": 0.6223, "step": 183755 }, { "epoch": 2.03, "learning_rate": 1.608793220539168e-05, "loss": 0.5591, "step": 183760 }, { "epoch": 2.03, "learning_rate": 1.6087009478253167e-05, "loss": 0.592, "step": 183765 }, { "epoch": 2.03, "learning_rate": 1.6086086751114655e-05, "loss": 0.5645, "step": 183770 }, { "epoch": 2.03, "learning_rate": 1.6085164023976143e-05, "loss": 0.5555, "step": 183775 }, { "epoch": 2.03, "learning_rate": 1.608424129683763e-05, "loss": 0.622, "step": 183780 }, { "epoch": 2.04, "learning_rate": 1.608331856969912e-05, "loss": 0.6393, "step": 183785 }, { "epoch": 2.04, "learning_rate": 1.6082395842560603e-05, "loss": 0.6553, "step": 183790 }, { "epoch": 2.04, "learning_rate": 1.6081473115422094e-05, "loss": 0.6166, "step": 183795 }, { "epoch": 2.04, "learning_rate": 1.608055038828358e-05, "loss": 0.5655, "step": 183800 }, { "epoch": 2.04, "learning_rate": 1.607962766114507e-05, "loss": 0.6388, "step": 183805 }, { "epoch": 2.04, "learning_rate": 1.6078704934006555e-05, "loss": 0.593, "step": 183810 }, { "epoch": 2.04, "learning_rate": 1.6077782206868046e-05, "loss": 0.5419, "step": 183815 }, { "epoch": 2.04, "learning_rate": 1.607685947972953e-05, "loss": 0.6634, "step": 183820 }, { "epoch": 2.04, "learning_rate": 1.607593675259102e-05, "loss": 0.6176, "step": 183825 }, { "epoch": 2.04, "learning_rate": 1.6075014025452506e-05, "loss": 0.5998, "step": 183830 }, { "epoch": 2.04, "learning_rate": 1.6074091298313994e-05, "loss": 0.6722, "step": 183835 }, { "epoch": 2.04, "learning_rate": 1.607316857117548e-05, "loss": 0.6077, "step": 183840 }, { "epoch": 2.04, "learning_rate": 1.6072245844036966e-05, "loss": 0.618, "step": 183845 }, { "epoch": 2.04, "learning_rate": 1.6071323116898457e-05, "loss": 0.6037, "step": 183850 }, { "epoch": 2.04, "learning_rate": 1.6070400389759942e-05, "loss": 0.5897, "step": 183855 }, { "epoch": 2.04, "learning_rate": 1.6069477662621433e-05, "loss": 0.6082, "step": 183860 }, { "epoch": 2.04, "learning_rate": 1.6068554935482918e-05, "loss": 0.5981, "step": 183865 }, { "epoch": 2.04, "learning_rate": 1.606763220834441e-05, "loss": 0.6209, "step": 183870 }, { "epoch": 2.04, "learning_rate": 1.6066709481205893e-05, "loss": 0.5524, "step": 183875 }, { "epoch": 2.04, "learning_rate": 1.6065786754067384e-05, "loss": 0.6021, "step": 183880 }, { "epoch": 2.04, "learning_rate": 1.606486402692887e-05, "loss": 0.6188, "step": 183885 }, { "epoch": 2.04, "learning_rate": 1.6063941299790357e-05, "loss": 0.6027, "step": 183890 }, { "epoch": 2.04, "learning_rate": 1.6063018572651845e-05, "loss": 0.5758, "step": 183895 }, { "epoch": 2.04, "learning_rate": 1.6062095845513332e-05, "loss": 0.6093, "step": 183900 }, { "epoch": 2.04, "learning_rate": 1.606117311837482e-05, "loss": 0.6081, "step": 183905 }, { "epoch": 2.04, "learning_rate": 1.6060250391236305e-05, "loss": 0.6434, "step": 183910 }, { "epoch": 2.04, "learning_rate": 1.6059327664097796e-05, "loss": 0.5991, "step": 183915 }, { "epoch": 2.04, "learning_rate": 1.605840493695928e-05, "loss": 0.6557, "step": 183920 }, { "epoch": 2.04, "learning_rate": 1.605748220982077e-05, "loss": 0.5999, "step": 183925 }, { "epoch": 2.04, "learning_rate": 1.6056559482682256e-05, "loss": 0.578, "step": 183930 }, { "epoch": 2.04, "learning_rate": 1.6055636755543747e-05, "loss": 0.6164, "step": 183935 }, { "epoch": 2.04, "learning_rate": 1.6054714028405232e-05, "loss": 0.5985, "step": 183940 }, { "epoch": 2.04, "learning_rate": 1.605379130126672e-05, "loss": 0.5877, "step": 183945 }, { "epoch": 2.04, "learning_rate": 1.6052868574128208e-05, "loss": 0.5983, "step": 183950 }, { "epoch": 2.04, "learning_rate": 1.6051945846989695e-05, "loss": 0.6154, "step": 183955 }, { "epoch": 2.04, "learning_rate": 1.6051023119851183e-05, "loss": 0.6105, "step": 183960 }, { "epoch": 2.04, "learning_rate": 1.605010039271267e-05, "loss": 0.6402, "step": 183965 }, { "epoch": 2.04, "learning_rate": 1.604917766557416e-05, "loss": 0.658, "step": 183970 }, { "epoch": 2.04, "learning_rate": 1.6048254938435647e-05, "loss": 0.587, "step": 183975 }, { "epoch": 2.04, "learning_rate": 1.6047332211297135e-05, "loss": 0.5761, "step": 183980 }, { "epoch": 2.04, "learning_rate": 1.604640948415862e-05, "loss": 0.6474, "step": 183985 }, { "epoch": 2.04, "learning_rate": 1.604548675702011e-05, "loss": 0.6536, "step": 183990 }, { "epoch": 2.04, "learning_rate": 1.6044564029881595e-05, "loss": 0.611, "step": 183995 }, { "epoch": 2.04, "learning_rate": 1.6043641302743083e-05, "loss": 0.5503, "step": 184000 }, { "epoch": 2.04, "eval_loss": 0.5806185007095337, "eval_runtime": 69.3758, "eval_samples_per_second": 28.829, "eval_steps_per_second": 14.414, "step": 184000 }, { "epoch": 2.04, "learning_rate": 1.604271857560457e-05, "loss": 0.617, "step": 184005 }, { "epoch": 2.04, "learning_rate": 1.604179584846606e-05, "loss": 0.6175, "step": 184010 }, { "epoch": 2.04, "learning_rate": 1.6040873121327546e-05, "loss": 0.6268, "step": 184015 }, { "epoch": 2.04, "learning_rate": 1.6039950394189034e-05, "loss": 0.591, "step": 184020 }, { "epoch": 2.04, "learning_rate": 1.6039027667050522e-05, "loss": 0.6111, "step": 184025 }, { "epoch": 2.04, "learning_rate": 1.603810493991201e-05, "loss": 0.6168, "step": 184030 }, { "epoch": 2.04, "learning_rate": 1.6037182212773498e-05, "loss": 0.6106, "step": 184035 }, { "epoch": 2.04, "learning_rate": 1.6036259485634985e-05, "loss": 0.6401, "step": 184040 }, { "epoch": 2.04, "learning_rate": 1.6035336758496473e-05, "loss": 0.6527, "step": 184045 }, { "epoch": 2.04, "learning_rate": 1.603441403135796e-05, "loss": 0.5814, "step": 184050 }, { "epoch": 2.04, "learning_rate": 1.603349130421945e-05, "loss": 0.6336, "step": 184055 }, { "epoch": 2.04, "learning_rate": 1.6032568577080933e-05, "loss": 0.6287, "step": 184060 }, { "epoch": 2.04, "learning_rate": 1.603164584994242e-05, "loss": 0.5891, "step": 184065 }, { "epoch": 2.04, "learning_rate": 1.603072312280391e-05, "loss": 0.6051, "step": 184070 }, { "epoch": 2.04, "learning_rate": 1.6029800395665397e-05, "loss": 0.6191, "step": 184075 }, { "epoch": 2.04, "learning_rate": 1.6028877668526885e-05, "loss": 0.5743, "step": 184080 }, { "epoch": 2.04, "learning_rate": 1.6027954941388373e-05, "loss": 0.6624, "step": 184085 }, { "epoch": 2.04, "learning_rate": 1.602703221424986e-05, "loss": 0.5912, "step": 184090 }, { "epoch": 2.04, "learning_rate": 1.602610948711135e-05, "loss": 0.5885, "step": 184095 }, { "epoch": 2.04, "learning_rate": 1.6025186759972836e-05, "loss": 0.5884, "step": 184100 }, { "epoch": 2.04, "learning_rate": 1.6024264032834324e-05, "loss": 0.5976, "step": 184105 }, { "epoch": 2.04, "learning_rate": 1.6023341305695812e-05, "loss": 0.5512, "step": 184110 }, { "epoch": 2.04, "learning_rate": 1.60224185785573e-05, "loss": 0.6173, "step": 184115 }, { "epoch": 2.04, "learning_rate": 1.6021495851418784e-05, "loss": 0.6638, "step": 184120 }, { "epoch": 2.04, "learning_rate": 1.6020573124280275e-05, "loss": 0.6246, "step": 184125 }, { "epoch": 2.04, "learning_rate": 1.601965039714176e-05, "loss": 0.6275, "step": 184130 }, { "epoch": 2.04, "learning_rate": 1.601872767000325e-05, "loss": 0.5645, "step": 184135 }, { "epoch": 2.04, "learning_rate": 1.6017804942864736e-05, "loss": 0.6125, "step": 184140 }, { "epoch": 2.04, "learning_rate": 1.6016882215726223e-05, "loss": 0.6157, "step": 184145 }, { "epoch": 2.04, "learning_rate": 1.601595948858771e-05, "loss": 0.5821, "step": 184150 }, { "epoch": 2.04, "learning_rate": 1.60150367614492e-05, "loss": 0.5523, "step": 184155 }, { "epoch": 2.04, "learning_rate": 1.6014114034310687e-05, "loss": 0.6121, "step": 184160 }, { "epoch": 2.04, "learning_rate": 1.6013191307172175e-05, "loss": 0.6695, "step": 184165 }, { "epoch": 2.04, "learning_rate": 1.6012268580033663e-05, "loss": 0.6187, "step": 184170 }, { "epoch": 2.04, "learning_rate": 1.6011345852895147e-05, "loss": 0.5414, "step": 184175 }, { "epoch": 2.04, "learning_rate": 1.601042312575664e-05, "loss": 0.5607, "step": 184180 }, { "epoch": 2.04, "learning_rate": 1.6009500398618123e-05, "loss": 0.5617, "step": 184185 }, { "epoch": 2.04, "learning_rate": 1.6008577671479614e-05, "loss": 0.5786, "step": 184190 }, { "epoch": 2.04, "learning_rate": 1.60076549443411e-05, "loss": 0.6494, "step": 184195 }, { "epoch": 2.04, "learning_rate": 1.600673221720259e-05, "loss": 0.571, "step": 184200 }, { "epoch": 2.04, "learning_rate": 1.6005809490064074e-05, "loss": 0.5793, "step": 184205 }, { "epoch": 2.04, "learning_rate": 1.6004886762925566e-05, "loss": 0.6276, "step": 184210 }, { "epoch": 2.04, "learning_rate": 1.600396403578705e-05, "loss": 0.6241, "step": 184215 }, { "epoch": 2.04, "learning_rate": 1.6003041308648538e-05, "loss": 0.5946, "step": 184220 }, { "epoch": 2.04, "learning_rate": 1.6002118581510026e-05, "loss": 0.5896, "step": 184225 }, { "epoch": 2.04, "learning_rate": 1.600119585437151e-05, "loss": 0.6173, "step": 184230 }, { "epoch": 2.04, "learning_rate": 1.6000273127233e-05, "loss": 0.6149, "step": 184235 }, { "epoch": 2.04, "learning_rate": 1.5999350400094486e-05, "loss": 0.612, "step": 184240 }, { "epoch": 2.04, "learning_rate": 1.5998427672955977e-05, "loss": 0.577, "step": 184245 }, { "epoch": 2.04, "learning_rate": 1.599750494581746e-05, "loss": 0.6243, "step": 184250 }, { "epoch": 2.04, "learning_rate": 1.5996582218678953e-05, "loss": 0.6259, "step": 184255 }, { "epoch": 2.04, "learning_rate": 1.5995659491540437e-05, "loss": 0.6641, "step": 184260 }, { "epoch": 2.04, "learning_rate": 1.599473676440193e-05, "loss": 0.6338, "step": 184265 }, { "epoch": 2.04, "learning_rate": 1.5993814037263413e-05, "loss": 0.5903, "step": 184270 }, { "epoch": 2.04, "learning_rate": 1.59928913101249e-05, "loss": 0.611, "step": 184275 }, { "epoch": 2.04, "learning_rate": 1.599196858298639e-05, "loss": 0.5681, "step": 184280 }, { "epoch": 2.04, "learning_rate": 1.5991045855847876e-05, "loss": 0.572, "step": 184285 }, { "epoch": 2.04, "learning_rate": 1.5990123128709364e-05, "loss": 0.6237, "step": 184290 }, { "epoch": 2.04, "learning_rate": 1.598920040157085e-05, "loss": 0.6343, "step": 184295 }, { "epoch": 2.04, "learning_rate": 1.598827767443234e-05, "loss": 0.6143, "step": 184300 }, { "epoch": 2.04, "learning_rate": 1.5987354947293824e-05, "loss": 0.6356, "step": 184305 }, { "epoch": 2.04, "learning_rate": 1.5986432220155316e-05, "loss": 0.6171, "step": 184310 }, { "epoch": 2.04, "learning_rate": 1.59855094930168e-05, "loss": 0.5855, "step": 184315 }, { "epoch": 2.04, "learning_rate": 1.598458676587829e-05, "loss": 0.5908, "step": 184320 }, { "epoch": 2.04, "learning_rate": 1.5983664038739776e-05, "loss": 0.6268, "step": 184325 }, { "epoch": 2.04, "learning_rate": 1.5982741311601264e-05, "loss": 0.5717, "step": 184330 }, { "epoch": 2.04, "learning_rate": 1.598181858446275e-05, "loss": 0.6095, "step": 184335 }, { "epoch": 2.04, "learning_rate": 1.598089585732424e-05, "loss": 0.5825, "step": 184340 }, { "epoch": 2.04, "learning_rate": 1.5979973130185727e-05, "loss": 0.6108, "step": 184345 }, { "epoch": 2.04, "learning_rate": 1.5979050403047215e-05, "loss": 0.6046, "step": 184350 }, { "epoch": 2.04, "learning_rate": 1.5978127675908703e-05, "loss": 0.5743, "step": 184355 }, { "epoch": 2.04, "learning_rate": 1.597720494877019e-05, "loss": 0.6568, "step": 184360 }, { "epoch": 2.04, "learning_rate": 1.597628222163168e-05, "loss": 0.6561, "step": 184365 }, { "epoch": 2.04, "learning_rate": 1.5975359494493163e-05, "loss": 0.6176, "step": 184370 }, { "epoch": 2.04, "learning_rate": 1.5974436767354654e-05, "loss": 0.5676, "step": 184375 }, { "epoch": 2.04, "learning_rate": 1.597351404021614e-05, "loss": 0.5957, "step": 184380 }, { "epoch": 2.04, "learning_rate": 1.5972591313077627e-05, "loss": 0.5728, "step": 184385 }, { "epoch": 2.04, "learning_rate": 1.5971668585939115e-05, "loss": 0.6288, "step": 184390 }, { "epoch": 2.04, "learning_rate": 1.5970745858800602e-05, "loss": 0.5998, "step": 184395 }, { "epoch": 2.04, "learning_rate": 1.596982313166209e-05, "loss": 0.6052, "step": 184400 }, { "epoch": 2.04, "learning_rate": 1.5968900404523578e-05, "loss": 0.6185, "step": 184405 }, { "epoch": 2.04, "learning_rate": 1.5967977677385066e-05, "loss": 0.5764, "step": 184410 }, { "epoch": 2.04, "learning_rate": 1.5967054950246554e-05, "loss": 0.6026, "step": 184415 }, { "epoch": 2.04, "learning_rate": 1.596613222310804e-05, "loss": 0.6214, "step": 184420 }, { "epoch": 2.04, "learning_rate": 1.596520949596953e-05, "loss": 0.5344, "step": 184425 }, { "epoch": 2.04, "learning_rate": 1.5964286768831017e-05, "loss": 0.6148, "step": 184430 }, { "epoch": 2.04, "learning_rate": 1.5963364041692505e-05, "loss": 0.5844, "step": 184435 }, { "epoch": 2.04, "learning_rate": 1.5962441314553993e-05, "loss": 0.6559, "step": 184440 }, { "epoch": 2.04, "learning_rate": 1.5961518587415477e-05, "loss": 0.5877, "step": 184445 }, { "epoch": 2.04, "learning_rate": 1.5960595860276965e-05, "loss": 0.5483, "step": 184450 }, { "epoch": 2.04, "learning_rate": 1.5959673133138453e-05, "loss": 0.6287, "step": 184455 }, { "epoch": 2.04, "learning_rate": 1.595875040599994e-05, "loss": 0.613, "step": 184460 }, { "epoch": 2.04, "learning_rate": 1.595782767886143e-05, "loss": 0.6209, "step": 184465 }, { "epoch": 2.04, "learning_rate": 1.5956904951722917e-05, "loss": 0.6194, "step": 184470 }, { "epoch": 2.04, "learning_rate": 1.5955982224584405e-05, "loss": 0.5921, "step": 184475 }, { "epoch": 2.04, "learning_rate": 1.5955059497445892e-05, "loss": 0.5692, "step": 184480 }, { "epoch": 2.04, "learning_rate": 1.595413677030738e-05, "loss": 0.6105, "step": 184485 }, { "epoch": 2.04, "learning_rate": 1.5953214043168868e-05, "loss": 0.5868, "step": 184490 }, { "epoch": 2.04, "learning_rate": 1.5952291316030356e-05, "loss": 0.5974, "step": 184495 }, { "epoch": 2.04, "learning_rate": 1.5951368588891844e-05, "loss": 0.6443, "step": 184500 }, { "epoch": 2.04, "learning_rate": 1.5950445861753328e-05, "loss": 0.5926, "step": 184505 }, { "epoch": 2.04, "learning_rate": 1.594952313461482e-05, "loss": 0.5774, "step": 184510 }, { "epoch": 2.04, "learning_rate": 1.5948600407476304e-05, "loss": 0.5289, "step": 184515 }, { "epoch": 2.04, "learning_rate": 1.5947677680337792e-05, "loss": 0.6084, "step": 184520 }, { "epoch": 2.04, "learning_rate": 1.594675495319928e-05, "loss": 0.6023, "step": 184525 }, { "epoch": 2.04, "learning_rate": 1.5945832226060767e-05, "loss": 0.5909, "step": 184530 }, { "epoch": 2.04, "learning_rate": 1.5944909498922255e-05, "loss": 0.5837, "step": 184535 }, { "epoch": 2.04, "learning_rate": 1.5943986771783743e-05, "loss": 0.5751, "step": 184540 }, { "epoch": 2.04, "learning_rate": 1.594306404464523e-05, "loss": 0.6213, "step": 184545 }, { "epoch": 2.04, "learning_rate": 1.594214131750672e-05, "loss": 0.644, "step": 184550 }, { "epoch": 2.04, "learning_rate": 1.5941218590368207e-05, "loss": 0.5954, "step": 184555 }, { "epoch": 2.04, "learning_rate": 1.594029586322969e-05, "loss": 0.555, "step": 184560 }, { "epoch": 2.04, "learning_rate": 1.5939373136091182e-05, "loss": 0.5386, "step": 184565 }, { "epoch": 2.04, "learning_rate": 1.5938450408952667e-05, "loss": 0.5712, "step": 184570 }, { "epoch": 2.04, "learning_rate": 1.5937527681814158e-05, "loss": 0.588, "step": 184575 }, { "epoch": 2.04, "learning_rate": 1.5936604954675643e-05, "loss": 0.6514, "step": 184580 }, { "epoch": 2.04, "learning_rate": 1.5935682227537134e-05, "loss": 0.5801, "step": 184585 }, { "epoch": 2.04, "learning_rate": 1.5934759500398618e-05, "loss": 0.5883, "step": 184590 }, { "epoch": 2.04, "learning_rate": 1.5933836773260106e-05, "loss": 0.5397, "step": 184595 }, { "epoch": 2.04, "learning_rate": 1.5932914046121594e-05, "loss": 0.5899, "step": 184600 }, { "epoch": 2.04, "learning_rate": 1.5931991318983082e-05, "loss": 0.6297, "step": 184605 }, { "epoch": 2.04, "learning_rate": 1.593106859184457e-05, "loss": 0.613, "step": 184610 }, { "epoch": 2.04, "learning_rate": 1.5930145864706058e-05, "loss": 0.6447, "step": 184615 }, { "epoch": 2.04, "learning_rate": 1.5929223137567545e-05, "loss": 0.6154, "step": 184620 }, { "epoch": 2.04, "learning_rate": 1.592830041042903e-05, "loss": 0.5545, "step": 184625 }, { "epoch": 2.04, "learning_rate": 1.592737768329052e-05, "loss": 0.6044, "step": 184630 }, { "epoch": 2.04, "learning_rate": 1.5926454956152006e-05, "loss": 0.6068, "step": 184635 }, { "epoch": 2.04, "learning_rate": 1.5925532229013497e-05, "loss": 0.6182, "step": 184640 }, { "epoch": 2.04, "learning_rate": 1.592460950187498e-05, "loss": 0.6485, "step": 184645 }, { "epoch": 2.04, "learning_rate": 1.5923686774736472e-05, "loss": 0.5646, "step": 184650 }, { "epoch": 2.04, "learning_rate": 1.5922764047597957e-05, "loss": 0.6519, "step": 184655 }, { "epoch": 2.04, "learning_rate": 1.5921841320459445e-05, "loss": 0.6148, "step": 184660 }, { "epoch": 2.04, "learning_rate": 1.5920918593320933e-05, "loss": 0.574, "step": 184665 }, { "epoch": 2.04, "learning_rate": 1.591999586618242e-05, "loss": 0.6278, "step": 184670 }, { "epoch": 2.04, "learning_rate": 1.591907313904391e-05, "loss": 0.5743, "step": 184675 }, { "epoch": 2.04, "learning_rate": 1.5918150411905393e-05, "loss": 0.5902, "step": 184680 }, { "epoch": 2.04, "learning_rate": 1.5917227684766884e-05, "loss": 0.6617, "step": 184685 }, { "epoch": 2.05, "learning_rate": 1.591630495762837e-05, "loss": 0.5777, "step": 184690 }, { "epoch": 2.05, "learning_rate": 1.591538223048986e-05, "loss": 0.6303, "step": 184695 }, { "epoch": 2.05, "learning_rate": 1.5914459503351344e-05, "loss": 0.6301, "step": 184700 }, { "epoch": 2.05, "learning_rate": 1.5913536776212835e-05, "loss": 0.6097, "step": 184705 }, { "epoch": 2.05, "learning_rate": 1.591261404907432e-05, "loss": 0.6132, "step": 184710 }, { "epoch": 2.05, "learning_rate": 1.5911691321935808e-05, "loss": 0.6121, "step": 184715 }, { "epoch": 2.05, "learning_rate": 1.5910768594797296e-05, "loss": 0.6615, "step": 184720 }, { "epoch": 2.05, "learning_rate": 1.5909845867658783e-05, "loss": 0.6555, "step": 184725 }, { "epoch": 2.05, "learning_rate": 1.590892314052027e-05, "loss": 0.5961, "step": 184730 }, { "epoch": 2.05, "learning_rate": 1.590800041338176e-05, "loss": 0.6005, "step": 184735 }, { "epoch": 2.05, "learning_rate": 1.5907077686243247e-05, "loss": 0.5894, "step": 184740 }, { "epoch": 2.05, "learning_rate": 1.590615495910473e-05, "loss": 0.6571, "step": 184745 }, { "epoch": 2.05, "learning_rate": 1.5905232231966223e-05, "loss": 0.5835, "step": 184750 }, { "epoch": 2.05, "learning_rate": 1.5904309504827707e-05, "loss": 0.687, "step": 184755 }, { "epoch": 2.05, "learning_rate": 1.59033867776892e-05, "loss": 0.6131, "step": 184760 }, { "epoch": 2.05, "learning_rate": 1.5902464050550683e-05, "loss": 0.6192, "step": 184765 }, { "epoch": 2.05, "learning_rate": 1.5901541323412174e-05, "loss": 0.6198, "step": 184770 }, { "epoch": 2.05, "learning_rate": 1.590061859627366e-05, "loss": 0.6081, "step": 184775 }, { "epoch": 2.05, "learning_rate": 1.5899695869135146e-05, "loss": 0.5986, "step": 184780 }, { "epoch": 2.05, "learning_rate": 1.5898773141996634e-05, "loss": 0.567, "step": 184785 }, { "epoch": 2.05, "learning_rate": 1.5897850414858122e-05, "loss": 0.5711, "step": 184790 }, { "epoch": 2.05, "learning_rate": 1.589692768771961e-05, "loss": 0.6223, "step": 184795 }, { "epoch": 2.05, "learning_rate": 1.5896004960581098e-05, "loss": 0.6008, "step": 184800 }, { "epoch": 2.05, "learning_rate": 1.5895082233442586e-05, "loss": 0.6094, "step": 184805 }, { "epoch": 2.05, "learning_rate": 1.5894159506304073e-05, "loss": 0.5981, "step": 184810 }, { "epoch": 2.05, "learning_rate": 1.589323677916556e-05, "loss": 0.6792, "step": 184815 }, { "epoch": 2.05, "learning_rate": 1.5892314052027046e-05, "loss": 0.6463, "step": 184820 }, { "epoch": 2.05, "learning_rate": 1.5891391324888537e-05, "loss": 0.6368, "step": 184825 }, { "epoch": 2.05, "learning_rate": 1.589046859775002e-05, "loss": 0.6204, "step": 184830 }, { "epoch": 2.05, "learning_rate": 1.588954587061151e-05, "loss": 0.6363, "step": 184835 }, { "epoch": 2.05, "learning_rate": 1.5888623143472997e-05, "loss": 0.6383, "step": 184840 }, { "epoch": 2.05, "learning_rate": 1.5887700416334485e-05, "loss": 0.6754, "step": 184845 }, { "epoch": 2.05, "learning_rate": 1.5886777689195973e-05, "loss": 0.6023, "step": 184850 }, { "epoch": 2.05, "learning_rate": 1.588585496205746e-05, "loss": 0.6464, "step": 184855 }, { "epoch": 2.05, "learning_rate": 1.588493223491895e-05, "loss": 0.6275, "step": 184860 }, { "epoch": 2.05, "learning_rate": 1.5884009507780436e-05, "loss": 0.608, "step": 184865 }, { "epoch": 2.05, "learning_rate": 1.5883086780641924e-05, "loss": 0.6061, "step": 184870 }, { "epoch": 2.05, "learning_rate": 1.5882164053503412e-05, "loss": 0.6151, "step": 184875 }, { "epoch": 2.05, "learning_rate": 1.58812413263649e-05, "loss": 0.5875, "step": 184880 }, { "epoch": 2.05, "learning_rate": 1.5880318599226388e-05, "loss": 0.5777, "step": 184885 }, { "epoch": 2.05, "learning_rate": 1.5879395872087872e-05, "loss": 0.6699, "step": 184890 }, { "epoch": 2.05, "learning_rate": 1.587847314494936e-05, "loss": 0.6877, "step": 184895 }, { "epoch": 2.05, "learning_rate": 1.5877550417810848e-05, "loss": 0.6378, "step": 184900 }, { "epoch": 2.05, "learning_rate": 1.5876627690672336e-05, "loss": 0.5876, "step": 184905 }, { "epoch": 2.05, "learning_rate": 1.5875704963533824e-05, "loss": 0.6117, "step": 184910 }, { "epoch": 2.05, "learning_rate": 1.587478223639531e-05, "loss": 0.6133, "step": 184915 }, { "epoch": 2.05, "learning_rate": 1.58738595092568e-05, "loss": 0.631, "step": 184920 }, { "epoch": 2.05, "learning_rate": 1.5872936782118287e-05, "loss": 0.6346, "step": 184925 }, { "epoch": 2.05, "learning_rate": 1.5872014054979775e-05, "loss": 0.6648, "step": 184930 }, { "epoch": 2.05, "learning_rate": 1.5871091327841263e-05, "loss": 0.6183, "step": 184935 }, { "epoch": 2.05, "learning_rate": 1.587016860070275e-05, "loss": 0.6069, "step": 184940 }, { "epoch": 2.05, "learning_rate": 1.5869245873564235e-05, "loss": 0.5692, "step": 184945 }, { "epoch": 2.05, "learning_rate": 1.5868323146425726e-05, "loss": 0.615, "step": 184950 }, { "epoch": 2.05, "learning_rate": 1.586740041928721e-05, "loss": 0.5912, "step": 184955 }, { "epoch": 2.05, "learning_rate": 1.5866477692148702e-05, "loss": 0.6145, "step": 184960 }, { "epoch": 2.05, "learning_rate": 1.5865554965010187e-05, "loss": 0.6635, "step": 184965 }, { "epoch": 2.05, "learning_rate": 1.5864632237871678e-05, "loss": 0.6316, "step": 184970 }, { "epoch": 2.05, "learning_rate": 1.5863709510733162e-05, "loss": 0.6216, "step": 184975 }, { "epoch": 2.05, "learning_rate": 1.586278678359465e-05, "loss": 0.6085, "step": 184980 }, { "epoch": 2.05, "learning_rate": 1.5861864056456138e-05, "loss": 0.6036, "step": 184985 }, { "epoch": 2.05, "learning_rate": 1.5860941329317626e-05, "loss": 0.5919, "step": 184990 }, { "epoch": 2.05, "learning_rate": 1.5860018602179114e-05, "loss": 0.5697, "step": 184995 }, { "epoch": 2.05, "learning_rate": 1.58590958750406e-05, "loss": 0.6231, "step": 185000 }, { "epoch": 2.05, "eval_loss": 0.5698525309562683, "eval_runtime": 69.1383, "eval_samples_per_second": 28.928, "eval_steps_per_second": 14.464, "step": 185000 }, { "epoch": 2.05, "learning_rate": 1.585817314790209e-05, "loss": 0.5782, "step": 185005 }, { "epoch": 2.05, "learning_rate": 1.5857250420763574e-05, "loss": 0.6137, "step": 185010 }, { "epoch": 2.05, "learning_rate": 1.5856327693625065e-05, "loss": 0.5847, "step": 185015 }, { "epoch": 2.05, "learning_rate": 1.585540496648655e-05, "loss": 0.6452, "step": 185020 }, { "epoch": 2.05, "learning_rate": 1.585448223934804e-05, "loss": 0.5986, "step": 185025 }, { "epoch": 2.05, "learning_rate": 1.5853559512209525e-05, "loss": 0.5819, "step": 185030 }, { "epoch": 2.05, "learning_rate": 1.5852636785071016e-05, "loss": 0.6522, "step": 185035 }, { "epoch": 2.05, "learning_rate": 1.58517140579325e-05, "loss": 0.5668, "step": 185040 }, { "epoch": 2.05, "learning_rate": 1.585079133079399e-05, "loss": 0.5844, "step": 185045 }, { "epoch": 2.05, "learning_rate": 1.5849868603655477e-05, "loss": 0.5636, "step": 185050 }, { "epoch": 2.05, "learning_rate": 1.5848945876516965e-05, "loss": 0.6432, "step": 185055 }, { "epoch": 2.05, "learning_rate": 1.5848023149378452e-05, "loss": 0.6364, "step": 185060 }, { "epoch": 2.05, "learning_rate": 1.5847100422239937e-05, "loss": 0.5573, "step": 185065 }, { "epoch": 2.05, "learning_rate": 1.5846177695101428e-05, "loss": 0.546, "step": 185070 }, { "epoch": 2.05, "learning_rate": 1.5845254967962913e-05, "loss": 0.5711, "step": 185075 }, { "epoch": 2.05, "learning_rate": 1.5844332240824404e-05, "loss": 0.6267, "step": 185080 }, { "epoch": 2.05, "learning_rate": 1.5843409513685888e-05, "loss": 0.6055, "step": 185085 }, { "epoch": 2.05, "learning_rate": 1.584248678654738e-05, "loss": 0.6541, "step": 185090 }, { "epoch": 2.05, "learning_rate": 1.5841564059408864e-05, "loss": 0.5946, "step": 185095 }, { "epoch": 2.05, "learning_rate": 1.5840641332270352e-05, "loss": 0.6446, "step": 185100 }, { "epoch": 2.05, "learning_rate": 1.583971860513184e-05, "loss": 0.6345, "step": 185105 }, { "epoch": 2.05, "learning_rate": 1.5838795877993327e-05, "loss": 0.5953, "step": 185110 }, { "epoch": 2.05, "learning_rate": 1.5837873150854815e-05, "loss": 0.5756, "step": 185115 }, { "epoch": 2.05, "learning_rate": 1.5836950423716303e-05, "loss": 0.5645, "step": 185120 }, { "epoch": 2.05, "learning_rate": 1.583602769657779e-05, "loss": 0.5921, "step": 185125 }, { "epoch": 2.05, "learning_rate": 1.5835104969439275e-05, "loss": 0.618, "step": 185130 }, { "epoch": 2.05, "learning_rate": 1.5834182242300767e-05, "loss": 0.5721, "step": 185135 }, { "epoch": 2.05, "learning_rate": 1.583325951516225e-05, "loss": 0.5692, "step": 185140 }, { "epoch": 2.05, "learning_rate": 1.5832336788023742e-05, "loss": 0.5784, "step": 185145 }, { "epoch": 2.05, "learning_rate": 1.5831414060885227e-05, "loss": 0.625, "step": 185150 }, { "epoch": 2.05, "learning_rate": 1.5830491333746718e-05, "loss": 0.5774, "step": 185155 }, { "epoch": 2.05, "learning_rate": 1.5829568606608203e-05, "loss": 0.5764, "step": 185160 }, { "epoch": 2.05, "learning_rate": 1.582864587946969e-05, "loss": 0.609, "step": 185165 }, { "epoch": 2.05, "learning_rate": 1.5827723152331178e-05, "loss": 0.6432, "step": 185170 }, { "epoch": 2.05, "learning_rate": 1.5826800425192666e-05, "loss": 0.5663, "step": 185175 }, { "epoch": 2.05, "learning_rate": 1.5825877698054154e-05, "loss": 0.553, "step": 185180 }, { "epoch": 2.05, "learning_rate": 1.5824954970915642e-05, "loss": 0.6197, "step": 185185 }, { "epoch": 2.05, "learning_rate": 1.582403224377713e-05, "loss": 0.6114, "step": 185190 }, { "epoch": 2.05, "learning_rate": 1.5823109516638617e-05, "loss": 0.5724, "step": 185195 }, { "epoch": 2.05, "learning_rate": 1.5822186789500105e-05, "loss": 0.5605, "step": 185200 }, { "epoch": 2.05, "learning_rate": 1.582126406236159e-05, "loss": 0.5714, "step": 185205 }, { "epoch": 2.05, "learning_rate": 1.582034133522308e-05, "loss": 0.6492, "step": 185210 }, { "epoch": 2.05, "learning_rate": 1.5819418608084565e-05, "loss": 0.6042, "step": 185215 }, { "epoch": 2.05, "learning_rate": 1.5818495880946053e-05, "loss": 0.5482, "step": 185220 }, { "epoch": 2.05, "learning_rate": 1.581757315380754e-05, "loss": 0.6326, "step": 185225 }, { "epoch": 2.05, "learning_rate": 1.581665042666903e-05, "loss": 0.6134, "step": 185230 }, { "epoch": 2.05, "learning_rate": 1.5815727699530517e-05, "loss": 0.5976, "step": 185235 }, { "epoch": 2.05, "learning_rate": 1.5814804972392005e-05, "loss": 0.5949, "step": 185240 }, { "epoch": 2.05, "learning_rate": 1.5813882245253493e-05, "loss": 0.6042, "step": 185245 }, { "epoch": 2.05, "learning_rate": 1.581295951811498e-05, "loss": 0.6176, "step": 185250 }, { "epoch": 2.05, "learning_rate": 1.5812036790976468e-05, "loss": 0.6287, "step": 185255 }, { "epoch": 2.05, "learning_rate": 1.5811114063837956e-05, "loss": 0.5596, "step": 185260 }, { "epoch": 2.05, "learning_rate": 1.5810191336699444e-05, "loss": 0.6492, "step": 185265 }, { "epoch": 2.05, "learning_rate": 1.5809268609560932e-05, "loss": 0.5893, "step": 185270 }, { "epoch": 2.05, "learning_rate": 1.5808345882422416e-05, "loss": 0.5973, "step": 185275 }, { "epoch": 2.05, "learning_rate": 1.5807423155283904e-05, "loss": 0.6055, "step": 185280 }, { "epoch": 2.05, "learning_rate": 1.5806500428145392e-05, "loss": 0.5971, "step": 185285 }, { "epoch": 2.05, "learning_rate": 1.580557770100688e-05, "loss": 0.5924, "step": 185290 }, { "epoch": 2.05, "learning_rate": 1.5804654973868368e-05, "loss": 0.5826, "step": 185295 }, { "epoch": 2.05, "learning_rate": 1.5803732246729856e-05, "loss": 0.659, "step": 185300 }, { "epoch": 2.05, "learning_rate": 1.5802809519591343e-05, "loss": 0.5978, "step": 185305 }, { "epoch": 2.05, "learning_rate": 1.580188679245283e-05, "loss": 0.6241, "step": 185310 }, { "epoch": 2.05, "learning_rate": 1.580096406531432e-05, "loss": 0.5964, "step": 185315 }, { "epoch": 2.05, "learning_rate": 1.5800041338175807e-05, "loss": 0.6482, "step": 185320 }, { "epoch": 2.05, "learning_rate": 1.5799118611037295e-05, "loss": 0.603, "step": 185325 }, { "epoch": 2.05, "learning_rate": 1.579819588389878e-05, "loss": 0.6276, "step": 185330 }, { "epoch": 2.05, "learning_rate": 1.579727315676027e-05, "loss": 0.5896, "step": 185335 }, { "epoch": 2.05, "learning_rate": 1.5796350429621755e-05, "loss": 0.5799, "step": 185340 }, { "epoch": 2.05, "learning_rate": 1.5795427702483246e-05, "loss": 0.6126, "step": 185345 }, { "epoch": 2.05, "learning_rate": 1.579450497534473e-05, "loss": 0.6744, "step": 185350 }, { "epoch": 2.05, "learning_rate": 1.579358224820622e-05, "loss": 0.6505, "step": 185355 }, { "epoch": 2.05, "learning_rate": 1.5792659521067706e-05, "loss": 0.5697, "step": 185360 }, { "epoch": 2.05, "learning_rate": 1.5791736793929194e-05, "loss": 0.5916, "step": 185365 }, { "epoch": 2.05, "learning_rate": 1.5790814066790682e-05, "loss": 0.5745, "step": 185370 }, { "epoch": 2.05, "learning_rate": 1.578989133965217e-05, "loss": 0.5806, "step": 185375 }, { "epoch": 2.05, "learning_rate": 1.5788968612513658e-05, "loss": 0.5774, "step": 185380 }, { "epoch": 2.05, "learning_rate": 1.5788045885375146e-05, "loss": 0.6064, "step": 185385 }, { "epoch": 2.05, "learning_rate": 1.5787123158236633e-05, "loss": 0.6083, "step": 185390 }, { "epoch": 2.05, "learning_rate": 1.5786200431098118e-05, "loss": 0.6379, "step": 185395 }, { "epoch": 2.05, "learning_rate": 1.578527770395961e-05, "loss": 0.6125, "step": 185400 }, { "epoch": 2.05, "learning_rate": 1.5784354976821094e-05, "loss": 0.5669, "step": 185405 }, { "epoch": 2.05, "learning_rate": 1.5783432249682585e-05, "loss": 0.5609, "step": 185410 }, { "epoch": 2.05, "learning_rate": 1.578250952254407e-05, "loss": 0.5811, "step": 185415 }, { "epoch": 2.05, "learning_rate": 1.578158679540556e-05, "loss": 0.6134, "step": 185420 }, { "epoch": 2.05, "learning_rate": 1.5780664068267045e-05, "loss": 0.6136, "step": 185425 }, { "epoch": 2.05, "learning_rate": 1.5779741341128533e-05, "loss": 0.5825, "step": 185430 }, { "epoch": 2.05, "learning_rate": 1.577881861399002e-05, "loss": 0.598, "step": 185435 }, { "epoch": 2.05, "learning_rate": 1.577789588685151e-05, "loss": 0.6904, "step": 185440 }, { "epoch": 2.05, "learning_rate": 1.5776973159712996e-05, "loss": 0.5743, "step": 185445 }, { "epoch": 2.05, "learning_rate": 1.577605043257448e-05, "loss": 0.5882, "step": 185450 }, { "epoch": 2.05, "learning_rate": 1.5775127705435972e-05, "loss": 0.6043, "step": 185455 }, { "epoch": 2.05, "learning_rate": 1.5774204978297457e-05, "loss": 0.6221, "step": 185460 }, { "epoch": 2.05, "learning_rate": 1.5773282251158948e-05, "loss": 0.6261, "step": 185465 }, { "epoch": 2.05, "learning_rate": 1.5772359524020432e-05, "loss": 0.5827, "step": 185470 }, { "epoch": 2.05, "learning_rate": 1.5771436796881923e-05, "loss": 0.6331, "step": 185475 }, { "epoch": 2.05, "learning_rate": 1.5770514069743408e-05, "loss": 0.6122, "step": 185480 }, { "epoch": 2.05, "learning_rate": 1.5769591342604896e-05, "loss": 0.6072, "step": 185485 }, { "epoch": 2.05, "learning_rate": 1.5768668615466384e-05, "loss": 0.6124, "step": 185490 }, { "epoch": 2.05, "learning_rate": 1.576774588832787e-05, "loss": 0.6204, "step": 185495 }, { "epoch": 2.05, "learning_rate": 1.576682316118936e-05, "loss": 0.5555, "step": 185500 }, { "epoch": 2.05, "learning_rate": 1.5765900434050844e-05, "loss": 0.6258, "step": 185505 }, { "epoch": 2.05, "learning_rate": 1.5764977706912335e-05, "loss": 0.6007, "step": 185510 }, { "epoch": 2.05, "learning_rate": 1.576405497977382e-05, "loss": 0.6668, "step": 185515 }, { "epoch": 2.05, "learning_rate": 1.576313225263531e-05, "loss": 0.6396, "step": 185520 }, { "epoch": 2.05, "learning_rate": 1.5762209525496795e-05, "loss": 0.6005, "step": 185525 }, { "epoch": 2.05, "learning_rate": 1.5761286798358286e-05, "loss": 0.6466, "step": 185530 }, { "epoch": 2.05, "learning_rate": 1.576036407121977e-05, "loss": 0.5733, "step": 185535 }, { "epoch": 2.05, "learning_rate": 1.5759441344081262e-05, "loss": 0.5942, "step": 185540 }, { "epoch": 2.05, "learning_rate": 1.5758518616942747e-05, "loss": 0.5558, "step": 185545 }, { "epoch": 2.05, "learning_rate": 1.5757595889804234e-05, "loss": 0.6482, "step": 185550 }, { "epoch": 2.05, "learning_rate": 1.5756673162665722e-05, "loss": 0.5677, "step": 185555 }, { "epoch": 2.05, "learning_rate": 1.575575043552721e-05, "loss": 0.574, "step": 185560 }, { "epoch": 2.05, "learning_rate": 1.5754827708388698e-05, "loss": 0.5438, "step": 185565 }, { "epoch": 2.05, "learning_rate": 1.5753904981250186e-05, "loss": 0.6748, "step": 185570 }, { "epoch": 2.05, "learning_rate": 1.5752982254111674e-05, "loss": 0.5764, "step": 185575 }, { "epoch": 2.05, "learning_rate": 1.5752059526973158e-05, "loss": 0.5965, "step": 185580 }, { "epoch": 2.05, "learning_rate": 1.575113679983465e-05, "loss": 0.6396, "step": 185585 }, { "epoch": 2.05, "learning_rate": 1.5750214072696134e-05, "loss": 0.6024, "step": 185590 }, { "epoch": 2.06, "learning_rate": 1.5749291345557625e-05, "loss": 0.5522, "step": 185595 }, { "epoch": 2.06, "learning_rate": 1.574836861841911e-05, "loss": 0.6156, "step": 185600 }, { "epoch": 2.06, "learning_rate": 1.5747445891280597e-05, "loss": 0.6201, "step": 185605 }, { "epoch": 2.06, "learning_rate": 1.5746523164142085e-05, "loss": 0.6071, "step": 185610 }, { "epoch": 2.06, "learning_rate": 1.5745600437003573e-05, "loss": 0.5784, "step": 185615 }, { "epoch": 2.06, "learning_rate": 1.574467770986506e-05, "loss": 0.595, "step": 185620 }, { "epoch": 2.06, "learning_rate": 1.574375498272655e-05, "loss": 0.6469, "step": 185625 }, { "epoch": 2.06, "learning_rate": 1.5742832255588037e-05, "loss": 0.5721, "step": 185630 }, { "epoch": 2.06, "learning_rate": 1.5741909528449524e-05, "loss": 0.6264, "step": 185635 }, { "epoch": 2.06, "learning_rate": 1.5740986801311012e-05, "loss": 0.6398, "step": 185640 }, { "epoch": 2.06, "learning_rate": 1.57400640741725e-05, "loss": 0.6334, "step": 185645 }, { "epoch": 2.06, "learning_rate": 1.5739141347033988e-05, "loss": 0.6065, "step": 185650 }, { "epoch": 2.06, "learning_rate": 1.5738218619895472e-05, "loss": 0.6436, "step": 185655 }, { "epoch": 2.06, "learning_rate": 1.573729589275696e-05, "loss": 0.574, "step": 185660 }, { "epoch": 2.06, "learning_rate": 1.5736373165618448e-05, "loss": 0.6192, "step": 185665 }, { "epoch": 2.06, "learning_rate": 1.5735450438479936e-05, "loss": 0.5834, "step": 185670 }, { "epoch": 2.06, "learning_rate": 1.5734527711341424e-05, "loss": 0.6687, "step": 185675 }, { "epoch": 2.06, "learning_rate": 1.5733604984202912e-05, "loss": 0.633, "step": 185680 }, { "epoch": 2.06, "learning_rate": 1.57326822570644e-05, "loss": 0.5868, "step": 185685 }, { "epoch": 2.06, "learning_rate": 1.5731759529925887e-05, "loss": 0.616, "step": 185690 }, { "epoch": 2.06, "learning_rate": 1.5730836802787375e-05, "loss": 0.6114, "step": 185695 }, { "epoch": 2.06, "learning_rate": 1.5729914075648863e-05, "loss": 0.5935, "step": 185700 }, { "epoch": 2.06, "learning_rate": 1.572899134851035e-05, "loss": 0.6185, "step": 185705 }, { "epoch": 2.06, "learning_rate": 1.572806862137184e-05, "loss": 0.5769, "step": 185710 }, { "epoch": 2.06, "learning_rate": 1.5727145894233327e-05, "loss": 0.5739, "step": 185715 }, { "epoch": 2.06, "learning_rate": 1.5726223167094814e-05, "loss": 0.6148, "step": 185720 }, { "epoch": 2.06, "learning_rate": 1.57253004399563e-05, "loss": 0.6356, "step": 185725 }, { "epoch": 2.06, "learning_rate": 1.5724377712817787e-05, "loss": 0.6681, "step": 185730 }, { "epoch": 2.06, "learning_rate": 1.5723454985679275e-05, "loss": 0.5827, "step": 185735 }, { "epoch": 2.06, "learning_rate": 1.5722532258540763e-05, "loss": 0.63, "step": 185740 }, { "epoch": 2.06, "learning_rate": 1.572160953140225e-05, "loss": 0.5992, "step": 185745 }, { "epoch": 2.06, "learning_rate": 1.5720686804263738e-05, "loss": 0.5195, "step": 185750 }, { "epoch": 2.06, "learning_rate": 1.5719764077125226e-05, "loss": 0.6009, "step": 185755 }, { "epoch": 2.06, "learning_rate": 1.5718841349986714e-05, "loss": 0.5528, "step": 185760 }, { "epoch": 2.06, "learning_rate": 1.5717918622848202e-05, "loss": 0.6178, "step": 185765 }, { "epoch": 2.06, "learning_rate": 1.571699589570969e-05, "loss": 0.6042, "step": 185770 }, { "epoch": 2.06, "learning_rate": 1.5716073168571177e-05, "loss": 0.6284, "step": 185775 }, { "epoch": 2.06, "learning_rate": 1.5715150441432662e-05, "loss": 0.5382, "step": 185780 }, { "epoch": 2.06, "learning_rate": 1.5714227714294153e-05, "loss": 0.6032, "step": 185785 }, { "epoch": 2.06, "learning_rate": 1.5713304987155638e-05, "loss": 0.5922, "step": 185790 }, { "epoch": 2.06, "learning_rate": 1.571238226001713e-05, "loss": 0.6007, "step": 185795 }, { "epoch": 2.06, "learning_rate": 1.5711459532878613e-05, "loss": 0.5949, "step": 185800 }, { "epoch": 2.06, "learning_rate": 1.5710536805740105e-05, "loss": 0.6193, "step": 185805 }, { "epoch": 2.06, "learning_rate": 1.570961407860159e-05, "loss": 0.6729, "step": 185810 }, { "epoch": 2.06, "learning_rate": 1.5708691351463077e-05, "loss": 0.6014, "step": 185815 }, { "epoch": 2.06, "learning_rate": 1.5707768624324565e-05, "loss": 0.607, "step": 185820 }, { "epoch": 2.06, "learning_rate": 1.5706845897186053e-05, "loss": 0.6273, "step": 185825 }, { "epoch": 2.06, "learning_rate": 1.570592317004754e-05, "loss": 0.5775, "step": 185830 }, { "epoch": 2.06, "learning_rate": 1.5705000442909025e-05, "loss": 0.6097, "step": 185835 }, { "epoch": 2.06, "learning_rate": 1.5704077715770516e-05, "loss": 0.5466, "step": 185840 }, { "epoch": 2.06, "learning_rate": 1.5703154988632e-05, "loss": 0.6108, "step": 185845 }, { "epoch": 2.06, "learning_rate": 1.5702232261493492e-05, "loss": 0.5925, "step": 185850 }, { "epoch": 2.06, "learning_rate": 1.5701309534354976e-05, "loss": 0.6368, "step": 185855 }, { "epoch": 2.06, "learning_rate": 1.5700386807216467e-05, "loss": 0.5441, "step": 185860 }, { "epoch": 2.06, "learning_rate": 1.5699464080077952e-05, "loss": 0.6129, "step": 185865 }, { "epoch": 2.06, "learning_rate": 1.5698541352939443e-05, "loss": 0.5994, "step": 185870 }, { "epoch": 2.06, "learning_rate": 1.5697618625800928e-05, "loss": 0.6107, "step": 185875 }, { "epoch": 2.06, "learning_rate": 1.5696695898662415e-05, "loss": 0.5613, "step": 185880 }, { "epoch": 2.06, "learning_rate": 1.5695773171523903e-05, "loss": 0.6309, "step": 185885 }, { "epoch": 2.06, "learning_rate": 1.5694850444385388e-05, "loss": 0.5988, "step": 185890 }, { "epoch": 2.06, "learning_rate": 1.569392771724688e-05, "loss": 0.5382, "step": 185895 }, { "epoch": 2.06, "learning_rate": 1.5693004990108363e-05, "loss": 0.5871, "step": 185900 }, { "epoch": 2.06, "learning_rate": 1.5692082262969855e-05, "loss": 0.618, "step": 185905 }, { "epoch": 2.06, "learning_rate": 1.569115953583134e-05, "loss": 0.62, "step": 185910 }, { "epoch": 2.06, "learning_rate": 1.569023680869283e-05, "loss": 0.5356, "step": 185915 }, { "epoch": 2.06, "learning_rate": 1.5689314081554315e-05, "loss": 0.5729, "step": 185920 }, { "epoch": 2.06, "learning_rate": 1.5688391354415806e-05, "loss": 0.5695, "step": 185925 }, { "epoch": 2.06, "learning_rate": 1.568746862727729e-05, "loss": 0.6563, "step": 185930 }, { "epoch": 2.06, "learning_rate": 1.568654590013878e-05, "loss": 0.5937, "step": 185935 }, { "epoch": 2.06, "learning_rate": 1.5685623173000266e-05, "loss": 0.6572, "step": 185940 }, { "epoch": 2.06, "learning_rate": 1.5684700445861754e-05, "loss": 0.6106, "step": 185945 }, { "epoch": 2.06, "learning_rate": 1.5683777718723242e-05, "loss": 0.5829, "step": 185950 }, { "epoch": 2.06, "learning_rate": 1.568285499158473e-05, "loss": 0.6425, "step": 185955 }, { "epoch": 2.06, "learning_rate": 1.5681932264446218e-05, "loss": 0.5864, "step": 185960 }, { "epoch": 2.06, "learning_rate": 1.5681009537307702e-05, "loss": 0.6732, "step": 185965 }, { "epoch": 2.06, "learning_rate": 1.5680086810169193e-05, "loss": 0.6643, "step": 185970 }, { "epoch": 2.06, "learning_rate": 1.5679164083030678e-05, "loss": 0.6084, "step": 185975 }, { "epoch": 2.06, "learning_rate": 1.567824135589217e-05, "loss": 0.5446, "step": 185980 }, { "epoch": 2.06, "learning_rate": 1.5677318628753654e-05, "loss": 0.5911, "step": 185985 }, { "epoch": 2.06, "learning_rate": 1.567639590161514e-05, "loss": 0.6388, "step": 185990 }, { "epoch": 2.06, "learning_rate": 1.567547317447663e-05, "loss": 0.5993, "step": 185995 }, { "epoch": 2.06, "learning_rate": 1.5674550447338117e-05, "loss": 0.5892, "step": 186000 }, { "epoch": 2.06, "eval_loss": 0.5979287028312683, "eval_runtime": 69.2233, "eval_samples_per_second": 28.892, "eval_steps_per_second": 14.446, "step": 186000 }, { "epoch": 2.06, "learning_rate": 1.5673627720199605e-05, "loss": 0.6199, "step": 186005 }, { "epoch": 2.06, "learning_rate": 1.5672704993061093e-05, "loss": 0.6294, "step": 186010 }, { "epoch": 2.06, "learning_rate": 1.567178226592258e-05, "loss": 0.6059, "step": 186015 }, { "epoch": 2.06, "learning_rate": 1.567085953878407e-05, "loss": 0.6241, "step": 186020 }, { "epoch": 2.06, "learning_rate": 1.5669936811645556e-05, "loss": 0.6181, "step": 186025 }, { "epoch": 2.06, "learning_rate": 1.5669014084507044e-05, "loss": 0.643, "step": 186030 }, { "epoch": 2.06, "learning_rate": 1.5668091357368532e-05, "loss": 0.6007, "step": 186035 }, { "epoch": 2.06, "learning_rate": 1.5667168630230016e-05, "loss": 0.5987, "step": 186040 }, { "epoch": 2.06, "learning_rate": 1.5666245903091504e-05, "loss": 0.5873, "step": 186045 }, { "epoch": 2.06, "learning_rate": 1.5665323175952992e-05, "loss": 0.6268, "step": 186050 }, { "epoch": 2.06, "learning_rate": 1.566440044881448e-05, "loss": 0.6013, "step": 186055 }, { "epoch": 2.06, "learning_rate": 1.5663477721675968e-05, "loss": 0.5947, "step": 186060 }, { "epoch": 2.06, "learning_rate": 1.5662554994537456e-05, "loss": 0.5848, "step": 186065 }, { "epoch": 2.06, "learning_rate": 1.5661632267398944e-05, "loss": 0.6563, "step": 186070 }, { "epoch": 2.06, "learning_rate": 1.566070954026043e-05, "loss": 0.6454, "step": 186075 }, { "epoch": 2.06, "learning_rate": 1.565978681312192e-05, "loss": 0.6061, "step": 186080 }, { "epoch": 2.06, "learning_rate": 1.5658864085983407e-05, "loss": 0.5859, "step": 186085 }, { "epoch": 2.06, "learning_rate": 1.5657941358844895e-05, "loss": 0.5816, "step": 186090 }, { "epoch": 2.06, "learning_rate": 1.5657018631706383e-05, "loss": 0.6192, "step": 186095 }, { "epoch": 2.06, "learning_rate": 1.565609590456787e-05, "loss": 0.607, "step": 186100 }, { "epoch": 2.06, "learning_rate": 1.565517317742936e-05, "loss": 0.6143, "step": 186105 }, { "epoch": 2.06, "learning_rate": 1.5654250450290843e-05, "loss": 0.5992, "step": 186110 }, { "epoch": 2.06, "learning_rate": 1.565332772315233e-05, "loss": 0.5334, "step": 186115 }, { "epoch": 2.06, "learning_rate": 1.565240499601382e-05, "loss": 0.5449, "step": 186120 }, { "epoch": 2.06, "learning_rate": 1.5651482268875307e-05, "loss": 0.6079, "step": 186125 }, { "epoch": 2.06, "learning_rate": 1.5650559541736794e-05, "loss": 0.578, "step": 186130 }, { "epoch": 2.06, "learning_rate": 1.5649636814598282e-05, "loss": 0.5936, "step": 186135 }, { "epoch": 2.06, "learning_rate": 1.564871408745977e-05, "loss": 0.5898, "step": 186140 }, { "epoch": 2.06, "learning_rate": 1.5647791360321258e-05, "loss": 0.6807, "step": 186145 }, { "epoch": 2.06, "learning_rate": 1.5646868633182746e-05, "loss": 0.6452, "step": 186150 }, { "epoch": 2.06, "learning_rate": 1.5645945906044234e-05, "loss": 0.6029, "step": 186155 }, { "epoch": 2.06, "learning_rate": 1.564502317890572e-05, "loss": 0.6657, "step": 186160 }, { "epoch": 2.06, "learning_rate": 1.5644100451767206e-05, "loss": 0.6243, "step": 186165 }, { "epoch": 2.06, "learning_rate": 1.5643177724628697e-05, "loss": 0.6148, "step": 186170 }, { "epoch": 2.06, "learning_rate": 1.564225499749018e-05, "loss": 0.5708, "step": 186175 }, { "epoch": 2.06, "learning_rate": 1.5641332270351673e-05, "loss": 0.6283, "step": 186180 }, { "epoch": 2.06, "learning_rate": 1.5640409543213157e-05, "loss": 0.6189, "step": 186185 }, { "epoch": 2.06, "learning_rate": 1.5639486816074645e-05, "loss": 0.5987, "step": 186190 }, { "epoch": 2.06, "learning_rate": 1.5638564088936133e-05, "loss": 0.6037, "step": 186195 }, { "epoch": 2.06, "learning_rate": 1.563764136179762e-05, "loss": 0.5685, "step": 186200 }, { "epoch": 2.06, "learning_rate": 1.563671863465911e-05, "loss": 0.6245, "step": 186205 }, { "epoch": 2.06, "learning_rate": 1.5635795907520597e-05, "loss": 0.5428, "step": 186210 }, { "epoch": 2.06, "learning_rate": 1.5634873180382084e-05, "loss": 0.6365, "step": 186215 }, { "epoch": 2.06, "learning_rate": 1.563395045324357e-05, "loss": 0.632, "step": 186220 }, { "epoch": 2.06, "learning_rate": 1.563302772610506e-05, "loss": 0.5585, "step": 186225 }, { "epoch": 2.06, "learning_rate": 1.5632104998966545e-05, "loss": 0.6072, "step": 186230 }, { "epoch": 2.06, "learning_rate": 1.5631182271828036e-05, "loss": 0.6054, "step": 186235 }, { "epoch": 2.06, "learning_rate": 1.563025954468952e-05, "loss": 0.5595, "step": 186240 }, { "epoch": 2.06, "learning_rate": 1.562933681755101e-05, "loss": 0.6083, "step": 186245 }, { "epoch": 2.06, "learning_rate": 1.5628414090412496e-05, "loss": 0.5727, "step": 186250 }, { "epoch": 2.06, "learning_rate": 1.5627491363273987e-05, "loss": 0.5816, "step": 186255 }, { "epoch": 2.06, "learning_rate": 1.562656863613547e-05, "loss": 0.6003, "step": 186260 }, { "epoch": 2.06, "learning_rate": 1.562564590899696e-05, "loss": 0.5703, "step": 186265 }, { "epoch": 2.06, "learning_rate": 1.5624723181858447e-05, "loss": 0.5977, "step": 186270 }, { "epoch": 2.06, "learning_rate": 1.5623800454719932e-05, "loss": 0.6331, "step": 186275 }, { "epoch": 2.06, "learning_rate": 1.5622877727581423e-05, "loss": 0.5606, "step": 186280 }, { "epoch": 2.06, "learning_rate": 1.5621955000442908e-05, "loss": 0.5682, "step": 186285 }, { "epoch": 2.06, "learning_rate": 1.56210322733044e-05, "loss": 0.5761, "step": 186290 }, { "epoch": 2.06, "learning_rate": 1.5620109546165883e-05, "loss": 0.7012, "step": 186295 }, { "epoch": 2.06, "learning_rate": 1.5619186819027374e-05, "loss": 0.6001, "step": 186300 }, { "epoch": 2.06, "learning_rate": 1.561826409188886e-05, "loss": 0.5643, "step": 186305 }, { "epoch": 2.06, "learning_rate": 1.561734136475035e-05, "loss": 0.5711, "step": 186310 }, { "epoch": 2.06, "learning_rate": 1.5616418637611835e-05, "loss": 0.563, "step": 186315 }, { "epoch": 2.06, "learning_rate": 1.5615495910473322e-05, "loss": 0.602, "step": 186320 }, { "epoch": 2.06, "learning_rate": 1.561457318333481e-05, "loss": 0.6291, "step": 186325 }, { "epoch": 2.06, "learning_rate": 1.5613650456196298e-05, "loss": 0.6264, "step": 186330 }, { "epoch": 2.06, "learning_rate": 1.5612727729057786e-05, "loss": 0.5754, "step": 186335 }, { "epoch": 2.06, "learning_rate": 1.561180500191927e-05, "loss": 0.5749, "step": 186340 }, { "epoch": 2.06, "learning_rate": 1.5610882274780762e-05, "loss": 0.6137, "step": 186345 }, { "epoch": 2.06, "learning_rate": 1.5609959547642246e-05, "loss": 0.5728, "step": 186350 }, { "epoch": 2.06, "learning_rate": 1.5609036820503737e-05, "loss": 0.6305, "step": 186355 }, { "epoch": 2.06, "learning_rate": 1.5608114093365222e-05, "loss": 0.5991, "step": 186360 }, { "epoch": 2.06, "learning_rate": 1.5607191366226713e-05, "loss": 0.584, "step": 186365 }, { "epoch": 2.06, "learning_rate": 1.5606268639088198e-05, "loss": 0.6081, "step": 186370 }, { "epoch": 2.06, "learning_rate": 1.5605345911949685e-05, "loss": 0.5593, "step": 186375 }, { "epoch": 2.06, "learning_rate": 1.5604423184811173e-05, "loss": 0.606, "step": 186380 }, { "epoch": 2.06, "learning_rate": 1.560350045767266e-05, "loss": 0.6282, "step": 186385 }, { "epoch": 2.06, "learning_rate": 1.560257773053415e-05, "loss": 0.6052, "step": 186390 }, { "epoch": 2.06, "learning_rate": 1.5601655003395637e-05, "loss": 0.649, "step": 186395 }, { "epoch": 2.06, "learning_rate": 1.5600732276257125e-05, "loss": 0.6784, "step": 186400 }, { "epoch": 2.06, "learning_rate": 1.5599809549118612e-05, "loss": 0.6296, "step": 186405 }, { "epoch": 2.06, "learning_rate": 1.55988868219801e-05, "loss": 0.5758, "step": 186410 }, { "epoch": 2.06, "learning_rate": 1.5597964094841585e-05, "loss": 0.6144, "step": 186415 }, { "epoch": 2.06, "learning_rate": 1.5597041367703076e-05, "loss": 0.606, "step": 186420 }, { "epoch": 2.06, "learning_rate": 1.559611864056456e-05, "loss": 0.6492, "step": 186425 }, { "epoch": 2.06, "learning_rate": 1.559519591342605e-05, "loss": 0.6103, "step": 186430 }, { "epoch": 2.06, "learning_rate": 1.5594273186287536e-05, "loss": 0.6228, "step": 186435 }, { "epoch": 2.06, "learning_rate": 1.5593350459149024e-05, "loss": 0.6241, "step": 186440 }, { "epoch": 2.06, "learning_rate": 1.5592427732010512e-05, "loss": 0.5925, "step": 186445 }, { "epoch": 2.06, "learning_rate": 1.5591505004872e-05, "loss": 0.6282, "step": 186450 }, { "epoch": 2.06, "learning_rate": 1.5590582277733488e-05, "loss": 0.5895, "step": 186455 }, { "epoch": 2.06, "learning_rate": 1.5589659550594975e-05, "loss": 0.5693, "step": 186460 }, { "epoch": 2.06, "learning_rate": 1.5588736823456463e-05, "loss": 0.6403, "step": 186465 }, { "epoch": 2.06, "learning_rate": 1.558781409631795e-05, "loss": 0.597, "step": 186470 }, { "epoch": 2.06, "learning_rate": 1.558689136917944e-05, "loss": 0.5793, "step": 186475 }, { "epoch": 2.06, "learning_rate": 1.5585968642040927e-05, "loss": 0.5637, "step": 186480 }, { "epoch": 2.06, "learning_rate": 1.5585045914902415e-05, "loss": 0.592, "step": 186485 }, { "epoch": 2.06, "learning_rate": 1.55841231877639e-05, "loss": 0.5596, "step": 186490 }, { "epoch": 2.07, "learning_rate": 1.5583200460625387e-05, "loss": 0.6254, "step": 186495 }, { "epoch": 2.07, "learning_rate": 1.5582277733486875e-05, "loss": 0.5914, "step": 186500 }, { "epoch": 2.07, "learning_rate": 1.5581355006348363e-05, "loss": 0.6756, "step": 186505 }, { "epoch": 2.07, "learning_rate": 1.558043227920985e-05, "loss": 0.6153, "step": 186510 }, { "epoch": 2.07, "learning_rate": 1.557950955207134e-05, "loss": 0.6099, "step": 186515 }, { "epoch": 2.07, "learning_rate": 1.5578586824932826e-05, "loss": 0.6218, "step": 186520 }, { "epoch": 2.07, "learning_rate": 1.5577664097794314e-05, "loss": 0.6292, "step": 186525 }, { "epoch": 2.07, "learning_rate": 1.5576741370655802e-05, "loss": 0.6041, "step": 186530 }, { "epoch": 2.07, "learning_rate": 1.557581864351729e-05, "loss": 0.546, "step": 186535 }, { "epoch": 2.07, "learning_rate": 1.5574895916378778e-05, "loss": 0.6421, "step": 186540 }, { "epoch": 2.07, "learning_rate": 1.5573973189240265e-05, "loss": 0.5686, "step": 186545 }, { "epoch": 2.07, "learning_rate": 1.557305046210175e-05, "loss": 0.5686, "step": 186550 }, { "epoch": 2.07, "learning_rate": 1.557212773496324e-05, "loss": 0.6111, "step": 186555 }, { "epoch": 2.07, "learning_rate": 1.5571205007824726e-05, "loss": 0.6249, "step": 186560 }, { "epoch": 2.07, "learning_rate": 1.5570282280686213e-05, "loss": 0.5874, "step": 186565 }, { "epoch": 2.07, "learning_rate": 1.55693595535477e-05, "loss": 0.5964, "step": 186570 }, { "epoch": 2.07, "learning_rate": 1.556843682640919e-05, "loss": 0.6162, "step": 186575 }, { "epoch": 2.07, "learning_rate": 1.5567514099270677e-05, "loss": 0.5862, "step": 186580 }, { "epoch": 2.07, "learning_rate": 1.5566591372132165e-05, "loss": 0.5959, "step": 186585 }, { "epoch": 2.07, "learning_rate": 1.5565668644993653e-05, "loss": 0.584, "step": 186590 }, { "epoch": 2.07, "learning_rate": 1.556474591785514e-05, "loss": 0.6253, "step": 186595 }, { "epoch": 2.07, "learning_rate": 1.556382319071663e-05, "loss": 0.6639, "step": 186600 }, { "epoch": 2.07, "learning_rate": 1.5562900463578113e-05, "loss": 0.6011, "step": 186605 }, { "epoch": 2.07, "learning_rate": 1.5561977736439604e-05, "loss": 0.5688, "step": 186610 }, { "epoch": 2.07, "learning_rate": 1.556105500930109e-05, "loss": 0.5639, "step": 186615 }, { "epoch": 2.07, "learning_rate": 1.556013228216258e-05, "loss": 0.6035, "step": 186620 }, { "epoch": 2.07, "learning_rate": 1.5559209555024064e-05, "loss": 0.6359, "step": 186625 }, { "epoch": 2.07, "learning_rate": 1.5558286827885556e-05, "loss": 0.5876, "step": 186630 }, { "epoch": 2.07, "learning_rate": 1.555736410074704e-05, "loss": 0.5855, "step": 186635 }, { "epoch": 2.07, "learning_rate": 1.555644137360853e-05, "loss": 0.6249, "step": 186640 }, { "epoch": 2.07, "learning_rate": 1.5555518646470016e-05, "loss": 0.6624, "step": 186645 }, { "epoch": 2.07, "learning_rate": 1.5554595919331504e-05, "loss": 0.6242, "step": 186650 }, { "epoch": 2.07, "learning_rate": 1.555367319219299e-05, "loss": 0.6323, "step": 186655 }, { "epoch": 2.07, "learning_rate": 1.5552750465054476e-05, "loss": 0.6003, "step": 186660 }, { "epoch": 2.07, "learning_rate": 1.5551827737915967e-05, "loss": 0.5744, "step": 186665 }, { "epoch": 2.07, "learning_rate": 1.555090501077745e-05, "loss": 0.5778, "step": 186670 }, { "epoch": 2.07, "learning_rate": 1.5549982283638943e-05, "loss": 0.5947, "step": 186675 }, { "epoch": 2.07, "learning_rate": 1.5549059556500427e-05, "loss": 0.5845, "step": 186680 }, { "epoch": 2.07, "learning_rate": 1.554813682936192e-05, "loss": 0.568, "step": 186685 }, { "epoch": 2.07, "learning_rate": 1.5547214102223403e-05, "loss": 0.6007, "step": 186690 }, { "epoch": 2.07, "learning_rate": 1.5546291375084894e-05, "loss": 0.6115, "step": 186695 }, { "epoch": 2.07, "learning_rate": 1.554536864794638e-05, "loss": 0.587, "step": 186700 }, { "epoch": 2.07, "learning_rate": 1.5544445920807866e-05, "loss": 0.5922, "step": 186705 }, { "epoch": 2.07, "learning_rate": 1.5543523193669354e-05, "loss": 0.63, "step": 186710 }, { "epoch": 2.07, "learning_rate": 1.5542600466530842e-05, "loss": 0.6073, "step": 186715 }, { "epoch": 2.07, "learning_rate": 1.554167773939233e-05, "loss": 0.546, "step": 186720 }, { "epoch": 2.07, "learning_rate": 1.5540755012253814e-05, "loss": 0.6063, "step": 186725 }, { "epoch": 2.07, "learning_rate": 1.5539832285115306e-05, "loss": 0.5925, "step": 186730 }, { "epoch": 2.07, "learning_rate": 1.553890955797679e-05, "loss": 0.6204, "step": 186735 }, { "epoch": 2.07, "learning_rate": 1.553798683083828e-05, "loss": 0.6368, "step": 186740 }, { "epoch": 2.07, "learning_rate": 1.5537064103699766e-05, "loss": 0.6123, "step": 186745 }, { "epoch": 2.07, "learning_rate": 1.5536141376561257e-05, "loss": 0.6047, "step": 186750 }, { "epoch": 2.07, "learning_rate": 1.553521864942274e-05, "loss": 0.5246, "step": 186755 }, { "epoch": 2.07, "learning_rate": 1.553429592228423e-05, "loss": 0.5831, "step": 186760 }, { "epoch": 2.07, "learning_rate": 1.5533373195145717e-05, "loss": 0.5931, "step": 186765 }, { "epoch": 2.07, "learning_rate": 1.5532450468007205e-05, "loss": 0.6273, "step": 186770 }, { "epoch": 2.07, "learning_rate": 1.5531527740868693e-05, "loss": 0.5863, "step": 186775 }, { "epoch": 2.07, "learning_rate": 1.553060501373018e-05, "loss": 0.6086, "step": 186780 }, { "epoch": 2.07, "learning_rate": 1.552968228659167e-05, "loss": 0.5973, "step": 186785 }, { "epoch": 2.07, "learning_rate": 1.5528759559453157e-05, "loss": 0.6003, "step": 186790 }, { "epoch": 2.07, "learning_rate": 1.5527836832314644e-05, "loss": 0.6461, "step": 186795 }, { "epoch": 2.07, "learning_rate": 1.552691410517613e-05, "loss": 0.6021, "step": 186800 }, { "epoch": 2.07, "learning_rate": 1.552599137803762e-05, "loss": 0.6108, "step": 186805 }, { "epoch": 2.07, "learning_rate": 1.5525068650899105e-05, "loss": 0.5877, "step": 186810 }, { "epoch": 2.07, "learning_rate": 1.5524145923760596e-05, "loss": 0.5876, "step": 186815 }, { "epoch": 2.07, "learning_rate": 1.552322319662208e-05, "loss": 0.647, "step": 186820 }, { "epoch": 2.07, "learning_rate": 1.5522300469483568e-05, "loss": 0.6244, "step": 186825 }, { "epoch": 2.07, "learning_rate": 1.5521377742345056e-05, "loss": 0.6538, "step": 186830 }, { "epoch": 2.07, "learning_rate": 1.5520455015206544e-05, "loss": 0.5945, "step": 186835 }, { "epoch": 2.07, "learning_rate": 1.551953228806803e-05, "loss": 0.6008, "step": 186840 }, { "epoch": 2.07, "learning_rate": 1.551860956092952e-05, "loss": 0.5965, "step": 186845 }, { "epoch": 2.07, "learning_rate": 1.5517686833791007e-05, "loss": 0.5348, "step": 186850 }, { "epoch": 2.07, "learning_rate": 1.5516764106652495e-05, "loss": 0.5697, "step": 186855 }, { "epoch": 2.07, "learning_rate": 1.5515841379513983e-05, "loss": 0.5992, "step": 186860 }, { "epoch": 2.07, "learning_rate": 1.551491865237547e-05, "loss": 0.6427, "step": 186865 }, { "epoch": 2.07, "learning_rate": 1.551399592523696e-05, "loss": 0.6326, "step": 186870 }, { "epoch": 2.07, "learning_rate": 1.5513073198098443e-05, "loss": 0.5781, "step": 186875 }, { "epoch": 2.07, "learning_rate": 1.551215047095993e-05, "loss": 0.6185, "step": 186880 }, { "epoch": 2.07, "learning_rate": 1.551122774382142e-05, "loss": 0.62, "step": 186885 }, { "epoch": 2.07, "learning_rate": 1.5510305016682907e-05, "loss": 0.5799, "step": 186890 }, { "epoch": 2.07, "learning_rate": 1.5509382289544395e-05, "loss": 0.628, "step": 186895 }, { "epoch": 2.07, "learning_rate": 1.5508459562405882e-05, "loss": 0.6157, "step": 186900 }, { "epoch": 2.07, "learning_rate": 1.550753683526737e-05, "loss": 0.6425, "step": 186905 }, { "epoch": 2.07, "learning_rate": 1.5506614108128858e-05, "loss": 0.613, "step": 186910 }, { "epoch": 2.07, "learning_rate": 1.5505691380990346e-05, "loss": 0.6032, "step": 186915 }, { "epoch": 2.07, "learning_rate": 1.5504768653851834e-05, "loss": 0.6348, "step": 186920 }, { "epoch": 2.07, "learning_rate": 1.550384592671332e-05, "loss": 0.5821, "step": 186925 }, { "epoch": 2.07, "learning_rate": 1.550292319957481e-05, "loss": 0.5862, "step": 186930 }, { "epoch": 2.07, "learning_rate": 1.5502000472436294e-05, "loss": 0.6484, "step": 186935 }, { "epoch": 2.07, "learning_rate": 1.5501077745297785e-05, "loss": 0.6061, "step": 186940 }, { "epoch": 2.07, "learning_rate": 1.550015501815927e-05, "loss": 0.602, "step": 186945 }, { "epoch": 2.07, "learning_rate": 1.5499232291020758e-05, "loss": 0.6401, "step": 186950 }, { "epoch": 2.07, "learning_rate": 1.5498309563882245e-05, "loss": 0.6148, "step": 186955 }, { "epoch": 2.07, "learning_rate": 1.5497386836743733e-05, "loss": 0.5621, "step": 186960 }, { "epoch": 2.07, "learning_rate": 1.549646410960522e-05, "loss": 0.6076, "step": 186965 }, { "epoch": 2.07, "learning_rate": 1.549554138246671e-05, "loss": 0.5961, "step": 186970 }, { "epoch": 2.07, "learning_rate": 1.5494618655328197e-05, "loss": 0.5663, "step": 186975 }, { "epoch": 2.07, "learning_rate": 1.5493695928189685e-05, "loss": 0.6167, "step": 186980 }, { "epoch": 2.07, "learning_rate": 1.5492773201051172e-05, "loss": 0.5765, "step": 186985 }, { "epoch": 2.07, "learning_rate": 1.5491850473912657e-05, "loss": 0.6478, "step": 186990 }, { "epoch": 2.07, "learning_rate": 1.5490927746774148e-05, "loss": 0.5377, "step": 186995 }, { "epoch": 2.07, "learning_rate": 1.5490005019635633e-05, "loss": 0.5933, "step": 187000 }, { "epoch": 2.07, "eval_loss": 0.5709390044212341, "eval_runtime": 69.1592, "eval_samples_per_second": 28.919, "eval_steps_per_second": 14.459, "step": 187000 }, { "epoch": 2.07, "learning_rate": 1.5489082292497124e-05, "loss": 0.5969, "step": 187005 }, { "epoch": 2.07, "learning_rate": 1.5488159565358608e-05, "loss": 0.5838, "step": 187010 }, { "epoch": 2.07, "learning_rate": 1.54872368382201e-05, "loss": 0.6286, "step": 187015 }, { "epoch": 2.07, "learning_rate": 1.5486314111081584e-05, "loss": 0.5927, "step": 187020 }, { "epoch": 2.07, "learning_rate": 1.5485391383943072e-05, "loss": 0.6459, "step": 187025 }, { "epoch": 2.07, "learning_rate": 1.548446865680456e-05, "loss": 0.5781, "step": 187030 }, { "epoch": 2.07, "learning_rate": 1.5483545929666048e-05, "loss": 0.5328, "step": 187035 }, { "epoch": 2.07, "learning_rate": 1.5482623202527535e-05, "loss": 0.5693, "step": 187040 }, { "epoch": 2.07, "learning_rate": 1.5481700475389023e-05, "loss": 0.6417, "step": 187045 }, { "epoch": 2.07, "learning_rate": 1.548077774825051e-05, "loss": 0.5807, "step": 187050 }, { "epoch": 2.07, "learning_rate": 1.5479855021111996e-05, "loss": 0.5847, "step": 187055 }, { "epoch": 2.07, "learning_rate": 1.5478932293973487e-05, "loss": 0.5865, "step": 187060 }, { "epoch": 2.07, "learning_rate": 1.547800956683497e-05, "loss": 0.6088, "step": 187065 }, { "epoch": 2.07, "learning_rate": 1.5477086839696462e-05, "loss": 0.6226, "step": 187070 }, { "epoch": 2.07, "learning_rate": 1.5476164112557947e-05, "loss": 0.6328, "step": 187075 }, { "epoch": 2.07, "learning_rate": 1.5475241385419438e-05, "loss": 0.6224, "step": 187080 }, { "epoch": 2.07, "learning_rate": 1.5474318658280923e-05, "loss": 0.6734, "step": 187085 }, { "epoch": 2.07, "learning_rate": 1.547339593114241e-05, "loss": 0.5933, "step": 187090 }, { "epoch": 2.07, "learning_rate": 1.54724732040039e-05, "loss": 0.609, "step": 187095 }, { "epoch": 2.07, "learning_rate": 1.5471550476865386e-05, "loss": 0.6034, "step": 187100 }, { "epoch": 2.07, "learning_rate": 1.5470627749726874e-05, "loss": 0.6156, "step": 187105 }, { "epoch": 2.07, "learning_rate": 1.546970502258836e-05, "loss": 0.5603, "step": 187110 }, { "epoch": 2.07, "learning_rate": 1.546878229544985e-05, "loss": 0.5798, "step": 187115 }, { "epoch": 2.07, "learning_rate": 1.5467859568311334e-05, "loss": 0.5519, "step": 187120 }, { "epoch": 2.07, "learning_rate": 1.5466936841172825e-05, "loss": 0.6142, "step": 187125 }, { "epoch": 2.07, "learning_rate": 1.546601411403431e-05, "loss": 0.5783, "step": 187130 }, { "epoch": 2.07, "learning_rate": 1.54650913868958e-05, "loss": 0.578, "step": 187135 }, { "epoch": 2.07, "learning_rate": 1.5464168659757286e-05, "loss": 0.6473, "step": 187140 }, { "epoch": 2.07, "learning_rate": 1.5463245932618773e-05, "loss": 0.6478, "step": 187145 }, { "epoch": 2.07, "learning_rate": 1.546232320548026e-05, "loss": 0.5996, "step": 187150 }, { "epoch": 2.07, "learning_rate": 1.546140047834175e-05, "loss": 0.6123, "step": 187155 }, { "epoch": 2.07, "learning_rate": 1.5460477751203237e-05, "loss": 0.6195, "step": 187160 }, { "epoch": 2.07, "learning_rate": 1.5459555024064725e-05, "loss": 0.6138, "step": 187165 }, { "epoch": 2.07, "learning_rate": 1.5458632296926213e-05, "loss": 0.6123, "step": 187170 }, { "epoch": 2.07, "learning_rate": 1.5457709569787697e-05, "loss": 0.606, "step": 187175 }, { "epoch": 2.07, "learning_rate": 1.545678684264919e-05, "loss": 0.6407, "step": 187180 }, { "epoch": 2.07, "learning_rate": 1.5455864115510673e-05, "loss": 0.5625, "step": 187185 }, { "epoch": 2.07, "learning_rate": 1.5454941388372164e-05, "loss": 0.5651, "step": 187190 }, { "epoch": 2.07, "learning_rate": 1.545401866123365e-05, "loss": 0.6183, "step": 187195 }, { "epoch": 2.07, "learning_rate": 1.545309593409514e-05, "loss": 0.5864, "step": 187200 }, { "epoch": 2.07, "learning_rate": 1.5452173206956624e-05, "loss": 0.6255, "step": 187205 }, { "epoch": 2.07, "learning_rate": 1.5451250479818112e-05, "loss": 0.5771, "step": 187210 }, { "epoch": 2.07, "learning_rate": 1.54503277526796e-05, "loss": 0.6151, "step": 187215 }, { "epoch": 2.07, "learning_rate": 1.5449405025541088e-05, "loss": 0.5859, "step": 187220 }, { "epoch": 2.07, "learning_rate": 1.5448482298402576e-05, "loss": 0.624, "step": 187225 }, { "epoch": 2.07, "learning_rate": 1.5447559571264063e-05, "loss": 0.5915, "step": 187230 }, { "epoch": 2.07, "learning_rate": 1.544663684412555e-05, "loss": 0.6223, "step": 187235 }, { "epoch": 2.07, "learning_rate": 1.544571411698704e-05, "loss": 0.6, "step": 187240 }, { "epoch": 2.07, "learning_rate": 1.5444791389848527e-05, "loss": 0.5805, "step": 187245 }, { "epoch": 2.07, "learning_rate": 1.544386866271001e-05, "loss": 0.5858, "step": 187250 }, { "epoch": 2.07, "learning_rate": 1.5442945935571503e-05, "loss": 0.603, "step": 187255 }, { "epoch": 2.07, "learning_rate": 1.5442023208432987e-05, "loss": 0.619, "step": 187260 }, { "epoch": 2.07, "learning_rate": 1.5441100481294475e-05, "loss": 0.6315, "step": 187265 }, { "epoch": 2.07, "learning_rate": 1.5440177754155963e-05, "loss": 0.6249, "step": 187270 }, { "epoch": 2.07, "learning_rate": 1.543925502701745e-05, "loss": 0.6182, "step": 187275 }, { "epoch": 2.07, "learning_rate": 1.543833229987894e-05, "loss": 0.5848, "step": 187280 }, { "epoch": 2.07, "learning_rate": 1.5437409572740426e-05, "loss": 0.608, "step": 187285 }, { "epoch": 2.07, "learning_rate": 1.5436486845601914e-05, "loss": 0.6279, "step": 187290 }, { "epoch": 2.07, "learning_rate": 1.5435564118463402e-05, "loss": 0.6158, "step": 187295 }, { "epoch": 2.07, "learning_rate": 1.543464139132489e-05, "loss": 0.5573, "step": 187300 }, { "epoch": 2.07, "learning_rate": 1.5433718664186378e-05, "loss": 0.6481, "step": 187305 }, { "epoch": 2.07, "learning_rate": 1.5432795937047866e-05, "loss": 0.6029, "step": 187310 }, { "epoch": 2.07, "learning_rate": 1.5431873209909354e-05, "loss": 0.6208, "step": 187315 }, { "epoch": 2.07, "learning_rate": 1.5430950482770838e-05, "loss": 0.5759, "step": 187320 }, { "epoch": 2.07, "learning_rate": 1.5430027755632326e-05, "loss": 0.5442, "step": 187325 }, { "epoch": 2.07, "learning_rate": 1.5429105028493814e-05, "loss": 0.6031, "step": 187330 }, { "epoch": 2.07, "learning_rate": 1.54281823013553e-05, "loss": 0.652, "step": 187335 }, { "epoch": 2.07, "learning_rate": 1.542725957421679e-05, "loss": 0.5806, "step": 187340 }, { "epoch": 2.07, "learning_rate": 1.5426336847078277e-05, "loss": 0.661, "step": 187345 }, { "epoch": 2.07, "learning_rate": 1.5425414119939765e-05, "loss": 0.6029, "step": 187350 }, { "epoch": 2.07, "learning_rate": 1.5424491392801253e-05, "loss": 0.5959, "step": 187355 }, { "epoch": 2.07, "learning_rate": 1.542356866566274e-05, "loss": 0.6237, "step": 187360 }, { "epoch": 2.07, "learning_rate": 1.542264593852423e-05, "loss": 0.5337, "step": 187365 }, { "epoch": 2.07, "learning_rate": 1.5421723211385716e-05, "loss": 0.6032, "step": 187370 }, { "epoch": 2.07, "learning_rate": 1.54208004842472e-05, "loss": 0.6251, "step": 187375 }, { "epoch": 2.07, "learning_rate": 1.5419877757108692e-05, "loss": 0.5551, "step": 187380 }, { "epoch": 2.07, "learning_rate": 1.5418955029970177e-05, "loss": 0.5593, "step": 187385 }, { "epoch": 2.07, "learning_rate": 1.5418032302831668e-05, "loss": 0.597, "step": 187390 }, { "epoch": 2.07, "learning_rate": 1.5417109575693152e-05, "loss": 0.5955, "step": 187395 }, { "epoch": 2.08, "learning_rate": 1.541618684855464e-05, "loss": 0.6515, "step": 187400 }, { "epoch": 2.08, "learning_rate": 1.5415264121416128e-05, "loss": 0.5808, "step": 187405 }, { "epoch": 2.08, "learning_rate": 1.5414341394277616e-05, "loss": 0.5913, "step": 187410 }, { "epoch": 2.08, "learning_rate": 1.5413418667139104e-05, "loss": 0.6175, "step": 187415 }, { "epoch": 2.08, "learning_rate": 1.541249594000059e-05, "loss": 0.623, "step": 187420 }, { "epoch": 2.08, "learning_rate": 1.541157321286208e-05, "loss": 0.6942, "step": 187425 }, { "epoch": 2.08, "learning_rate": 1.5410650485723567e-05, "loss": 0.6421, "step": 187430 }, { "epoch": 2.08, "learning_rate": 1.5409727758585055e-05, "loss": 0.6138, "step": 187435 }, { "epoch": 2.08, "learning_rate": 1.540880503144654e-05, "loss": 0.5837, "step": 187440 }, { "epoch": 2.08, "learning_rate": 1.540788230430803e-05, "loss": 0.66, "step": 187445 }, { "epoch": 2.08, "learning_rate": 1.5406959577169515e-05, "loss": 0.6193, "step": 187450 }, { "epoch": 2.08, "learning_rate": 1.5406036850031007e-05, "loss": 0.62, "step": 187455 }, { "epoch": 2.08, "learning_rate": 1.540511412289249e-05, "loss": 0.5979, "step": 187460 }, { "epoch": 2.08, "learning_rate": 1.5404191395753982e-05, "loss": 0.605, "step": 187465 }, { "epoch": 2.08, "learning_rate": 1.5403268668615467e-05, "loss": 0.6143, "step": 187470 }, { "epoch": 2.08, "learning_rate": 1.5402345941476955e-05, "loss": 0.6437, "step": 187475 }, { "epoch": 2.08, "learning_rate": 1.5401423214338442e-05, "loss": 0.5705, "step": 187480 }, { "epoch": 2.08, "learning_rate": 1.540050048719993e-05, "loss": 0.65, "step": 187485 }, { "epoch": 2.08, "learning_rate": 1.5399577760061418e-05, "loss": 0.5994, "step": 187490 }, { "epoch": 2.08, "learning_rate": 1.5398655032922903e-05, "loss": 0.6433, "step": 187495 }, { "epoch": 2.08, "learning_rate": 1.5397732305784394e-05, "loss": 0.6564, "step": 187500 }, { "epoch": 2.08, "learning_rate": 1.5396809578645878e-05, "loss": 0.6114, "step": 187505 }, { "epoch": 2.08, "learning_rate": 1.539588685150737e-05, "loss": 0.6278, "step": 187510 }, { "epoch": 2.08, "learning_rate": 1.5394964124368854e-05, "loss": 0.6003, "step": 187515 }, { "epoch": 2.08, "learning_rate": 1.5394041397230345e-05, "loss": 0.6292, "step": 187520 }, { "epoch": 2.08, "learning_rate": 1.539311867009183e-05, "loss": 0.5854, "step": 187525 }, { "epoch": 2.08, "learning_rate": 1.5392195942953317e-05, "loss": 0.635, "step": 187530 }, { "epoch": 2.08, "learning_rate": 1.5391273215814805e-05, "loss": 0.5763, "step": 187535 }, { "epoch": 2.08, "learning_rate": 1.5390350488676293e-05, "loss": 0.6191, "step": 187540 }, { "epoch": 2.08, "learning_rate": 1.538942776153778e-05, "loss": 0.5867, "step": 187545 }, { "epoch": 2.08, "learning_rate": 1.538850503439927e-05, "loss": 0.6302, "step": 187550 }, { "epoch": 2.08, "learning_rate": 1.5387582307260757e-05, "loss": 0.5859, "step": 187555 }, { "epoch": 2.08, "learning_rate": 1.538665958012224e-05, "loss": 0.6312, "step": 187560 }, { "epoch": 2.08, "learning_rate": 1.5385736852983732e-05, "loss": 0.6099, "step": 187565 }, { "epoch": 2.08, "learning_rate": 1.5384814125845217e-05, "loss": 0.624, "step": 187570 }, { "epoch": 2.08, "learning_rate": 1.5383891398706708e-05, "loss": 0.5929, "step": 187575 }, { "epoch": 2.08, "learning_rate": 1.5382968671568193e-05, "loss": 0.6101, "step": 187580 }, { "epoch": 2.08, "learning_rate": 1.5382045944429684e-05, "loss": 0.6221, "step": 187585 }, { "epoch": 2.08, "learning_rate": 1.5381123217291168e-05, "loss": 0.5929, "step": 187590 }, { "epoch": 2.08, "learning_rate": 1.5380200490152656e-05, "loss": 0.608, "step": 187595 }, { "epoch": 2.08, "learning_rate": 1.5379277763014144e-05, "loss": 0.6202, "step": 187600 }, { "epoch": 2.08, "learning_rate": 1.5378355035875632e-05, "loss": 0.6824, "step": 187605 }, { "epoch": 2.08, "learning_rate": 1.537743230873712e-05, "loss": 0.5474, "step": 187610 }, { "epoch": 2.08, "learning_rate": 1.5376509581598608e-05, "loss": 0.5961, "step": 187615 }, { "epoch": 2.08, "learning_rate": 1.5375586854460095e-05, "loss": 0.6033, "step": 187620 }, { "epoch": 2.08, "learning_rate": 1.5374664127321583e-05, "loss": 0.662, "step": 187625 }, { "epoch": 2.08, "learning_rate": 1.537374140018307e-05, "loss": 0.5676, "step": 187630 }, { "epoch": 2.08, "learning_rate": 1.5372818673044556e-05, "loss": 0.5677, "step": 187635 }, { "epoch": 2.08, "learning_rate": 1.5371895945906047e-05, "loss": 0.6103, "step": 187640 }, { "epoch": 2.08, "learning_rate": 1.537097321876753e-05, "loss": 0.6479, "step": 187645 }, { "epoch": 2.08, "learning_rate": 1.537005049162902e-05, "loss": 0.574, "step": 187650 }, { "epoch": 2.08, "learning_rate": 1.5369127764490507e-05, "loss": 0.6006, "step": 187655 }, { "epoch": 2.08, "learning_rate": 1.5368205037351995e-05, "loss": 0.5809, "step": 187660 }, { "epoch": 2.08, "learning_rate": 1.5367282310213483e-05, "loss": 0.6231, "step": 187665 }, { "epoch": 2.08, "learning_rate": 1.536635958307497e-05, "loss": 0.5849, "step": 187670 }, { "epoch": 2.08, "learning_rate": 1.5365436855936458e-05, "loss": 0.6386, "step": 187675 }, { "epoch": 2.08, "learning_rate": 1.5364514128797946e-05, "loss": 0.6201, "step": 187680 }, { "epoch": 2.08, "learning_rate": 1.5363591401659434e-05, "loss": 0.6357, "step": 187685 }, { "epoch": 2.08, "learning_rate": 1.5362668674520922e-05, "loss": 0.661, "step": 187690 }, { "epoch": 2.08, "learning_rate": 1.536174594738241e-05, "loss": 0.5719, "step": 187695 }, { "epoch": 2.08, "learning_rate": 1.5360823220243898e-05, "loss": 0.5989, "step": 187700 }, { "epoch": 2.08, "learning_rate": 1.5359900493105382e-05, "loss": 0.6327, "step": 187705 }, { "epoch": 2.08, "learning_rate": 1.535897776596687e-05, "loss": 0.6443, "step": 187710 }, { "epoch": 2.08, "learning_rate": 1.5358055038828358e-05, "loss": 0.5754, "step": 187715 }, { "epoch": 2.08, "learning_rate": 1.5357132311689846e-05, "loss": 0.5713, "step": 187720 }, { "epoch": 2.08, "learning_rate": 1.5356209584551333e-05, "loss": 0.6085, "step": 187725 }, { "epoch": 2.08, "learning_rate": 1.535528685741282e-05, "loss": 0.5711, "step": 187730 }, { "epoch": 2.08, "learning_rate": 1.535436413027431e-05, "loss": 0.6248, "step": 187735 }, { "epoch": 2.08, "learning_rate": 1.5353441403135797e-05, "loss": 0.5712, "step": 187740 }, { "epoch": 2.08, "learning_rate": 1.5352518675997285e-05, "loss": 0.5641, "step": 187745 }, { "epoch": 2.08, "learning_rate": 1.5351595948858773e-05, "loss": 0.6001, "step": 187750 }, { "epoch": 2.08, "learning_rate": 1.535067322172026e-05, "loss": 0.6309, "step": 187755 }, { "epoch": 2.08, "learning_rate": 1.5349750494581745e-05, "loss": 0.6403, "step": 187760 }, { "epoch": 2.08, "learning_rate": 1.5348827767443236e-05, "loss": 0.6295, "step": 187765 }, { "epoch": 2.08, "learning_rate": 1.534790504030472e-05, "loss": 0.6, "step": 187770 }, { "epoch": 2.08, "learning_rate": 1.5346982313166212e-05, "loss": 0.5887, "step": 187775 }, { "epoch": 2.08, "learning_rate": 1.5346059586027696e-05, "loss": 0.6156, "step": 187780 }, { "epoch": 2.08, "learning_rate": 1.5345136858889184e-05, "loss": 0.6291, "step": 187785 }, { "epoch": 2.08, "learning_rate": 1.5344214131750672e-05, "loss": 0.5406, "step": 187790 }, { "epoch": 2.08, "learning_rate": 1.534329140461216e-05, "loss": 0.5845, "step": 187795 }, { "epoch": 2.08, "learning_rate": 1.5342368677473648e-05, "loss": 0.6434, "step": 187800 }, { "epoch": 2.08, "learning_rate": 1.5341445950335136e-05, "loss": 0.6149, "step": 187805 }, { "epoch": 2.08, "learning_rate": 1.5340523223196623e-05, "loss": 0.6055, "step": 187810 }, { "epoch": 2.08, "learning_rate": 1.533960049605811e-05, "loss": 0.6416, "step": 187815 }, { "epoch": 2.08, "learning_rate": 1.53386777689196e-05, "loss": 0.6361, "step": 187820 }, { "epoch": 2.08, "learning_rate": 1.5337755041781084e-05, "loss": 0.585, "step": 187825 }, { "epoch": 2.08, "learning_rate": 1.5336832314642575e-05, "loss": 0.5585, "step": 187830 }, { "epoch": 2.08, "learning_rate": 1.533590958750406e-05, "loss": 0.5969, "step": 187835 }, { "epoch": 2.08, "learning_rate": 1.533498686036555e-05, "loss": 0.6314, "step": 187840 }, { "epoch": 2.08, "learning_rate": 1.5334064133227035e-05, "loss": 0.5972, "step": 187845 }, { "epoch": 2.08, "learning_rate": 1.5333141406088526e-05, "loss": 0.6064, "step": 187850 }, { "epoch": 2.08, "learning_rate": 1.533221867895001e-05, "loss": 0.5885, "step": 187855 }, { "epoch": 2.08, "learning_rate": 1.53312959518115e-05, "loss": 0.5879, "step": 187860 }, { "epoch": 2.08, "learning_rate": 1.5330373224672986e-05, "loss": 0.6427, "step": 187865 }, { "epoch": 2.08, "learning_rate": 1.5329450497534474e-05, "loss": 0.6166, "step": 187870 }, { "epoch": 2.08, "learning_rate": 1.5328527770395962e-05, "loss": 0.5822, "step": 187875 }, { "epoch": 2.08, "learning_rate": 1.5327605043257447e-05, "loss": 0.5886, "step": 187880 }, { "epoch": 2.08, "learning_rate": 1.5326682316118938e-05, "loss": 0.6536, "step": 187885 }, { "epoch": 2.08, "learning_rate": 1.5325759588980422e-05, "loss": 0.5762, "step": 187890 }, { "epoch": 2.08, "learning_rate": 1.5324836861841913e-05, "loss": 0.5844, "step": 187895 }, { "epoch": 2.08, "learning_rate": 1.5323914134703398e-05, "loss": 0.6116, "step": 187900 }, { "epoch": 2.08, "learning_rate": 1.532299140756489e-05, "loss": 0.583, "step": 187905 }, { "epoch": 2.08, "learning_rate": 1.5322068680426374e-05, "loss": 0.6741, "step": 187910 }, { "epoch": 2.08, "learning_rate": 1.532114595328786e-05, "loss": 0.6545, "step": 187915 }, { "epoch": 2.08, "learning_rate": 1.532022322614935e-05, "loss": 0.6226, "step": 187920 }, { "epoch": 2.08, "learning_rate": 1.5319300499010837e-05, "loss": 0.5612, "step": 187925 }, { "epoch": 2.08, "learning_rate": 1.5318377771872325e-05, "loss": 0.6021, "step": 187930 }, { "epoch": 2.08, "learning_rate": 1.531745504473381e-05, "loss": 0.6021, "step": 187935 }, { "epoch": 2.08, "learning_rate": 1.53165323175953e-05, "loss": 0.6173, "step": 187940 }, { "epoch": 2.08, "learning_rate": 1.5315609590456785e-05, "loss": 0.603, "step": 187945 }, { "epoch": 2.08, "learning_rate": 1.5314686863318276e-05, "loss": 0.5592, "step": 187950 }, { "epoch": 2.08, "learning_rate": 1.531376413617976e-05, "loss": 0.6129, "step": 187955 }, { "epoch": 2.08, "learning_rate": 1.5312841409041252e-05, "loss": 0.5744, "step": 187960 }, { "epoch": 2.08, "learning_rate": 1.5311918681902737e-05, "loss": 0.6098, "step": 187965 }, { "epoch": 2.08, "learning_rate": 1.5310995954764228e-05, "loss": 0.6118, "step": 187970 }, { "epoch": 2.08, "learning_rate": 1.5310073227625712e-05, "loss": 0.6099, "step": 187975 }, { "epoch": 2.08, "learning_rate": 1.53091505004872e-05, "loss": 0.5802, "step": 187980 }, { "epoch": 2.08, "learning_rate": 1.5308227773348688e-05, "loss": 0.6514, "step": 187985 }, { "epoch": 2.08, "learning_rate": 1.5307305046210176e-05, "loss": 0.6189, "step": 187990 }, { "epoch": 2.08, "learning_rate": 1.5306382319071664e-05, "loss": 0.5313, "step": 187995 }, { "epoch": 2.08, "learning_rate": 1.530545959193315e-05, "loss": 0.594, "step": 188000 }, { "epoch": 2.08, "eval_loss": 0.5719437003135681, "eval_runtime": 69.1674, "eval_samples_per_second": 28.915, "eval_steps_per_second": 14.458, "step": 188000 }, { "epoch": 2.08, "learning_rate": 1.530453686479464e-05, "loss": 0.6118, "step": 188005 }, { "epoch": 2.08, "learning_rate": 1.5303614137656124e-05, "loss": 0.6571, "step": 188010 }, { "epoch": 2.08, "learning_rate": 1.5302691410517615e-05, "loss": 0.6537, "step": 188015 }, { "epoch": 2.08, "learning_rate": 1.53017686833791e-05, "loss": 0.622, "step": 188020 }, { "epoch": 2.08, "learning_rate": 1.530084595624059e-05, "loss": 0.6453, "step": 188025 }, { "epoch": 2.08, "learning_rate": 1.5299923229102075e-05, "loss": 0.6001, "step": 188030 }, { "epoch": 2.08, "learning_rate": 1.5299000501963563e-05, "loss": 0.541, "step": 188035 }, { "epoch": 2.08, "learning_rate": 1.529807777482505e-05, "loss": 0.5978, "step": 188040 }, { "epoch": 2.08, "learning_rate": 1.529715504768654e-05, "loss": 0.6521, "step": 188045 }, { "epoch": 2.08, "learning_rate": 1.5296232320548027e-05, "loss": 0.6407, "step": 188050 }, { "epoch": 2.08, "learning_rate": 1.5295309593409514e-05, "loss": 0.6233, "step": 188055 }, { "epoch": 2.08, "learning_rate": 1.5294386866271002e-05, "loss": 0.552, "step": 188060 }, { "epoch": 2.08, "learning_rate": 1.529346413913249e-05, "loss": 0.6336, "step": 188065 }, { "epoch": 2.08, "learning_rate": 1.5292541411993978e-05, "loss": 0.6052, "step": 188070 }, { "epoch": 2.08, "learning_rate": 1.5291618684855466e-05, "loss": 0.6507, "step": 188075 }, { "epoch": 2.08, "learning_rate": 1.5290695957716954e-05, "loss": 0.6156, "step": 188080 }, { "epoch": 2.08, "learning_rate": 1.5289773230578438e-05, "loss": 0.6644, "step": 188085 }, { "epoch": 2.08, "learning_rate": 1.5288850503439926e-05, "loss": 0.6136, "step": 188090 }, { "epoch": 2.08, "learning_rate": 1.5287927776301414e-05, "loss": 0.5786, "step": 188095 }, { "epoch": 2.08, "learning_rate": 1.5287005049162902e-05, "loss": 0.6164, "step": 188100 }, { "epoch": 2.08, "learning_rate": 1.528608232202439e-05, "loss": 0.5903, "step": 188105 }, { "epoch": 2.08, "learning_rate": 1.5285159594885877e-05, "loss": 0.6462, "step": 188110 }, { "epoch": 2.08, "learning_rate": 1.5284236867747365e-05, "loss": 0.6307, "step": 188115 }, { "epoch": 2.08, "learning_rate": 1.5283314140608853e-05, "loss": 0.6614, "step": 188120 }, { "epoch": 2.08, "learning_rate": 1.528239141347034e-05, "loss": 0.593, "step": 188125 }, { "epoch": 2.08, "learning_rate": 1.528146868633183e-05, "loss": 0.5838, "step": 188130 }, { "epoch": 2.08, "learning_rate": 1.5280545959193317e-05, "loss": 0.6045, "step": 188135 }, { "epoch": 2.08, "learning_rate": 1.5279623232054805e-05, "loss": 0.5779, "step": 188140 }, { "epoch": 2.08, "learning_rate": 1.5278700504916292e-05, "loss": 0.6278, "step": 188145 }, { "epoch": 2.08, "learning_rate": 1.527777777777778e-05, "loss": 0.5956, "step": 188150 }, { "epoch": 2.08, "learning_rate": 1.5276855050639265e-05, "loss": 0.643, "step": 188155 }, { "epoch": 2.08, "learning_rate": 1.5275932323500753e-05, "loss": 0.6009, "step": 188160 }, { "epoch": 2.08, "learning_rate": 1.527500959636224e-05, "loss": 0.6211, "step": 188165 }, { "epoch": 2.08, "learning_rate": 1.5274086869223728e-05, "loss": 0.6031, "step": 188170 }, { "epoch": 2.08, "learning_rate": 1.5273164142085216e-05, "loss": 0.6257, "step": 188175 }, { "epoch": 2.08, "learning_rate": 1.5272241414946704e-05, "loss": 0.6641, "step": 188180 }, { "epoch": 2.08, "learning_rate": 1.5271318687808192e-05, "loss": 0.5922, "step": 188185 }, { "epoch": 2.08, "learning_rate": 1.527039596066968e-05, "loss": 0.5178, "step": 188190 }, { "epoch": 2.08, "learning_rate": 1.5269473233531167e-05, "loss": 0.5896, "step": 188195 }, { "epoch": 2.08, "learning_rate": 1.5268550506392655e-05, "loss": 0.61, "step": 188200 }, { "epoch": 2.08, "learning_rate": 1.5267627779254143e-05, "loss": 0.6564, "step": 188205 }, { "epoch": 2.08, "learning_rate": 1.5266705052115628e-05, "loss": 0.6137, "step": 188210 }, { "epoch": 2.08, "learning_rate": 1.526578232497712e-05, "loss": 0.575, "step": 188215 }, { "epoch": 2.08, "learning_rate": 1.5264859597838603e-05, "loss": 0.5934, "step": 188220 }, { "epoch": 2.08, "learning_rate": 1.5263936870700095e-05, "loss": 0.6053, "step": 188225 }, { "epoch": 2.08, "learning_rate": 1.526301414356158e-05, "loss": 0.6258, "step": 188230 }, { "epoch": 2.08, "learning_rate": 1.5262091416423067e-05, "loss": 0.6156, "step": 188235 }, { "epoch": 2.08, "learning_rate": 1.5261168689284555e-05, "loss": 0.5942, "step": 188240 }, { "epoch": 2.08, "learning_rate": 1.5260245962146043e-05, "loss": 0.5613, "step": 188245 }, { "epoch": 2.08, "learning_rate": 1.525932323500753e-05, "loss": 0.5741, "step": 188250 }, { "epoch": 2.08, "learning_rate": 1.5258400507869017e-05, "loss": 0.6252, "step": 188255 }, { "epoch": 2.08, "learning_rate": 1.5257477780730506e-05, "loss": 0.5838, "step": 188260 }, { "epoch": 2.08, "learning_rate": 1.5256555053591992e-05, "loss": 0.5889, "step": 188265 }, { "epoch": 2.08, "learning_rate": 1.5255632326453482e-05, "loss": 0.608, "step": 188270 }, { "epoch": 2.08, "learning_rate": 1.5254709599314968e-05, "loss": 0.5966, "step": 188275 }, { "epoch": 2.08, "learning_rate": 1.5253786872176456e-05, "loss": 0.5737, "step": 188280 }, { "epoch": 2.08, "learning_rate": 1.5252864145037944e-05, "loss": 0.6315, "step": 188285 }, { "epoch": 2.08, "learning_rate": 1.5251941417899431e-05, "loss": 0.5958, "step": 188290 }, { "epoch": 2.08, "learning_rate": 1.5251018690760918e-05, "loss": 0.6034, "step": 188295 }, { "epoch": 2.08, "learning_rate": 1.5250095963622407e-05, "loss": 0.5512, "step": 188300 }, { "epoch": 2.09, "learning_rate": 1.5249173236483893e-05, "loss": 0.5736, "step": 188305 }, { "epoch": 2.09, "learning_rate": 1.5248250509345383e-05, "loss": 0.6118, "step": 188310 }, { "epoch": 2.09, "learning_rate": 1.5247327782206869e-05, "loss": 0.6408, "step": 188315 }, { "epoch": 2.09, "learning_rate": 1.5246405055068355e-05, "loss": 0.5424, "step": 188320 }, { "epoch": 2.09, "learning_rate": 1.5245482327929845e-05, "loss": 0.5999, "step": 188325 }, { "epoch": 2.09, "learning_rate": 1.5244559600791331e-05, "loss": 0.6109, "step": 188330 }, { "epoch": 2.09, "learning_rate": 1.5243636873652819e-05, "loss": 0.6097, "step": 188335 }, { "epoch": 2.09, "learning_rate": 1.5242714146514307e-05, "loss": 0.6368, "step": 188340 }, { "epoch": 2.09, "learning_rate": 1.5241791419375794e-05, "loss": 0.5525, "step": 188345 }, { "epoch": 2.09, "learning_rate": 1.524086869223728e-05, "loss": 0.5697, "step": 188350 }, { "epoch": 2.09, "learning_rate": 1.523994596509877e-05, "loss": 0.6188, "step": 188355 }, { "epoch": 2.09, "learning_rate": 1.5239023237960256e-05, "loss": 0.5885, "step": 188360 }, { "epoch": 2.09, "learning_rate": 1.5238100510821746e-05, "loss": 0.6489, "step": 188365 }, { "epoch": 2.09, "learning_rate": 1.5237177783683232e-05, "loss": 0.5398, "step": 188370 }, { "epoch": 2.09, "learning_rate": 1.5236255056544722e-05, "loss": 0.5977, "step": 188375 }, { "epoch": 2.09, "learning_rate": 1.5235332329406208e-05, "loss": 0.6546, "step": 188380 }, { "epoch": 2.09, "learning_rate": 1.5234409602267696e-05, "loss": 0.6182, "step": 188385 }, { "epoch": 2.09, "learning_rate": 1.5233486875129183e-05, "loss": 0.6101, "step": 188390 }, { "epoch": 2.09, "learning_rate": 1.523256414799067e-05, "loss": 0.5839, "step": 188395 }, { "epoch": 2.09, "learning_rate": 1.5231641420852157e-05, "loss": 0.6166, "step": 188400 }, { "epoch": 2.09, "learning_rate": 1.5230718693713644e-05, "loss": 0.6177, "step": 188405 }, { "epoch": 2.09, "learning_rate": 1.5229795966575133e-05, "loss": 0.6292, "step": 188410 }, { "epoch": 2.09, "learning_rate": 1.522887323943662e-05, "loss": 0.5802, "step": 188415 }, { "epoch": 2.09, "learning_rate": 1.5227950512298109e-05, "loss": 0.5783, "step": 188420 }, { "epoch": 2.09, "learning_rate": 1.5227027785159595e-05, "loss": 0.6289, "step": 188425 }, { "epoch": 2.09, "learning_rate": 1.5226105058021084e-05, "loss": 0.6165, "step": 188430 }, { "epoch": 2.09, "learning_rate": 1.522518233088257e-05, "loss": 0.6039, "step": 188435 }, { "epoch": 2.09, "learning_rate": 1.522425960374406e-05, "loss": 0.5806, "step": 188440 }, { "epoch": 2.09, "learning_rate": 1.5223336876605546e-05, "loss": 0.599, "step": 188445 }, { "epoch": 2.09, "learning_rate": 1.5222414149467034e-05, "loss": 0.6084, "step": 188450 }, { "epoch": 2.09, "learning_rate": 1.522149142232852e-05, "loss": 0.5667, "step": 188455 }, { "epoch": 2.09, "learning_rate": 1.522056869519001e-05, "loss": 0.6069, "step": 188460 }, { "epoch": 2.09, "learning_rate": 1.5219645968051496e-05, "loss": 0.5849, "step": 188465 }, { "epoch": 2.09, "learning_rate": 1.5218723240912982e-05, "loss": 0.6366, "step": 188470 }, { "epoch": 2.09, "learning_rate": 1.5217800513774472e-05, "loss": 0.6176, "step": 188475 }, { "epoch": 2.09, "learning_rate": 1.5216877786635958e-05, "loss": 0.6069, "step": 188480 }, { "epoch": 2.09, "learning_rate": 1.5215955059497447e-05, "loss": 0.5765, "step": 188485 }, { "epoch": 2.09, "learning_rate": 1.5215032332358934e-05, "loss": 0.5677, "step": 188490 }, { "epoch": 2.09, "learning_rate": 1.5214109605220423e-05, "loss": 0.613, "step": 188495 }, { "epoch": 2.09, "learning_rate": 1.521318687808191e-05, "loss": 0.6246, "step": 188500 }, { "epoch": 2.09, "learning_rate": 1.5212264150943397e-05, "loss": 0.6046, "step": 188505 }, { "epoch": 2.09, "learning_rate": 1.5211341423804883e-05, "loss": 0.6292, "step": 188510 }, { "epoch": 2.09, "learning_rate": 1.5210418696666373e-05, "loss": 0.6081, "step": 188515 }, { "epoch": 2.09, "learning_rate": 1.5209495969527859e-05, "loss": 0.6199, "step": 188520 }, { "epoch": 2.09, "learning_rate": 1.5208573242389349e-05, "loss": 0.6427, "step": 188525 }, { "epoch": 2.09, "learning_rate": 1.5207650515250835e-05, "loss": 0.5722, "step": 188530 }, { "epoch": 2.09, "learning_rate": 1.5206727788112324e-05, "loss": 0.582, "step": 188535 }, { "epoch": 2.09, "learning_rate": 1.520580506097381e-05, "loss": 0.5816, "step": 188540 }, { "epoch": 2.09, "learning_rate": 1.5204882333835297e-05, "loss": 0.6131, "step": 188545 }, { "epoch": 2.09, "learning_rate": 1.5203959606696786e-05, "loss": 0.58, "step": 188550 }, { "epoch": 2.09, "learning_rate": 1.5203036879558272e-05, "loss": 0.6548, "step": 188555 }, { "epoch": 2.09, "learning_rate": 1.520211415241976e-05, "loss": 0.6366, "step": 188560 }, { "epoch": 2.09, "learning_rate": 1.5201191425281246e-05, "loss": 0.7006, "step": 188565 }, { "epoch": 2.09, "learning_rate": 1.5200268698142736e-05, "loss": 0.6176, "step": 188570 }, { "epoch": 2.09, "learning_rate": 1.5199345971004222e-05, "loss": 0.5665, "step": 188575 }, { "epoch": 2.09, "learning_rate": 1.5198423243865711e-05, "loss": 0.5861, "step": 188580 }, { "epoch": 2.09, "learning_rate": 1.5197500516727198e-05, "loss": 0.6447, "step": 188585 }, { "epoch": 2.09, "learning_rate": 1.5196577789588687e-05, "loss": 0.6157, "step": 188590 }, { "epoch": 2.09, "learning_rate": 1.5195655062450173e-05, "loss": 0.5709, "step": 188595 }, { "epoch": 2.09, "learning_rate": 1.5194732335311663e-05, "loss": 0.6176, "step": 188600 }, { "epoch": 2.09, "learning_rate": 1.5193809608173149e-05, "loss": 0.5175, "step": 188605 }, { "epoch": 2.09, "learning_rate": 1.5192886881034637e-05, "loss": 0.6236, "step": 188610 }, { "epoch": 2.09, "learning_rate": 1.5191964153896123e-05, "loss": 0.5859, "step": 188615 }, { "epoch": 2.09, "learning_rate": 1.5191041426757611e-05, "loss": 0.5639, "step": 188620 }, { "epoch": 2.09, "learning_rate": 1.5190118699619099e-05, "loss": 0.6391, "step": 188625 }, { "epoch": 2.09, "learning_rate": 1.5189195972480585e-05, "loss": 0.6133, "step": 188630 }, { "epoch": 2.09, "learning_rate": 1.5188273245342074e-05, "loss": 0.5828, "step": 188635 }, { "epoch": 2.09, "learning_rate": 1.518735051820356e-05, "loss": 0.6529, "step": 188640 }, { "epoch": 2.09, "learning_rate": 1.518642779106505e-05, "loss": 0.5978, "step": 188645 }, { "epoch": 2.09, "learning_rate": 1.5185505063926536e-05, "loss": 0.6585, "step": 188650 }, { "epoch": 2.09, "learning_rate": 1.5184582336788026e-05, "loss": 0.5935, "step": 188655 }, { "epoch": 2.09, "learning_rate": 1.5183659609649512e-05, "loss": 0.6289, "step": 188660 }, { "epoch": 2.09, "learning_rate": 1.5182736882511e-05, "loss": 0.6146, "step": 188665 }, { "epoch": 2.09, "learning_rate": 1.5181814155372488e-05, "loss": 0.6211, "step": 188670 }, { "epoch": 2.09, "learning_rate": 1.5180891428233976e-05, "loss": 0.6382, "step": 188675 }, { "epoch": 2.09, "learning_rate": 1.5179968701095462e-05, "loss": 0.5613, "step": 188680 }, { "epoch": 2.09, "learning_rate": 1.5179045973956951e-05, "loss": 0.6384, "step": 188685 }, { "epoch": 2.09, "learning_rate": 1.5178123246818437e-05, "loss": 0.5792, "step": 188690 }, { "epoch": 2.09, "learning_rate": 1.5177200519679924e-05, "loss": 0.6839, "step": 188695 }, { "epoch": 2.09, "learning_rate": 1.5176277792541413e-05, "loss": 0.6209, "step": 188700 }, { "epoch": 2.09, "learning_rate": 1.51753550654029e-05, "loss": 0.6129, "step": 188705 }, { "epoch": 2.09, "learning_rate": 1.5174432338264389e-05, "loss": 0.6004, "step": 188710 }, { "epoch": 2.09, "learning_rate": 1.5173509611125875e-05, "loss": 0.6181, "step": 188715 }, { "epoch": 2.09, "learning_rate": 1.5172586883987363e-05, "loss": 0.623, "step": 188720 }, { "epoch": 2.09, "learning_rate": 1.517166415684885e-05, "loss": 0.5339, "step": 188725 }, { "epoch": 2.09, "learning_rate": 1.5170741429710338e-05, "loss": 0.7238, "step": 188730 }, { "epoch": 2.09, "learning_rate": 1.5169818702571825e-05, "loss": 0.6158, "step": 188735 }, { "epoch": 2.09, "learning_rate": 1.5168895975433314e-05, "loss": 0.5853, "step": 188740 }, { "epoch": 2.09, "learning_rate": 1.51679732482948e-05, "loss": 0.5701, "step": 188745 }, { "epoch": 2.09, "learning_rate": 1.516705052115629e-05, "loss": 0.5776, "step": 188750 }, { "epoch": 2.09, "learning_rate": 1.5166127794017776e-05, "loss": 0.5993, "step": 188755 }, { "epoch": 2.09, "learning_rate": 1.5165205066879266e-05, "loss": 0.5748, "step": 188760 }, { "epoch": 2.09, "learning_rate": 1.5164282339740752e-05, "loss": 0.5618, "step": 188765 }, { "epoch": 2.09, "learning_rate": 1.5163359612602238e-05, "loss": 0.6068, "step": 188770 }, { "epoch": 2.09, "learning_rate": 1.5162436885463727e-05, "loss": 0.6074, "step": 188775 }, { "epoch": 2.09, "learning_rate": 1.5161514158325214e-05, "loss": 0.6098, "step": 188780 }, { "epoch": 2.09, "learning_rate": 1.5160591431186701e-05, "loss": 0.6057, "step": 188785 }, { "epoch": 2.09, "learning_rate": 1.5159668704048188e-05, "loss": 0.5585, "step": 188790 }, { "epoch": 2.09, "learning_rate": 1.5158745976909677e-05, "loss": 0.6123, "step": 188795 }, { "epoch": 2.09, "learning_rate": 1.5157823249771163e-05, "loss": 0.577, "step": 188800 }, { "epoch": 2.09, "learning_rate": 1.5156900522632653e-05, "loss": 0.6646, "step": 188805 }, { "epoch": 2.09, "learning_rate": 1.5155977795494139e-05, "loss": 0.5616, "step": 188810 }, { "epoch": 2.09, "learning_rate": 1.5155055068355629e-05, "loss": 0.626, "step": 188815 }, { "epoch": 2.09, "learning_rate": 1.5154132341217115e-05, "loss": 0.608, "step": 188820 }, { "epoch": 2.09, "learning_rate": 1.5153209614078604e-05, "loss": 0.6024, "step": 188825 }, { "epoch": 2.09, "learning_rate": 1.515228688694009e-05, "loss": 0.5684, "step": 188830 }, { "epoch": 2.09, "learning_rate": 1.5151364159801578e-05, "loss": 0.6504, "step": 188835 }, { "epoch": 2.09, "learning_rate": 1.5150441432663064e-05, "loss": 0.624, "step": 188840 }, { "epoch": 2.09, "learning_rate": 1.514951870552455e-05, "loss": 0.5981, "step": 188845 }, { "epoch": 2.09, "learning_rate": 1.514859597838604e-05, "loss": 0.602, "step": 188850 }, { "epoch": 2.09, "learning_rate": 1.5147673251247526e-05, "loss": 0.6407, "step": 188855 }, { "epoch": 2.09, "learning_rate": 1.5146750524109016e-05, "loss": 0.5778, "step": 188860 }, { "epoch": 2.09, "learning_rate": 1.5145827796970502e-05, "loss": 0.6519, "step": 188865 }, { "epoch": 2.09, "learning_rate": 1.5144905069831991e-05, "loss": 0.5985, "step": 188870 }, { "epoch": 2.09, "learning_rate": 1.5143982342693478e-05, "loss": 0.5841, "step": 188875 }, { "epoch": 2.09, "learning_rate": 1.5143059615554967e-05, "loss": 0.5746, "step": 188880 }, { "epoch": 2.09, "learning_rate": 1.5142136888416453e-05, "loss": 0.6234, "step": 188885 }, { "epoch": 2.09, "learning_rate": 1.5141214161277941e-05, "loss": 0.5793, "step": 188890 }, { "epoch": 2.09, "learning_rate": 1.5140291434139427e-05, "loss": 0.5767, "step": 188895 }, { "epoch": 2.09, "learning_rate": 1.5139368707000917e-05, "loss": 0.6208, "step": 188900 }, { "epoch": 2.09, "learning_rate": 1.5138445979862403e-05, "loss": 0.614, "step": 188905 }, { "epoch": 2.09, "learning_rate": 1.5137523252723893e-05, "loss": 0.5978, "step": 188910 }, { "epoch": 2.09, "learning_rate": 1.5136600525585379e-05, "loss": 0.5618, "step": 188915 }, { "epoch": 2.09, "learning_rate": 1.5135677798446865e-05, "loss": 0.5458, "step": 188920 }, { "epoch": 2.09, "learning_rate": 1.5134755071308354e-05, "loss": 0.5923, "step": 188925 }, { "epoch": 2.09, "learning_rate": 1.513383234416984e-05, "loss": 0.5754, "step": 188930 }, { "epoch": 2.09, "learning_rate": 1.513290961703133e-05, "loss": 0.6328, "step": 188935 }, { "epoch": 2.09, "learning_rate": 1.5131986889892816e-05, "loss": 0.6047, "step": 188940 }, { "epoch": 2.09, "learning_rate": 1.5131064162754304e-05, "loss": 0.5281, "step": 188945 }, { "epoch": 2.09, "learning_rate": 1.5130141435615792e-05, "loss": 0.5499, "step": 188950 }, { "epoch": 2.09, "learning_rate": 1.512921870847728e-05, "loss": 0.6148, "step": 188955 }, { "epoch": 2.09, "learning_rate": 1.5128295981338766e-05, "loss": 0.5775, "step": 188960 }, { "epoch": 2.09, "learning_rate": 1.5127373254200255e-05, "loss": 0.5951, "step": 188965 }, { "epoch": 2.09, "learning_rate": 1.5126450527061742e-05, "loss": 0.6228, "step": 188970 }, { "epoch": 2.09, "learning_rate": 1.5125527799923231e-05, "loss": 0.5702, "step": 188975 }, { "epoch": 2.09, "learning_rate": 1.5124605072784717e-05, "loss": 0.5991, "step": 188980 }, { "epoch": 2.09, "learning_rate": 1.5123682345646207e-05, "loss": 0.614, "step": 188985 }, { "epoch": 2.09, "learning_rate": 1.5122759618507693e-05, "loss": 0.6288, "step": 188990 }, { "epoch": 2.09, "learning_rate": 1.512183689136918e-05, "loss": 0.5859, "step": 188995 }, { "epoch": 2.09, "learning_rate": 1.5120914164230667e-05, "loss": 0.5838, "step": 189000 }, { "epoch": 2.09, "eval_loss": 0.5878751873970032, "eval_runtime": 69.2482, "eval_samples_per_second": 28.882, "eval_steps_per_second": 14.441, "step": 189000 }, { "epoch": 2.09, "learning_rate": 1.5119991437092155e-05, "loss": 0.635, "step": 189005 }, { "epoch": 2.09, "learning_rate": 1.5119068709953643e-05, "loss": 0.6015, "step": 189010 }, { "epoch": 2.09, "learning_rate": 1.5118145982815129e-05, "loss": 0.5929, "step": 189015 }, { "epoch": 2.09, "learning_rate": 1.5117223255676618e-05, "loss": 0.7055, "step": 189020 }, { "epoch": 2.09, "learning_rate": 1.5116300528538105e-05, "loss": 0.595, "step": 189025 }, { "epoch": 2.09, "learning_rate": 1.5115377801399594e-05, "loss": 0.6118, "step": 189030 }, { "epoch": 2.09, "learning_rate": 1.511445507426108e-05, "loss": 0.6108, "step": 189035 }, { "epoch": 2.09, "learning_rate": 1.511353234712257e-05, "loss": 0.5721, "step": 189040 }, { "epoch": 2.09, "learning_rate": 1.5112609619984056e-05, "loss": 0.6264, "step": 189045 }, { "epoch": 2.09, "learning_rate": 1.5111686892845544e-05, "loss": 0.5927, "step": 189050 }, { "epoch": 2.09, "learning_rate": 1.5110764165707032e-05, "loss": 0.5927, "step": 189055 }, { "epoch": 2.09, "learning_rate": 1.510984143856852e-05, "loss": 0.6195, "step": 189060 }, { "epoch": 2.09, "learning_rate": 1.5108918711430006e-05, "loss": 0.6909, "step": 189065 }, { "epoch": 2.09, "learning_rate": 1.5107995984291492e-05, "loss": 0.6228, "step": 189070 }, { "epoch": 2.09, "learning_rate": 1.5107073257152981e-05, "loss": 0.6458, "step": 189075 }, { "epoch": 2.09, "learning_rate": 1.5106150530014468e-05, "loss": 0.653, "step": 189080 }, { "epoch": 2.09, "learning_rate": 1.5105227802875957e-05, "loss": 0.5559, "step": 189085 }, { "epoch": 2.09, "learning_rate": 1.5104305075737443e-05, "loss": 0.6778, "step": 189090 }, { "epoch": 2.09, "learning_rate": 1.5103382348598933e-05, "loss": 0.5265, "step": 189095 }, { "epoch": 2.09, "learning_rate": 1.5102459621460419e-05, "loss": 0.5737, "step": 189100 }, { "epoch": 2.09, "learning_rate": 1.5101536894321908e-05, "loss": 0.5564, "step": 189105 }, { "epoch": 2.09, "learning_rate": 1.5100614167183395e-05, "loss": 0.6042, "step": 189110 }, { "epoch": 2.09, "learning_rate": 1.5099691440044882e-05, "loss": 0.6572, "step": 189115 }, { "epoch": 2.09, "learning_rate": 1.5098768712906369e-05, "loss": 0.6042, "step": 189120 }, { "epoch": 2.09, "learning_rate": 1.5097845985767858e-05, "loss": 0.6359, "step": 189125 }, { "epoch": 2.09, "learning_rate": 1.5096923258629344e-05, "loss": 0.5944, "step": 189130 }, { "epoch": 2.09, "learning_rate": 1.5096000531490834e-05, "loss": 0.5569, "step": 189135 }, { "epoch": 2.09, "learning_rate": 1.509507780435232e-05, "loss": 0.5855, "step": 189140 }, { "epoch": 2.09, "learning_rate": 1.509415507721381e-05, "loss": 0.6197, "step": 189145 }, { "epoch": 2.09, "learning_rate": 1.5093232350075296e-05, "loss": 0.61, "step": 189150 }, { "epoch": 2.09, "learning_rate": 1.5092309622936782e-05, "loss": 0.6106, "step": 189155 }, { "epoch": 2.09, "learning_rate": 1.5091386895798271e-05, "loss": 0.5751, "step": 189160 }, { "epoch": 2.09, "learning_rate": 1.5090464168659758e-05, "loss": 0.6054, "step": 189165 }, { "epoch": 2.09, "learning_rate": 1.5089541441521245e-05, "loss": 0.5809, "step": 189170 }, { "epoch": 2.09, "learning_rate": 1.5088618714382732e-05, "loss": 0.6137, "step": 189175 }, { "epoch": 2.09, "learning_rate": 1.5087695987244221e-05, "loss": 0.5951, "step": 189180 }, { "epoch": 2.09, "learning_rate": 1.5086773260105707e-05, "loss": 0.5924, "step": 189185 }, { "epoch": 2.09, "learning_rate": 1.5085850532967197e-05, "loss": 0.633, "step": 189190 }, { "epoch": 2.09, "learning_rate": 1.5084927805828683e-05, "loss": 0.6197, "step": 189195 }, { "epoch": 2.09, "learning_rate": 1.5084005078690173e-05, "loss": 0.6067, "step": 189200 }, { "epoch": 2.1, "learning_rate": 1.5083082351551659e-05, "loss": 0.6066, "step": 189205 }, { "epoch": 2.1, "learning_rate": 1.5082159624413148e-05, "loss": 0.5555, "step": 189210 }, { "epoch": 2.1, "learning_rate": 1.5081236897274634e-05, "loss": 0.584, "step": 189215 }, { "epoch": 2.1, "learning_rate": 1.5080314170136122e-05, "loss": 0.6276, "step": 189220 }, { "epoch": 2.1, "learning_rate": 1.5079391442997608e-05, "loss": 0.5955, "step": 189225 }, { "epoch": 2.1, "learning_rate": 1.5078468715859095e-05, "loss": 0.6139, "step": 189230 }, { "epoch": 2.1, "learning_rate": 1.5077545988720584e-05, "loss": 0.5877, "step": 189235 }, { "epoch": 2.1, "learning_rate": 1.507662326158207e-05, "loss": 0.5887, "step": 189240 }, { "epoch": 2.1, "learning_rate": 1.507570053444356e-05, "loss": 0.6493, "step": 189245 }, { "epoch": 2.1, "learning_rate": 1.5074777807305046e-05, "loss": 0.58, "step": 189250 }, { "epoch": 2.1, "learning_rate": 1.5073855080166535e-05, "loss": 0.6189, "step": 189255 }, { "epoch": 2.1, "learning_rate": 1.5072932353028022e-05, "loss": 0.6004, "step": 189260 }, { "epoch": 2.1, "learning_rate": 1.5072009625889511e-05, "loss": 0.6127, "step": 189265 }, { "epoch": 2.1, "learning_rate": 1.5071086898750997e-05, "loss": 0.5727, "step": 189270 }, { "epoch": 2.1, "learning_rate": 1.5070164171612485e-05, "loss": 0.5923, "step": 189275 }, { "epoch": 2.1, "learning_rate": 1.5069241444473971e-05, "loss": 0.5951, "step": 189280 }, { "epoch": 2.1, "learning_rate": 1.5068318717335461e-05, "loss": 0.5989, "step": 189285 }, { "epoch": 2.1, "learning_rate": 1.5067395990196947e-05, "loss": 0.6467, "step": 189290 }, { "epoch": 2.1, "learning_rate": 1.5066473263058437e-05, "loss": 0.6068, "step": 189295 }, { "epoch": 2.1, "learning_rate": 1.5065550535919923e-05, "loss": 0.5999, "step": 189300 }, { "epoch": 2.1, "learning_rate": 1.5064627808781409e-05, "loss": 0.5965, "step": 189305 }, { "epoch": 2.1, "learning_rate": 1.5063705081642898e-05, "loss": 0.6477, "step": 189310 }, { "epoch": 2.1, "learning_rate": 1.5062782354504385e-05, "loss": 0.6447, "step": 189315 }, { "epoch": 2.1, "learning_rate": 1.5061859627365874e-05, "loss": 0.6087, "step": 189320 }, { "epoch": 2.1, "learning_rate": 1.506093690022736e-05, "loss": 0.5387, "step": 189325 }, { "epoch": 2.1, "learning_rate": 1.5060014173088848e-05, "loss": 0.5756, "step": 189330 }, { "epoch": 2.1, "learning_rate": 1.5059091445950336e-05, "loss": 0.5515, "step": 189335 }, { "epoch": 2.1, "learning_rate": 1.5058168718811824e-05, "loss": 0.6104, "step": 189340 }, { "epoch": 2.1, "learning_rate": 1.505724599167331e-05, "loss": 0.5983, "step": 189345 }, { "epoch": 2.1, "learning_rate": 1.50563232645348e-05, "loss": 0.5688, "step": 189350 }, { "epoch": 2.1, "learning_rate": 1.5055400537396286e-05, "loss": 0.5634, "step": 189355 }, { "epoch": 2.1, "learning_rate": 1.5054477810257775e-05, "loss": 0.6414, "step": 189360 }, { "epoch": 2.1, "learning_rate": 1.5053555083119261e-05, "loss": 0.5738, "step": 189365 }, { "epoch": 2.1, "learning_rate": 1.5052632355980751e-05, "loss": 0.6549, "step": 189370 }, { "epoch": 2.1, "learning_rate": 1.5051709628842237e-05, "loss": 0.5502, "step": 189375 }, { "epoch": 2.1, "learning_rate": 1.5050786901703723e-05, "loss": 0.6355, "step": 189380 }, { "epoch": 2.1, "learning_rate": 1.5049864174565213e-05, "loss": 0.625, "step": 189385 }, { "epoch": 2.1, "learning_rate": 1.5048941447426699e-05, "loss": 0.6416, "step": 189390 }, { "epoch": 2.1, "learning_rate": 1.5048018720288187e-05, "loss": 0.5659, "step": 189395 }, { "epoch": 2.1, "learning_rate": 1.5047095993149673e-05, "loss": 0.5968, "step": 189400 }, { "epoch": 2.1, "learning_rate": 1.5046173266011162e-05, "loss": 0.5811, "step": 189405 }, { "epoch": 2.1, "learning_rate": 1.5045250538872649e-05, "loss": 0.6188, "step": 189410 }, { "epoch": 2.1, "learning_rate": 1.5044327811734138e-05, "loss": 0.6349, "step": 189415 }, { "epoch": 2.1, "learning_rate": 1.5043405084595624e-05, "loss": 0.6383, "step": 189420 }, { "epoch": 2.1, "learning_rate": 1.5042482357457114e-05, "loss": 0.6074, "step": 189425 }, { "epoch": 2.1, "learning_rate": 1.50415596303186e-05, "loss": 0.6022, "step": 189430 }, { "epoch": 2.1, "learning_rate": 1.5040636903180088e-05, "loss": 0.6116, "step": 189435 }, { "epoch": 2.1, "learning_rate": 1.5039714176041576e-05, "loss": 0.6218, "step": 189440 }, { "epoch": 2.1, "learning_rate": 1.5038791448903064e-05, "loss": 0.6087, "step": 189445 }, { "epoch": 2.1, "learning_rate": 1.503786872176455e-05, "loss": 0.5838, "step": 189450 }, { "epoch": 2.1, "learning_rate": 1.5036945994626036e-05, "loss": 0.5916, "step": 189455 }, { "epoch": 2.1, "learning_rate": 1.5036023267487525e-05, "loss": 0.5704, "step": 189460 }, { "epoch": 2.1, "learning_rate": 1.5035100540349012e-05, "loss": 0.5716, "step": 189465 }, { "epoch": 2.1, "learning_rate": 1.5034177813210501e-05, "loss": 0.5477, "step": 189470 }, { "epoch": 2.1, "learning_rate": 1.5033255086071987e-05, "loss": 0.6151, "step": 189475 }, { "epoch": 2.1, "learning_rate": 1.5032332358933477e-05, "loss": 0.5829, "step": 189480 }, { "epoch": 2.1, "learning_rate": 1.5031409631794963e-05, "loss": 0.6326, "step": 189485 }, { "epoch": 2.1, "learning_rate": 1.5030486904656452e-05, "loss": 0.6235, "step": 189490 }, { "epoch": 2.1, "learning_rate": 1.5029564177517939e-05, "loss": 0.6498, "step": 189495 }, { "epoch": 2.1, "learning_rate": 1.5028641450379427e-05, "loss": 0.5707, "step": 189500 }, { "epoch": 2.1, "learning_rate": 1.5027718723240913e-05, "loss": 0.5627, "step": 189505 }, { "epoch": 2.1, "learning_rate": 1.5026795996102402e-05, "loss": 0.5982, "step": 189510 }, { "epoch": 2.1, "learning_rate": 1.5025873268963888e-05, "loss": 0.6491, "step": 189515 }, { "epoch": 2.1, "learning_rate": 1.5024950541825378e-05, "loss": 0.6409, "step": 189520 }, { "epoch": 2.1, "learning_rate": 1.5024027814686864e-05, "loss": 0.6216, "step": 189525 }, { "epoch": 2.1, "learning_rate": 1.502310508754835e-05, "loss": 0.6441, "step": 189530 }, { "epoch": 2.1, "learning_rate": 1.502218236040984e-05, "loss": 0.5509, "step": 189535 }, { "epoch": 2.1, "learning_rate": 1.5021259633271326e-05, "loss": 0.624, "step": 189540 }, { "epoch": 2.1, "learning_rate": 1.5020336906132815e-05, "loss": 0.6109, "step": 189545 }, { "epoch": 2.1, "learning_rate": 1.5019414178994302e-05, "loss": 0.6155, "step": 189550 }, { "epoch": 2.1, "learning_rate": 1.501849145185579e-05, "loss": 0.6431, "step": 189555 }, { "epoch": 2.1, "learning_rate": 1.5017568724717276e-05, "loss": 0.6003, "step": 189560 }, { "epoch": 2.1, "learning_rate": 1.5016645997578765e-05, "loss": 0.5728, "step": 189565 }, { "epoch": 2.1, "learning_rate": 1.5015723270440251e-05, "loss": 0.5873, "step": 189570 }, { "epoch": 2.1, "learning_rate": 1.501480054330174e-05, "loss": 0.6495, "step": 189575 }, { "epoch": 2.1, "learning_rate": 1.5013877816163227e-05, "loss": 0.6245, "step": 189580 }, { "epoch": 2.1, "learning_rate": 1.5012955089024717e-05, "loss": 0.5877, "step": 189585 }, { "epoch": 2.1, "learning_rate": 1.5012032361886203e-05, "loss": 0.5771, "step": 189590 }, { "epoch": 2.1, "learning_rate": 1.5011109634747692e-05, "loss": 0.5601, "step": 189595 }, { "epoch": 2.1, "learning_rate": 1.5010186907609178e-05, "loss": 0.6445, "step": 189600 }, { "epoch": 2.1, "learning_rate": 1.5009264180470665e-05, "loss": 0.5848, "step": 189605 }, { "epoch": 2.1, "learning_rate": 1.5008341453332152e-05, "loss": 0.5402, "step": 189610 }, { "epoch": 2.1, "learning_rate": 1.500741872619364e-05, "loss": 0.5428, "step": 189615 }, { "epoch": 2.1, "learning_rate": 1.5006495999055128e-05, "loss": 0.6137, "step": 189620 }, { "epoch": 2.1, "learning_rate": 1.5005573271916614e-05, "loss": 0.6051, "step": 189625 }, { "epoch": 2.1, "learning_rate": 1.5004650544778104e-05, "loss": 0.5955, "step": 189630 }, { "epoch": 2.1, "learning_rate": 1.500372781763959e-05, "loss": 0.5682, "step": 189635 }, { "epoch": 2.1, "learning_rate": 1.500280509050108e-05, "loss": 0.5931, "step": 189640 }, { "epoch": 2.1, "learning_rate": 1.5001882363362566e-05, "loss": 0.5907, "step": 189645 }, { "epoch": 2.1, "learning_rate": 1.5000959636224055e-05, "loss": 0.579, "step": 189650 }, { "epoch": 2.1, "learning_rate": 1.5000036909085541e-05, "loss": 0.5755, "step": 189655 }, { "epoch": 2.1, "learning_rate": 1.499911418194703e-05, "loss": 0.576, "step": 189660 }, { "epoch": 2.1, "learning_rate": 1.4998191454808515e-05, "loss": 0.6111, "step": 189665 }, { "epoch": 2.1, "learning_rate": 1.4997268727670005e-05, "loss": 0.6161, "step": 189670 }, { "epoch": 2.1, "learning_rate": 1.4996346000531491e-05, "loss": 0.5798, "step": 189675 }, { "epoch": 2.1, "learning_rate": 1.4995423273392977e-05, "loss": 0.6128, "step": 189680 }, { "epoch": 2.1, "learning_rate": 1.4994500546254467e-05, "loss": 0.6434, "step": 189685 }, { "epoch": 2.1, "learning_rate": 1.4993577819115953e-05, "loss": 0.6216, "step": 189690 }, { "epoch": 2.1, "learning_rate": 1.4992655091977442e-05, "loss": 0.6272, "step": 189695 }, { "epoch": 2.1, "learning_rate": 1.4991732364838929e-05, "loss": 0.6052, "step": 189700 }, { "epoch": 2.1, "learning_rate": 1.4990809637700418e-05, "loss": 0.6037, "step": 189705 }, { "epoch": 2.1, "learning_rate": 1.4989886910561904e-05, "loss": 0.5969, "step": 189710 }, { "epoch": 2.1, "learning_rate": 1.4988964183423392e-05, "loss": 0.6126, "step": 189715 }, { "epoch": 2.1, "learning_rate": 1.498804145628488e-05, "loss": 0.5765, "step": 189720 }, { "epoch": 2.1, "learning_rate": 1.4987118729146368e-05, "loss": 0.5745, "step": 189725 }, { "epoch": 2.1, "learning_rate": 1.4986196002007854e-05, "loss": 0.6351, "step": 189730 }, { "epoch": 2.1, "learning_rate": 1.4985273274869344e-05, "loss": 0.6009, "step": 189735 }, { "epoch": 2.1, "learning_rate": 1.498435054773083e-05, "loss": 0.6532, "step": 189740 }, { "epoch": 2.1, "learning_rate": 1.498342782059232e-05, "loss": 0.5728, "step": 189745 }, { "epoch": 2.1, "learning_rate": 1.4982505093453805e-05, "loss": 0.5368, "step": 189750 }, { "epoch": 2.1, "learning_rate": 1.4981582366315292e-05, "loss": 0.6256, "step": 189755 }, { "epoch": 2.1, "learning_rate": 1.4980659639176781e-05, "loss": 0.6741, "step": 189760 }, { "epoch": 2.1, "learning_rate": 1.4979736912038267e-05, "loss": 0.6089, "step": 189765 }, { "epoch": 2.1, "learning_rate": 1.4978814184899757e-05, "loss": 0.5974, "step": 189770 }, { "epoch": 2.1, "learning_rate": 1.4977891457761243e-05, "loss": 0.632, "step": 189775 }, { "epoch": 2.1, "learning_rate": 1.497696873062273e-05, "loss": 0.5926, "step": 189780 }, { "epoch": 2.1, "learning_rate": 1.4976046003484217e-05, "loss": 0.578, "step": 189785 }, { "epoch": 2.1, "learning_rate": 1.4975123276345706e-05, "loss": 0.5769, "step": 189790 }, { "epoch": 2.1, "learning_rate": 1.4974200549207193e-05, "loss": 0.5615, "step": 189795 }, { "epoch": 2.1, "learning_rate": 1.4973277822068682e-05, "loss": 0.5979, "step": 189800 }, { "epoch": 2.1, "learning_rate": 1.4972355094930168e-05, "loss": 0.6329, "step": 189805 }, { "epoch": 2.1, "learning_rate": 1.4971432367791658e-05, "loss": 0.6232, "step": 189810 }, { "epoch": 2.1, "learning_rate": 1.4970509640653144e-05, "loss": 0.6065, "step": 189815 }, { "epoch": 2.1, "learning_rate": 1.4969586913514632e-05, "loss": 0.6093, "step": 189820 }, { "epoch": 2.1, "learning_rate": 1.496866418637612e-05, "loss": 0.599, "step": 189825 }, { "epoch": 2.1, "learning_rate": 1.4967741459237606e-05, "loss": 0.5756, "step": 189830 }, { "epoch": 2.1, "learning_rate": 1.4966818732099094e-05, "loss": 0.5379, "step": 189835 }, { "epoch": 2.1, "learning_rate": 1.496589600496058e-05, "loss": 0.5751, "step": 189840 }, { "epoch": 2.1, "learning_rate": 1.496497327782207e-05, "loss": 0.6928, "step": 189845 }, { "epoch": 2.1, "learning_rate": 1.4964050550683556e-05, "loss": 0.5987, "step": 189850 }, { "epoch": 2.1, "learning_rate": 1.4963127823545045e-05, "loss": 0.5901, "step": 189855 }, { "epoch": 2.1, "learning_rate": 1.4962205096406531e-05, "loss": 0.6288, "step": 189860 }, { "epoch": 2.1, "learning_rate": 1.496128236926802e-05, "loss": 0.6066, "step": 189865 }, { "epoch": 2.1, "learning_rate": 1.4960359642129507e-05, "loss": 0.6662, "step": 189870 }, { "epoch": 2.1, "learning_rate": 1.4959436914990997e-05, "loss": 0.6094, "step": 189875 }, { "epoch": 2.1, "learning_rate": 1.4958514187852483e-05, "loss": 0.5908, "step": 189880 }, { "epoch": 2.1, "learning_rate": 1.495759146071397e-05, "loss": 0.6606, "step": 189885 }, { "epoch": 2.1, "learning_rate": 1.4956668733575457e-05, "loss": 0.5831, "step": 189890 }, { "epoch": 2.1, "learning_rate": 1.4955746006436946e-05, "loss": 0.5792, "step": 189895 }, { "epoch": 2.1, "learning_rate": 1.4954823279298432e-05, "loss": 0.5816, "step": 189900 }, { "epoch": 2.1, "learning_rate": 1.4953900552159919e-05, "loss": 0.5998, "step": 189905 }, { "epoch": 2.1, "learning_rate": 1.4952977825021408e-05, "loss": 0.5667, "step": 189910 }, { "epoch": 2.1, "learning_rate": 1.4952055097882894e-05, "loss": 0.5715, "step": 189915 }, { "epoch": 2.1, "learning_rate": 1.4951132370744384e-05, "loss": 0.6046, "step": 189920 }, { "epoch": 2.1, "learning_rate": 1.495020964360587e-05, "loss": 0.6406, "step": 189925 }, { "epoch": 2.1, "learning_rate": 1.494928691646736e-05, "loss": 0.6016, "step": 189930 }, { "epoch": 2.1, "learning_rate": 1.4948364189328846e-05, "loss": 0.6181, "step": 189935 }, { "epoch": 2.1, "learning_rate": 1.4947441462190333e-05, "loss": 0.6052, "step": 189940 }, { "epoch": 2.1, "learning_rate": 1.494651873505182e-05, "loss": 0.5865, "step": 189945 }, { "epoch": 2.1, "learning_rate": 1.494559600791331e-05, "loss": 0.5884, "step": 189950 }, { "epoch": 2.1, "learning_rate": 1.4944673280774795e-05, "loss": 0.5898, "step": 189955 }, { "epoch": 2.1, "learning_rate": 1.4943750553636285e-05, "loss": 0.5888, "step": 189960 }, { "epoch": 2.1, "learning_rate": 1.4942827826497771e-05, "loss": 0.5802, "step": 189965 }, { "epoch": 2.1, "learning_rate": 1.494190509935926e-05, "loss": 0.588, "step": 189970 }, { "epoch": 2.1, "learning_rate": 1.4940982372220747e-05, "loss": 0.6109, "step": 189975 }, { "epoch": 2.1, "learning_rate": 1.4940059645082236e-05, "loss": 0.5541, "step": 189980 }, { "epoch": 2.1, "learning_rate": 1.4939136917943722e-05, "loss": 0.6108, "step": 189985 }, { "epoch": 2.1, "learning_rate": 1.4938214190805209e-05, "loss": 0.5358, "step": 189990 }, { "epoch": 2.1, "learning_rate": 1.4937291463666696e-05, "loss": 0.6421, "step": 189995 }, { "epoch": 2.1, "learning_rate": 1.4936368736528184e-05, "loss": 0.6039, "step": 190000 }, { "epoch": 2.1, "eval_loss": 0.5984124541282654, "eval_runtime": 69.0984, "eval_samples_per_second": 28.944, "eval_steps_per_second": 14.472, "step": 190000 }, { "epoch": 2.1, "learning_rate": 1.4935446009389672e-05, "loss": 0.5768, "step": 190005 }, { "epoch": 2.1, "learning_rate": 1.4934523282251158e-05, "loss": 0.6617, "step": 190010 }, { "epoch": 2.1, "learning_rate": 1.4933600555112648e-05, "loss": 0.5879, "step": 190015 }, { "epoch": 2.1, "learning_rate": 1.4932677827974134e-05, "loss": 0.5767, "step": 190020 }, { "epoch": 2.1, "learning_rate": 1.4931755100835624e-05, "loss": 0.5768, "step": 190025 }, { "epoch": 2.1, "learning_rate": 1.493083237369711e-05, "loss": 0.668, "step": 190030 }, { "epoch": 2.1, "learning_rate": 1.49299096465586e-05, "loss": 0.5937, "step": 190035 }, { "epoch": 2.1, "learning_rate": 1.4928986919420085e-05, "loss": 0.5583, "step": 190040 }, { "epoch": 2.1, "learning_rate": 1.4928064192281573e-05, "loss": 0.6137, "step": 190045 }, { "epoch": 2.1, "learning_rate": 1.4927141465143061e-05, "loss": 0.5902, "step": 190050 }, { "epoch": 2.1, "learning_rate": 1.4926218738004549e-05, "loss": 0.5902, "step": 190055 }, { "epoch": 2.1, "learning_rate": 1.4925296010866035e-05, "loss": 0.5991, "step": 190060 }, { "epoch": 2.1, "learning_rate": 1.4924373283727521e-05, "loss": 0.5853, "step": 190065 }, { "epoch": 2.1, "learning_rate": 1.492345055658901e-05, "loss": 0.619, "step": 190070 }, { "epoch": 2.1, "learning_rate": 1.4922527829450497e-05, "loss": 0.5646, "step": 190075 }, { "epoch": 2.1, "learning_rate": 1.4921605102311986e-05, "loss": 0.5763, "step": 190080 }, { "epoch": 2.1, "learning_rate": 1.4920682375173473e-05, "loss": 0.628, "step": 190085 }, { "epoch": 2.1, "learning_rate": 1.4919759648034962e-05, "loss": 0.589, "step": 190090 }, { "epoch": 2.1, "learning_rate": 1.4918836920896448e-05, "loss": 0.57, "step": 190095 }, { "epoch": 2.1, "learning_rate": 1.4917914193757936e-05, "loss": 0.6244, "step": 190100 }, { "epoch": 2.1, "learning_rate": 1.4916991466619424e-05, "loss": 0.5956, "step": 190105 }, { "epoch": 2.11, "learning_rate": 1.4916068739480912e-05, "loss": 0.6665, "step": 190110 }, { "epoch": 2.11, "learning_rate": 1.4915146012342398e-05, "loss": 0.6027, "step": 190115 }, { "epoch": 2.11, "learning_rate": 1.4914223285203888e-05, "loss": 0.6184, "step": 190120 }, { "epoch": 2.11, "learning_rate": 1.4913300558065374e-05, "loss": 0.5434, "step": 190125 }, { "epoch": 2.11, "learning_rate": 1.4912377830926863e-05, "loss": 0.5756, "step": 190130 }, { "epoch": 2.11, "learning_rate": 1.491145510378835e-05, "loss": 0.5946, "step": 190135 }, { "epoch": 2.11, "learning_rate": 1.4910532376649836e-05, "loss": 0.6054, "step": 190140 }, { "epoch": 2.11, "learning_rate": 1.4909609649511325e-05, "loss": 0.5925, "step": 190145 }, { "epoch": 2.11, "learning_rate": 1.4908686922372811e-05, "loss": 0.6363, "step": 190150 }, { "epoch": 2.11, "learning_rate": 1.49077641952343e-05, "loss": 0.571, "step": 190155 }, { "epoch": 2.11, "learning_rate": 1.4906841468095787e-05, "loss": 0.6301, "step": 190160 }, { "epoch": 2.11, "learning_rate": 1.4905918740957275e-05, "loss": 0.5831, "step": 190165 }, { "epoch": 2.11, "learning_rate": 1.4904996013818761e-05, "loss": 0.6187, "step": 190170 }, { "epoch": 2.11, "learning_rate": 1.490407328668025e-05, "loss": 0.584, "step": 190175 }, { "epoch": 2.11, "learning_rate": 1.4903150559541737e-05, "loss": 0.6248, "step": 190180 }, { "epoch": 2.11, "learning_rate": 1.4902227832403226e-05, "loss": 0.6287, "step": 190185 }, { "epoch": 2.11, "learning_rate": 1.4901305105264712e-05, "loss": 0.6371, "step": 190190 }, { "epoch": 2.11, "learning_rate": 1.4900382378126202e-05, "loss": 0.6651, "step": 190195 }, { "epoch": 2.11, "learning_rate": 1.4899459650987688e-05, "loss": 0.5492, "step": 190200 }, { "epoch": 2.11, "learning_rate": 1.4898536923849178e-05, "loss": 0.6016, "step": 190205 }, { "epoch": 2.11, "learning_rate": 1.4897614196710664e-05, "loss": 0.58, "step": 190210 }, { "epoch": 2.11, "learning_rate": 1.489669146957215e-05, "loss": 0.5775, "step": 190215 }, { "epoch": 2.11, "learning_rate": 1.4895768742433638e-05, "loss": 0.6426, "step": 190220 }, { "epoch": 2.11, "learning_rate": 1.4894846015295124e-05, "loss": 0.6268, "step": 190225 }, { "epoch": 2.11, "learning_rate": 1.4893923288156613e-05, "loss": 0.5909, "step": 190230 }, { "epoch": 2.11, "learning_rate": 1.48930005610181e-05, "loss": 0.5489, "step": 190235 }, { "epoch": 2.11, "learning_rate": 1.4892077833879589e-05, "loss": 0.567, "step": 190240 }, { "epoch": 2.11, "learning_rate": 1.4891155106741075e-05, "loss": 0.6465, "step": 190245 }, { "epoch": 2.11, "learning_rate": 1.4890232379602565e-05, "loss": 0.595, "step": 190250 }, { "epoch": 2.11, "learning_rate": 1.4889309652464051e-05, "loss": 0.5573, "step": 190255 }, { "epoch": 2.11, "learning_rate": 1.488838692532554e-05, "loss": 0.6257, "step": 190260 }, { "epoch": 2.11, "learning_rate": 1.4887464198187027e-05, "loss": 0.6062, "step": 190265 }, { "epoch": 2.11, "learning_rate": 1.4886541471048515e-05, "loss": 0.633, "step": 190270 }, { "epoch": 2.11, "learning_rate": 1.488561874391e-05, "loss": 0.5828, "step": 190275 }, { "epoch": 2.11, "learning_rate": 1.488469601677149e-05, "loss": 0.6005, "step": 190280 }, { "epoch": 2.11, "learning_rate": 1.4883773289632976e-05, "loss": 0.5971, "step": 190285 }, { "epoch": 2.11, "learning_rate": 1.4882850562494463e-05, "loss": 0.5832, "step": 190290 }, { "epoch": 2.11, "learning_rate": 1.4881927835355952e-05, "loss": 0.601, "step": 190295 }, { "epoch": 2.11, "learning_rate": 1.4881005108217438e-05, "loss": 0.6398, "step": 190300 }, { "epoch": 2.11, "learning_rate": 1.4880082381078928e-05, "loss": 0.5766, "step": 190305 }, { "epoch": 2.11, "learning_rate": 1.4879159653940414e-05, "loss": 0.6175, "step": 190310 }, { "epoch": 2.11, "learning_rate": 1.4878236926801903e-05, "loss": 0.6124, "step": 190315 }, { "epoch": 2.11, "learning_rate": 1.487731419966339e-05, "loss": 0.6117, "step": 190320 }, { "epoch": 2.11, "learning_rate": 1.4876391472524877e-05, "loss": 0.6022, "step": 190325 }, { "epoch": 2.11, "learning_rate": 1.4875468745386364e-05, "loss": 0.6058, "step": 190330 }, { "epoch": 2.11, "learning_rate": 1.4874546018247853e-05, "loss": 0.5314, "step": 190335 }, { "epoch": 2.11, "learning_rate": 1.487362329110934e-05, "loss": 0.6359, "step": 190340 }, { "epoch": 2.11, "learning_rate": 1.4872700563970829e-05, "loss": 0.5966, "step": 190345 }, { "epoch": 2.11, "learning_rate": 1.4871777836832315e-05, "loss": 0.5749, "step": 190350 }, { "epoch": 2.11, "learning_rate": 1.4870855109693805e-05, "loss": 0.6158, "step": 190355 }, { "epoch": 2.11, "learning_rate": 1.486993238255529e-05, "loss": 0.582, "step": 190360 }, { "epoch": 2.11, "learning_rate": 1.4869009655416777e-05, "loss": 0.6039, "step": 190365 }, { "epoch": 2.11, "learning_rate": 1.4868086928278266e-05, "loss": 0.5678, "step": 190370 }, { "epoch": 2.11, "learning_rate": 1.4867164201139753e-05, "loss": 0.5591, "step": 190375 }, { "epoch": 2.11, "learning_rate": 1.486624147400124e-05, "loss": 0.5991, "step": 190380 }, { "epoch": 2.11, "learning_rate": 1.4865318746862728e-05, "loss": 0.5795, "step": 190385 }, { "epoch": 2.11, "learning_rate": 1.4864396019724216e-05, "loss": 0.6635, "step": 190390 }, { "epoch": 2.11, "learning_rate": 1.4863473292585702e-05, "loss": 0.6085, "step": 190395 }, { "epoch": 2.11, "learning_rate": 1.4862550565447192e-05, "loss": 0.633, "step": 190400 }, { "epoch": 2.11, "learning_rate": 1.4861627838308678e-05, "loss": 0.592, "step": 190405 }, { "epoch": 2.11, "learning_rate": 1.4860705111170168e-05, "loss": 0.589, "step": 190410 }, { "epoch": 2.11, "learning_rate": 1.4859782384031654e-05, "loss": 0.6076, "step": 190415 }, { "epoch": 2.11, "learning_rate": 1.4858859656893143e-05, "loss": 0.6352, "step": 190420 }, { "epoch": 2.11, "learning_rate": 1.485793692975463e-05, "loss": 0.6182, "step": 190425 }, { "epoch": 2.11, "learning_rate": 1.4857014202616117e-05, "loss": 0.5838, "step": 190430 }, { "epoch": 2.11, "learning_rate": 1.4856091475477605e-05, "loss": 0.6047, "step": 190435 }, { "epoch": 2.11, "learning_rate": 1.4855168748339091e-05, "loss": 0.6296, "step": 190440 }, { "epoch": 2.11, "learning_rate": 1.4854246021200579e-05, "loss": 0.6393, "step": 190445 }, { "epoch": 2.11, "learning_rate": 1.4853323294062065e-05, "loss": 0.62, "step": 190450 }, { "epoch": 2.11, "learning_rate": 1.4852400566923555e-05, "loss": 0.6024, "step": 190455 }, { "epoch": 2.11, "learning_rate": 1.4851477839785041e-05, "loss": 0.6315, "step": 190460 }, { "epoch": 2.11, "learning_rate": 1.485055511264653e-05, "loss": 0.5475, "step": 190465 }, { "epoch": 2.11, "learning_rate": 1.4849632385508017e-05, "loss": 0.5747, "step": 190470 }, { "epoch": 2.11, "learning_rate": 1.4848709658369506e-05, "loss": 0.6003, "step": 190475 }, { "epoch": 2.11, "learning_rate": 1.4847786931230992e-05, "loss": 0.6253, "step": 190480 }, { "epoch": 2.11, "learning_rate": 1.484686420409248e-05, "loss": 0.5711, "step": 190485 }, { "epoch": 2.11, "learning_rate": 1.4845941476953968e-05, "loss": 0.5909, "step": 190490 }, { "epoch": 2.11, "learning_rate": 1.4845018749815456e-05, "loss": 0.596, "step": 190495 }, { "epoch": 2.11, "learning_rate": 1.4844096022676942e-05, "loss": 0.5402, "step": 190500 }, { "epoch": 2.11, "learning_rate": 1.4843173295538432e-05, "loss": 0.59, "step": 190505 }, { "epoch": 2.11, "learning_rate": 1.4842250568399918e-05, "loss": 0.585, "step": 190510 }, { "epoch": 2.11, "learning_rate": 1.4841327841261404e-05, "loss": 0.599, "step": 190515 }, { "epoch": 2.11, "learning_rate": 1.4840405114122893e-05, "loss": 0.6016, "step": 190520 }, { "epoch": 2.11, "learning_rate": 1.483948238698438e-05, "loss": 0.5656, "step": 190525 }, { "epoch": 2.11, "learning_rate": 1.4838559659845869e-05, "loss": 0.6069, "step": 190530 }, { "epoch": 2.11, "learning_rate": 1.4837636932707355e-05, "loss": 0.6295, "step": 190535 }, { "epoch": 2.11, "learning_rate": 1.4836714205568845e-05, "loss": 0.5765, "step": 190540 }, { "epoch": 2.11, "learning_rate": 1.4835791478430331e-05, "loss": 0.5347, "step": 190545 }, { "epoch": 2.11, "learning_rate": 1.4834868751291819e-05, "loss": 0.556, "step": 190550 }, { "epoch": 2.11, "learning_rate": 1.4833946024153305e-05, "loss": 0.5891, "step": 190555 }, { "epoch": 2.11, "learning_rate": 1.4833023297014795e-05, "loss": 0.5789, "step": 190560 }, { "epoch": 2.11, "learning_rate": 1.483210056987628e-05, "loss": 0.5389, "step": 190565 }, { "epoch": 2.11, "learning_rate": 1.483117784273777e-05, "loss": 0.5839, "step": 190570 }, { "epoch": 2.11, "learning_rate": 1.4830255115599256e-05, "loss": 0.6458, "step": 190575 }, { "epoch": 2.11, "learning_rate": 1.4829332388460746e-05, "loss": 0.607, "step": 190580 }, { "epoch": 2.11, "learning_rate": 1.4828409661322232e-05, "loss": 0.6561, "step": 190585 }, { "epoch": 2.11, "learning_rate": 1.4827486934183718e-05, "loss": 0.6223, "step": 190590 }, { "epoch": 2.11, "learning_rate": 1.4826564207045208e-05, "loss": 0.5639, "step": 190595 }, { "epoch": 2.11, "learning_rate": 1.4825641479906694e-05, "loss": 0.6042, "step": 190600 }, { "epoch": 2.11, "learning_rate": 1.4824718752768182e-05, "loss": 0.5939, "step": 190605 }, { "epoch": 2.11, "learning_rate": 1.4823796025629668e-05, "loss": 0.6186, "step": 190610 }, { "epoch": 2.11, "learning_rate": 1.4822873298491157e-05, "loss": 0.6067, "step": 190615 }, { "epoch": 2.11, "learning_rate": 1.4821950571352644e-05, "loss": 0.5825, "step": 190620 }, { "epoch": 2.11, "learning_rate": 1.4821027844214133e-05, "loss": 0.6414, "step": 190625 }, { "epoch": 2.11, "learning_rate": 1.482010511707562e-05, "loss": 0.5784, "step": 190630 }, { "epoch": 2.11, "learning_rate": 1.4819182389937109e-05, "loss": 0.5866, "step": 190635 }, { "epoch": 2.11, "learning_rate": 1.4818259662798595e-05, "loss": 0.6854, "step": 190640 }, { "epoch": 2.11, "learning_rate": 1.4817336935660085e-05, "loss": 0.5732, "step": 190645 }, { "epoch": 2.11, "learning_rate": 1.481641420852157e-05, "loss": 0.5442, "step": 190650 }, { "epoch": 2.11, "learning_rate": 1.4815491481383059e-05, "loss": 0.5891, "step": 190655 }, { "epoch": 2.11, "learning_rate": 1.4814568754244545e-05, "loss": 0.5657, "step": 190660 }, { "epoch": 2.11, "learning_rate": 1.4813646027106033e-05, "loss": 0.6082, "step": 190665 }, { "epoch": 2.11, "learning_rate": 1.481272329996752e-05, "loss": 0.6068, "step": 190670 }, { "epoch": 2.11, "learning_rate": 1.4811800572829007e-05, "loss": 0.5995, "step": 190675 }, { "epoch": 2.11, "learning_rate": 1.4810877845690496e-05, "loss": 0.5738, "step": 190680 }, { "epoch": 2.11, "learning_rate": 1.4809955118551982e-05, "loss": 0.5805, "step": 190685 }, { "epoch": 2.11, "learning_rate": 1.4809032391413472e-05, "loss": 0.6221, "step": 190690 }, { "epoch": 2.11, "learning_rate": 1.4808109664274958e-05, "loss": 0.6096, "step": 190695 }, { "epoch": 2.11, "learning_rate": 1.4807186937136448e-05, "loss": 0.5709, "step": 190700 }, { "epoch": 2.11, "learning_rate": 1.4806264209997934e-05, "loss": 0.5982, "step": 190705 }, { "epoch": 2.11, "learning_rate": 1.4805341482859422e-05, "loss": 0.6424, "step": 190710 }, { "epoch": 2.11, "learning_rate": 1.480441875572091e-05, "loss": 0.6047, "step": 190715 }, { "epoch": 2.11, "learning_rate": 1.4803496028582397e-05, "loss": 0.6108, "step": 190720 }, { "epoch": 2.11, "learning_rate": 1.4802573301443883e-05, "loss": 0.6173, "step": 190725 }, { "epoch": 2.11, "learning_rate": 1.4801650574305373e-05, "loss": 0.6114, "step": 190730 }, { "epoch": 2.11, "learning_rate": 1.4800727847166859e-05, "loss": 0.6114, "step": 190735 }, { "epoch": 2.11, "learning_rate": 1.4799805120028345e-05, "loss": 0.5922, "step": 190740 }, { "epoch": 2.11, "learning_rate": 1.4798882392889835e-05, "loss": 0.6247, "step": 190745 }, { "epoch": 2.11, "learning_rate": 1.4797959665751321e-05, "loss": 0.5965, "step": 190750 }, { "epoch": 2.11, "learning_rate": 1.479703693861281e-05, "loss": 0.6346, "step": 190755 }, { "epoch": 2.11, "learning_rate": 1.4796114211474297e-05, "loss": 0.6546, "step": 190760 }, { "epoch": 2.11, "learning_rate": 1.4795191484335784e-05, "loss": 0.5673, "step": 190765 }, { "epoch": 2.11, "learning_rate": 1.4794268757197272e-05, "loss": 0.5918, "step": 190770 }, { "epoch": 2.11, "learning_rate": 1.479334603005876e-05, "loss": 0.626, "step": 190775 }, { "epoch": 2.11, "learning_rate": 1.4792423302920246e-05, "loss": 0.5892, "step": 190780 }, { "epoch": 2.11, "learning_rate": 1.4791500575781736e-05, "loss": 0.629, "step": 190785 }, { "epoch": 2.11, "learning_rate": 1.4790577848643222e-05, "loss": 0.5419, "step": 190790 }, { "epoch": 2.11, "learning_rate": 1.4789655121504712e-05, "loss": 0.554, "step": 190795 }, { "epoch": 2.11, "learning_rate": 1.4788732394366198e-05, "loss": 0.6374, "step": 190800 }, { "epoch": 2.11, "learning_rate": 1.4787809667227687e-05, "loss": 0.6053, "step": 190805 }, { "epoch": 2.11, "learning_rate": 1.4786886940089173e-05, "loss": 0.5713, "step": 190810 }, { "epoch": 2.11, "learning_rate": 1.478596421295066e-05, "loss": 0.6214, "step": 190815 }, { "epoch": 2.11, "learning_rate": 1.4785041485812149e-05, "loss": 0.6145, "step": 190820 }, { "epoch": 2.11, "learning_rate": 1.4784118758673635e-05, "loss": 0.6411, "step": 190825 }, { "epoch": 2.11, "learning_rate": 1.4783196031535123e-05, "loss": 0.5989, "step": 190830 }, { "epoch": 2.11, "learning_rate": 1.478227330439661e-05, "loss": 0.5877, "step": 190835 }, { "epoch": 2.11, "learning_rate": 1.4781350577258099e-05, "loss": 0.5743, "step": 190840 }, { "epoch": 2.11, "learning_rate": 1.4780427850119585e-05, "loss": 0.6201, "step": 190845 }, { "epoch": 2.11, "learning_rate": 1.4779505122981074e-05, "loss": 0.626, "step": 190850 }, { "epoch": 2.11, "learning_rate": 1.477858239584256e-05, "loss": 0.6136, "step": 190855 }, { "epoch": 2.11, "learning_rate": 1.477765966870405e-05, "loss": 0.5464, "step": 190860 }, { "epoch": 2.11, "learning_rate": 1.4776736941565536e-05, "loss": 0.6291, "step": 190865 }, { "epoch": 2.11, "learning_rate": 1.4775814214427026e-05, "loss": 0.613, "step": 190870 }, { "epoch": 2.11, "learning_rate": 1.4774891487288512e-05, "loss": 0.6757, "step": 190875 }, { "epoch": 2.11, "learning_rate": 1.477396876015e-05, "loss": 0.6284, "step": 190880 }, { "epoch": 2.11, "learning_rate": 1.4773046033011486e-05, "loss": 0.6624, "step": 190885 }, { "epoch": 2.11, "learning_rate": 1.4772123305872976e-05, "loss": 0.5807, "step": 190890 }, { "epoch": 2.11, "learning_rate": 1.4771200578734462e-05, "loss": 0.6447, "step": 190895 }, { "epoch": 2.11, "learning_rate": 1.4770277851595948e-05, "loss": 0.6403, "step": 190900 }, { "epoch": 2.11, "learning_rate": 1.4769355124457437e-05, "loss": 0.6048, "step": 190905 }, { "epoch": 2.11, "learning_rate": 1.4768432397318924e-05, "loss": 0.6356, "step": 190910 }, { "epoch": 2.11, "learning_rate": 1.4767509670180413e-05, "loss": 0.6035, "step": 190915 }, { "epoch": 2.11, "learning_rate": 1.47665869430419e-05, "loss": 0.6348, "step": 190920 }, { "epoch": 2.11, "learning_rate": 1.4765664215903389e-05, "loss": 0.6543, "step": 190925 }, { "epoch": 2.11, "learning_rate": 1.4764741488764875e-05, "loss": 0.6746, "step": 190930 }, { "epoch": 2.11, "learning_rate": 1.4763818761626363e-05, "loss": 0.6374, "step": 190935 }, { "epoch": 2.11, "learning_rate": 1.4762896034487849e-05, "loss": 0.5979, "step": 190940 }, { "epoch": 2.11, "learning_rate": 1.4761973307349339e-05, "loss": 0.5971, "step": 190945 }, { "epoch": 2.11, "learning_rate": 1.4761050580210825e-05, "loss": 0.5914, "step": 190950 }, { "epoch": 2.11, "learning_rate": 1.4760127853072314e-05, "loss": 0.6234, "step": 190955 }, { "epoch": 2.11, "learning_rate": 1.47592051259338e-05, "loss": 0.5963, "step": 190960 }, { "epoch": 2.11, "learning_rate": 1.475828239879529e-05, "loss": 0.5501, "step": 190965 }, { "epoch": 2.11, "learning_rate": 1.4757359671656776e-05, "loss": 0.6281, "step": 190970 }, { "epoch": 2.11, "learning_rate": 1.4756436944518262e-05, "loss": 0.6624, "step": 190975 }, { "epoch": 2.11, "learning_rate": 1.4755514217379752e-05, "loss": 0.6198, "step": 190980 }, { "epoch": 2.11, "learning_rate": 1.4754591490241238e-05, "loss": 0.6438, "step": 190985 }, { "epoch": 2.11, "learning_rate": 1.4753668763102726e-05, "loss": 0.5982, "step": 190990 }, { "epoch": 2.11, "learning_rate": 1.4752746035964212e-05, "loss": 0.6118, "step": 190995 }, { "epoch": 2.11, "learning_rate": 1.4751823308825701e-05, "loss": 0.5911, "step": 191000 }, { "epoch": 2.11, "eval_loss": 0.5952701568603516, "eval_runtime": 69.1512, "eval_samples_per_second": 28.922, "eval_steps_per_second": 14.461, "step": 191000 }, { "epoch": 2.11, "learning_rate": 1.4750900581687188e-05, "loss": 0.6245, "step": 191005 }, { "epoch": 2.12, "learning_rate": 1.4749977854548677e-05, "loss": 0.6158, "step": 191010 }, { "epoch": 2.12, "learning_rate": 1.4749055127410163e-05, "loss": 0.5666, "step": 191015 }, { "epoch": 2.12, "learning_rate": 1.4748132400271653e-05, "loss": 0.6071, "step": 191020 }, { "epoch": 2.12, "learning_rate": 1.4747209673133139e-05, "loss": 0.6053, "step": 191025 }, { "epoch": 2.12, "learning_rate": 1.4746286945994629e-05, "loss": 0.6115, "step": 191030 }, { "epoch": 2.12, "learning_rate": 1.4745364218856115e-05, "loss": 0.685, "step": 191035 }, { "epoch": 2.12, "learning_rate": 1.4744441491717603e-05, "loss": 0.6182, "step": 191040 }, { "epoch": 2.12, "learning_rate": 1.4743518764579089e-05, "loss": 0.6046, "step": 191045 }, { "epoch": 2.12, "learning_rate": 1.4742596037440577e-05, "loss": 0.5505, "step": 191050 }, { "epoch": 2.12, "learning_rate": 1.4741673310302064e-05, "loss": 0.6282, "step": 191055 }, { "epoch": 2.12, "learning_rate": 1.474075058316355e-05, "loss": 0.5737, "step": 191060 }, { "epoch": 2.12, "learning_rate": 1.473982785602504e-05, "loss": 0.6161, "step": 191065 }, { "epoch": 2.12, "learning_rate": 1.4738905128886526e-05, "loss": 0.572, "step": 191070 }, { "epoch": 2.12, "learning_rate": 1.4737982401748016e-05, "loss": 0.5913, "step": 191075 }, { "epoch": 2.12, "learning_rate": 1.4737059674609502e-05, "loss": 0.6127, "step": 191080 }, { "epoch": 2.12, "learning_rate": 1.4736136947470992e-05, "loss": 0.6156, "step": 191085 }, { "epoch": 2.12, "learning_rate": 1.4735214220332478e-05, "loss": 0.5763, "step": 191090 }, { "epoch": 2.12, "learning_rate": 1.4734291493193966e-05, "loss": 0.664, "step": 191095 }, { "epoch": 2.12, "learning_rate": 1.4733368766055453e-05, "loss": 0.6178, "step": 191100 }, { "epoch": 2.12, "learning_rate": 1.4732446038916941e-05, "loss": 0.6057, "step": 191105 }, { "epoch": 2.12, "learning_rate": 1.4731523311778427e-05, "loss": 0.6626, "step": 191110 }, { "epoch": 2.12, "learning_rate": 1.4730600584639917e-05, "loss": 0.6052, "step": 191115 }, { "epoch": 2.12, "learning_rate": 1.4729677857501403e-05, "loss": 0.5414, "step": 191120 }, { "epoch": 2.12, "learning_rate": 1.472875513036289e-05, "loss": 0.576, "step": 191125 }, { "epoch": 2.12, "learning_rate": 1.4727832403224379e-05, "loss": 0.6269, "step": 191130 }, { "epoch": 2.12, "learning_rate": 1.4726909676085865e-05, "loss": 0.586, "step": 191135 }, { "epoch": 2.12, "learning_rate": 1.4725986948947354e-05, "loss": 0.6073, "step": 191140 }, { "epoch": 2.12, "learning_rate": 1.472506422180884e-05, "loss": 0.675, "step": 191145 }, { "epoch": 2.12, "learning_rate": 1.472414149467033e-05, "loss": 0.5805, "step": 191150 }, { "epoch": 2.12, "learning_rate": 1.4723218767531816e-05, "loss": 0.6423, "step": 191155 }, { "epoch": 2.12, "learning_rate": 1.4722296040393304e-05, "loss": 0.5654, "step": 191160 }, { "epoch": 2.12, "learning_rate": 1.472137331325479e-05, "loss": 0.6143, "step": 191165 }, { "epoch": 2.12, "learning_rate": 1.472045058611628e-05, "loss": 0.6597, "step": 191170 }, { "epoch": 2.12, "learning_rate": 1.4719527858977766e-05, "loss": 0.6571, "step": 191175 }, { "epoch": 2.12, "learning_rate": 1.4718605131839256e-05, "loss": 0.6333, "step": 191180 }, { "epoch": 2.12, "learning_rate": 1.4717682404700742e-05, "loss": 0.602, "step": 191185 }, { "epoch": 2.12, "learning_rate": 1.4716759677562231e-05, "loss": 0.605, "step": 191190 }, { "epoch": 2.12, "learning_rate": 1.4715836950423717e-05, "loss": 0.5405, "step": 191195 }, { "epoch": 2.12, "learning_rate": 1.4714914223285204e-05, "loss": 0.5879, "step": 191200 }, { "epoch": 2.12, "learning_rate": 1.4713991496146693e-05, "loss": 0.6063, "step": 191205 }, { "epoch": 2.12, "learning_rate": 1.471306876900818e-05, "loss": 0.6379, "step": 191210 }, { "epoch": 2.12, "learning_rate": 1.4712146041869667e-05, "loss": 0.604, "step": 191215 }, { "epoch": 2.12, "learning_rate": 1.4711223314731153e-05, "loss": 0.5867, "step": 191220 }, { "epoch": 2.12, "learning_rate": 1.4710300587592643e-05, "loss": 0.5743, "step": 191225 }, { "epoch": 2.12, "learning_rate": 1.4709377860454129e-05, "loss": 0.5674, "step": 191230 }, { "epoch": 2.12, "learning_rate": 1.4708455133315619e-05, "loss": 0.6458, "step": 191235 }, { "epoch": 2.12, "learning_rate": 1.4707532406177105e-05, "loss": 0.615, "step": 191240 }, { "epoch": 2.12, "learning_rate": 1.4706609679038594e-05, "loss": 0.5858, "step": 191245 }, { "epoch": 2.12, "learning_rate": 1.470568695190008e-05, "loss": 0.6136, "step": 191250 }, { "epoch": 2.12, "learning_rate": 1.470476422476157e-05, "loss": 0.6236, "step": 191255 }, { "epoch": 2.12, "learning_rate": 1.4703841497623056e-05, "loss": 0.5797, "step": 191260 }, { "epoch": 2.12, "learning_rate": 1.4702918770484544e-05, "loss": 0.5808, "step": 191265 }, { "epoch": 2.12, "learning_rate": 1.470199604334603e-05, "loss": 0.5634, "step": 191270 }, { "epoch": 2.12, "learning_rate": 1.4701073316207516e-05, "loss": 0.632, "step": 191275 }, { "epoch": 2.12, "learning_rate": 1.4700150589069006e-05, "loss": 0.5645, "step": 191280 }, { "epoch": 2.12, "learning_rate": 1.4699227861930492e-05, "loss": 0.6006, "step": 191285 }, { "epoch": 2.12, "learning_rate": 1.4698305134791981e-05, "loss": 0.6356, "step": 191290 }, { "epoch": 2.12, "learning_rate": 1.4697382407653468e-05, "loss": 0.6013, "step": 191295 }, { "epoch": 2.12, "learning_rate": 1.4696459680514957e-05, "loss": 0.6444, "step": 191300 }, { "epoch": 2.12, "learning_rate": 1.4695536953376443e-05, "loss": 0.5675, "step": 191305 }, { "epoch": 2.12, "learning_rate": 1.4694614226237933e-05, "loss": 0.6262, "step": 191310 }, { "epoch": 2.12, "learning_rate": 1.4693691499099419e-05, "loss": 0.5652, "step": 191315 }, { "epoch": 2.12, "learning_rate": 1.4692768771960907e-05, "loss": 0.6163, "step": 191320 }, { "epoch": 2.12, "learning_rate": 1.4691846044822393e-05, "loss": 0.6117, "step": 191325 }, { "epoch": 2.12, "learning_rate": 1.4690923317683883e-05, "loss": 0.569, "step": 191330 }, { "epoch": 2.12, "learning_rate": 1.4690000590545369e-05, "loss": 0.5646, "step": 191335 }, { "epoch": 2.12, "learning_rate": 1.4689077863406858e-05, "loss": 0.6052, "step": 191340 }, { "epoch": 2.12, "learning_rate": 1.4688155136268344e-05, "loss": 0.6157, "step": 191345 }, { "epoch": 2.12, "learning_rate": 1.468723240912983e-05, "loss": 0.6503, "step": 191350 }, { "epoch": 2.12, "learning_rate": 1.468630968199132e-05, "loss": 0.5741, "step": 191355 }, { "epoch": 2.12, "learning_rate": 1.4685386954852806e-05, "loss": 0.5353, "step": 191360 }, { "epoch": 2.12, "learning_rate": 1.4684464227714296e-05, "loss": 0.5567, "step": 191365 }, { "epoch": 2.12, "learning_rate": 1.4683541500575782e-05, "loss": 0.588, "step": 191370 }, { "epoch": 2.12, "learning_rate": 1.468261877343727e-05, "loss": 0.6433, "step": 191375 }, { "epoch": 2.12, "learning_rate": 1.4681696046298758e-05, "loss": 0.5719, "step": 191380 }, { "epoch": 2.12, "learning_rate": 1.4680773319160246e-05, "loss": 0.5938, "step": 191385 }, { "epoch": 2.12, "learning_rate": 1.4679850592021732e-05, "loss": 0.5681, "step": 191390 }, { "epoch": 2.12, "learning_rate": 1.4678927864883221e-05, "loss": 0.6078, "step": 191395 }, { "epoch": 2.12, "learning_rate": 1.4678005137744707e-05, "loss": 0.5938, "step": 191400 }, { "epoch": 2.12, "learning_rate": 1.4677082410606197e-05, "loss": 0.5842, "step": 191405 }, { "epoch": 2.12, "learning_rate": 1.4676159683467683e-05, "loss": 0.5799, "step": 191410 }, { "epoch": 2.12, "learning_rate": 1.4675236956329173e-05, "loss": 0.6876, "step": 191415 }, { "epoch": 2.12, "learning_rate": 1.4674314229190659e-05, "loss": 0.6471, "step": 191420 }, { "epoch": 2.12, "learning_rate": 1.4673391502052145e-05, "loss": 0.6502, "step": 191425 }, { "epoch": 2.12, "learning_rate": 1.4672468774913633e-05, "loss": 0.5914, "step": 191430 }, { "epoch": 2.12, "learning_rate": 1.467154604777512e-05, "loss": 0.6158, "step": 191435 }, { "epoch": 2.12, "learning_rate": 1.4670623320636608e-05, "loss": 0.5894, "step": 191440 }, { "epoch": 2.12, "learning_rate": 1.4669700593498095e-05, "loss": 0.5483, "step": 191445 }, { "epoch": 2.12, "learning_rate": 1.4668777866359584e-05, "loss": 0.6016, "step": 191450 }, { "epoch": 2.12, "learning_rate": 1.466785513922107e-05, "loss": 0.5855, "step": 191455 }, { "epoch": 2.12, "learning_rate": 1.466693241208256e-05, "loss": 0.5928, "step": 191460 }, { "epoch": 2.12, "learning_rate": 1.4666009684944046e-05, "loss": 0.561, "step": 191465 }, { "epoch": 2.12, "learning_rate": 1.4665086957805536e-05, "loss": 0.5632, "step": 191470 }, { "epoch": 2.12, "learning_rate": 1.4664164230667022e-05, "loss": 0.667, "step": 191475 }, { "epoch": 2.12, "learning_rate": 1.466324150352851e-05, "loss": 0.6407, "step": 191480 }, { "epoch": 2.12, "learning_rate": 1.4662318776389997e-05, "loss": 0.616, "step": 191485 }, { "epoch": 2.12, "learning_rate": 1.4661396049251485e-05, "loss": 0.5882, "step": 191490 }, { "epoch": 2.12, "learning_rate": 1.4660473322112971e-05, "loss": 0.5911, "step": 191495 }, { "epoch": 2.12, "learning_rate": 1.4659550594974458e-05, "loss": 0.5635, "step": 191500 }, { "epoch": 2.12, "learning_rate": 1.4658627867835947e-05, "loss": 0.5298, "step": 191505 }, { "epoch": 2.12, "learning_rate": 1.4657705140697433e-05, "loss": 0.6273, "step": 191510 }, { "epoch": 2.12, "learning_rate": 1.4656782413558923e-05, "loss": 0.5743, "step": 191515 }, { "epoch": 2.12, "learning_rate": 1.4655859686420409e-05, "loss": 0.6251, "step": 191520 }, { "epoch": 2.12, "learning_rate": 1.4654936959281898e-05, "loss": 0.5865, "step": 191525 }, { "epoch": 2.12, "learning_rate": 1.4654014232143385e-05, "loss": 0.6201, "step": 191530 }, { "epoch": 2.12, "learning_rate": 1.4653091505004874e-05, "loss": 0.6192, "step": 191535 }, { "epoch": 2.12, "learning_rate": 1.465216877786636e-05, "loss": 0.5948, "step": 191540 }, { "epoch": 2.12, "learning_rate": 1.4651246050727848e-05, "loss": 0.5763, "step": 191545 }, { "epoch": 2.12, "learning_rate": 1.4650323323589334e-05, "loss": 0.5841, "step": 191550 }, { "epoch": 2.12, "learning_rate": 1.4649400596450824e-05, "loss": 0.5431, "step": 191555 }, { "epoch": 2.12, "learning_rate": 1.464847786931231e-05, "loss": 0.6293, "step": 191560 }, { "epoch": 2.12, "learning_rate": 1.46475551421738e-05, "loss": 0.6053, "step": 191565 }, { "epoch": 2.12, "learning_rate": 1.4646632415035286e-05, "loss": 0.6168, "step": 191570 }, { "epoch": 2.12, "learning_rate": 1.4645709687896772e-05, "loss": 0.5837, "step": 191575 }, { "epoch": 2.12, "learning_rate": 1.4644786960758261e-05, "loss": 0.606, "step": 191580 }, { "epoch": 2.12, "learning_rate": 1.4643864233619748e-05, "loss": 0.6015, "step": 191585 }, { "epoch": 2.12, "learning_rate": 1.4642941506481237e-05, "loss": 0.6572, "step": 191590 }, { "epoch": 2.12, "learning_rate": 1.4642018779342723e-05, "loss": 0.605, "step": 191595 }, { "epoch": 2.12, "learning_rate": 1.4641096052204211e-05, "loss": 0.6016, "step": 191600 }, { "epoch": 2.12, "learning_rate": 1.4640173325065697e-05, "loss": 0.5859, "step": 191605 }, { "epoch": 2.12, "learning_rate": 1.4639250597927187e-05, "loss": 0.6129, "step": 191610 }, { "epoch": 2.12, "learning_rate": 1.4638327870788673e-05, "loss": 0.5985, "step": 191615 }, { "epoch": 2.12, "learning_rate": 1.4637405143650163e-05, "loss": 0.5985, "step": 191620 }, { "epoch": 2.12, "learning_rate": 1.4636482416511649e-05, "loss": 0.6014, "step": 191625 }, { "epoch": 2.12, "learning_rate": 1.4635559689373138e-05, "loss": 0.5544, "step": 191630 }, { "epoch": 2.12, "learning_rate": 1.4634636962234624e-05, "loss": 0.6172, "step": 191635 }, { "epoch": 2.12, "learning_rate": 1.4633714235096114e-05, "loss": 0.6098, "step": 191640 }, { "epoch": 2.12, "learning_rate": 1.46327915079576e-05, "loss": 0.5765, "step": 191645 }, { "epoch": 2.12, "learning_rate": 1.4631868780819086e-05, "loss": 0.5879, "step": 191650 }, { "epoch": 2.12, "learning_rate": 1.4630946053680574e-05, "loss": 0.6378, "step": 191655 }, { "epoch": 2.12, "learning_rate": 1.4630023326542062e-05, "loss": 0.6228, "step": 191660 }, { "epoch": 2.12, "learning_rate": 1.462910059940355e-05, "loss": 0.5782, "step": 191665 }, { "epoch": 2.12, "learning_rate": 1.4628177872265036e-05, "loss": 0.6214, "step": 191670 }, { "epoch": 2.12, "learning_rate": 1.4627255145126525e-05, "loss": 0.6051, "step": 191675 }, { "epoch": 2.12, "learning_rate": 1.4626332417988012e-05, "loss": 0.643, "step": 191680 }, { "epoch": 2.12, "learning_rate": 1.4625409690849501e-05, "loss": 0.5696, "step": 191685 }, { "epoch": 2.12, "learning_rate": 1.4624486963710987e-05, "loss": 0.5891, "step": 191690 }, { "epoch": 2.12, "learning_rate": 1.4623564236572477e-05, "loss": 0.5909, "step": 191695 }, { "epoch": 2.12, "learning_rate": 1.4622641509433963e-05, "loss": 0.6009, "step": 191700 }, { "epoch": 2.12, "learning_rate": 1.4621718782295451e-05, "loss": 0.5977, "step": 191705 }, { "epoch": 2.12, "learning_rate": 1.4620796055156937e-05, "loss": 0.5997, "step": 191710 }, { "epoch": 2.12, "learning_rate": 1.4619873328018427e-05, "loss": 0.6028, "step": 191715 }, { "epoch": 2.12, "learning_rate": 1.4618950600879913e-05, "loss": 0.6625, "step": 191720 }, { "epoch": 2.12, "learning_rate": 1.4618027873741402e-05, "loss": 0.6249, "step": 191725 }, { "epoch": 2.12, "learning_rate": 1.4617105146602888e-05, "loss": 0.6048, "step": 191730 }, { "epoch": 2.12, "learning_rate": 1.4616182419464375e-05, "loss": 0.6274, "step": 191735 }, { "epoch": 2.12, "learning_rate": 1.4615259692325864e-05, "loss": 0.6263, "step": 191740 }, { "epoch": 2.12, "learning_rate": 1.461433696518735e-05, "loss": 0.6132, "step": 191745 }, { "epoch": 2.12, "learning_rate": 1.461341423804884e-05, "loss": 0.6041, "step": 191750 }, { "epoch": 2.12, "learning_rate": 1.4612491510910326e-05, "loss": 0.6525, "step": 191755 }, { "epoch": 2.12, "learning_rate": 1.4611568783771814e-05, "loss": 0.5885, "step": 191760 }, { "epoch": 2.12, "learning_rate": 1.4610646056633302e-05, "loss": 0.5592, "step": 191765 }, { "epoch": 2.12, "learning_rate": 1.460972332949479e-05, "loss": 0.6076, "step": 191770 }, { "epoch": 2.12, "learning_rate": 1.4608800602356276e-05, "loss": 0.5551, "step": 191775 }, { "epoch": 2.12, "learning_rate": 1.4607877875217765e-05, "loss": 0.5976, "step": 191780 }, { "epoch": 2.12, "learning_rate": 1.4606955148079251e-05, "loss": 0.6087, "step": 191785 }, { "epoch": 2.12, "learning_rate": 1.4606032420940741e-05, "loss": 0.6077, "step": 191790 }, { "epoch": 2.12, "learning_rate": 1.4605109693802227e-05, "loss": 0.5862, "step": 191795 }, { "epoch": 2.12, "learning_rate": 1.4604186966663717e-05, "loss": 0.5933, "step": 191800 }, { "epoch": 2.12, "learning_rate": 1.4603264239525203e-05, "loss": 0.6224, "step": 191805 }, { "epoch": 2.12, "learning_rate": 1.4602341512386689e-05, "loss": 0.6039, "step": 191810 }, { "epoch": 2.12, "learning_rate": 1.4601418785248178e-05, "loss": 0.5868, "step": 191815 }, { "epoch": 2.12, "learning_rate": 1.4600496058109665e-05, "loss": 0.5739, "step": 191820 }, { "epoch": 2.12, "learning_rate": 1.4599573330971152e-05, "loss": 0.5925, "step": 191825 }, { "epoch": 2.12, "learning_rate": 1.4598650603832639e-05, "loss": 0.6941, "step": 191830 }, { "epoch": 2.12, "learning_rate": 1.4597727876694128e-05, "loss": 0.6303, "step": 191835 }, { "epoch": 2.12, "learning_rate": 1.4596805149555614e-05, "loss": 0.6007, "step": 191840 }, { "epoch": 2.12, "learning_rate": 1.4595882422417104e-05, "loss": 0.576, "step": 191845 }, { "epoch": 2.12, "learning_rate": 1.459495969527859e-05, "loss": 0.5313, "step": 191850 }, { "epoch": 2.12, "learning_rate": 1.459403696814008e-05, "loss": 0.5856, "step": 191855 }, { "epoch": 2.12, "learning_rate": 1.4593114241001566e-05, "loss": 0.5545, "step": 191860 }, { "epoch": 2.12, "learning_rate": 1.4592191513863054e-05, "loss": 0.6436, "step": 191865 }, { "epoch": 2.12, "learning_rate": 1.4591268786724541e-05, "loss": 0.6086, "step": 191870 }, { "epoch": 2.12, "learning_rate": 1.459034605958603e-05, "loss": 0.5963, "step": 191875 }, { "epoch": 2.12, "learning_rate": 1.4589423332447515e-05, "loss": 0.5818, "step": 191880 }, { "epoch": 2.12, "learning_rate": 1.4588500605309002e-05, "loss": 0.5721, "step": 191885 }, { "epoch": 2.12, "learning_rate": 1.4587577878170491e-05, "loss": 0.5914, "step": 191890 }, { "epoch": 2.12, "learning_rate": 1.4586655151031977e-05, "loss": 0.6287, "step": 191895 }, { "epoch": 2.12, "learning_rate": 1.4585732423893467e-05, "loss": 0.6431, "step": 191900 }, { "epoch": 2.12, "learning_rate": 1.4584809696754953e-05, "loss": 0.6208, "step": 191905 }, { "epoch": 2.12, "learning_rate": 1.4583886969616443e-05, "loss": 0.6043, "step": 191910 }, { "epoch": 2.13, "learning_rate": 1.4582964242477929e-05, "loss": 0.6939, "step": 191915 }, { "epoch": 2.13, "learning_rate": 1.4582041515339418e-05, "loss": 0.6153, "step": 191920 }, { "epoch": 2.13, "learning_rate": 1.4581118788200904e-05, "loss": 0.6105, "step": 191925 }, { "epoch": 2.13, "learning_rate": 1.4580196061062392e-05, "loss": 0.6018, "step": 191930 }, { "epoch": 2.13, "learning_rate": 1.4579273333923878e-05, "loss": 0.5892, "step": 191935 }, { "epoch": 2.13, "learning_rate": 1.4578350606785368e-05, "loss": 0.5693, "step": 191940 }, { "epoch": 2.13, "learning_rate": 1.4577427879646854e-05, "loss": 0.5504, "step": 191945 }, { "epoch": 2.13, "learning_rate": 1.4576505152508344e-05, "loss": 0.5987, "step": 191950 }, { "epoch": 2.13, "learning_rate": 1.457558242536983e-05, "loss": 0.5851, "step": 191955 }, { "epoch": 2.13, "learning_rate": 1.4574659698231316e-05, "loss": 0.6199, "step": 191960 }, { "epoch": 2.13, "learning_rate": 1.4573736971092805e-05, "loss": 0.544, "step": 191965 }, { "epoch": 2.13, "learning_rate": 1.4572814243954292e-05, "loss": 0.6165, "step": 191970 }, { "epoch": 2.13, "learning_rate": 1.4571891516815781e-05, "loss": 0.5458, "step": 191975 }, { "epoch": 2.13, "learning_rate": 1.4570968789677267e-05, "loss": 0.5698, "step": 191980 }, { "epoch": 2.13, "learning_rate": 1.4570046062538755e-05, "loss": 0.5953, "step": 191985 }, { "epoch": 2.13, "learning_rate": 1.4569123335400241e-05, "loss": 0.6482, "step": 191990 }, { "epoch": 2.13, "learning_rate": 1.4568200608261731e-05, "loss": 0.6231, "step": 191995 }, { "epoch": 2.13, "learning_rate": 1.4567277881123217e-05, "loss": 0.563, "step": 192000 }, { "epoch": 2.13, "eval_loss": 0.5772398710250854, "eval_runtime": 69.1511, "eval_samples_per_second": 28.922, "eval_steps_per_second": 14.461, "step": 192000 }, { "epoch": 2.13, "learning_rate": 1.4566355153984707e-05, "loss": 0.6178, "step": 192005 }, { "epoch": 2.13, "learning_rate": 1.4565432426846193e-05, "loss": 0.5767, "step": 192010 }, { "epoch": 2.13, "learning_rate": 1.4564509699707682e-05, "loss": 0.5911, "step": 192015 }, { "epoch": 2.13, "learning_rate": 1.4563586972569168e-05, "loss": 0.5645, "step": 192020 }, { "epoch": 2.13, "learning_rate": 1.4562664245430658e-05, "loss": 0.6303, "step": 192025 }, { "epoch": 2.13, "learning_rate": 1.4561741518292144e-05, "loss": 0.617, "step": 192030 }, { "epoch": 2.13, "learning_rate": 1.456081879115363e-05, "loss": 0.5257, "step": 192035 }, { "epoch": 2.13, "learning_rate": 1.4559896064015118e-05, "loss": 0.6002, "step": 192040 }, { "epoch": 2.13, "learning_rate": 1.4558973336876606e-05, "loss": 0.6074, "step": 192045 }, { "epoch": 2.13, "learning_rate": 1.4558050609738094e-05, "loss": 0.6191, "step": 192050 }, { "epoch": 2.13, "learning_rate": 1.455712788259958e-05, "loss": 0.6189, "step": 192055 }, { "epoch": 2.13, "learning_rate": 1.455620515546107e-05, "loss": 0.6275, "step": 192060 }, { "epoch": 2.13, "learning_rate": 1.4555282428322556e-05, "loss": 0.6002, "step": 192065 }, { "epoch": 2.13, "learning_rate": 1.4554359701184045e-05, "loss": 0.5816, "step": 192070 }, { "epoch": 2.13, "learning_rate": 1.4553436974045531e-05, "loss": 0.5855, "step": 192075 }, { "epoch": 2.13, "learning_rate": 1.4552514246907021e-05, "loss": 0.5452, "step": 192080 }, { "epoch": 2.13, "learning_rate": 1.4551591519768507e-05, "loss": 0.5935, "step": 192085 }, { "epoch": 2.13, "learning_rate": 1.4550668792629995e-05, "loss": 0.5825, "step": 192090 }, { "epoch": 2.13, "learning_rate": 1.4549746065491481e-05, "loss": 0.6298, "step": 192095 }, { "epoch": 2.13, "learning_rate": 1.454882333835297e-05, "loss": 0.6382, "step": 192100 }, { "epoch": 2.13, "learning_rate": 1.4547900611214457e-05, "loss": 0.6111, "step": 192105 }, { "epoch": 2.13, "learning_rate": 1.4546977884075943e-05, "loss": 0.6348, "step": 192110 }, { "epoch": 2.13, "learning_rate": 1.4546055156937432e-05, "loss": 0.6152, "step": 192115 }, { "epoch": 2.13, "learning_rate": 1.4545132429798919e-05, "loss": 0.4924, "step": 192120 }, { "epoch": 2.13, "learning_rate": 1.4544209702660408e-05, "loss": 0.585, "step": 192125 }, { "epoch": 2.13, "learning_rate": 1.4543286975521894e-05, "loss": 0.6054, "step": 192130 }, { "epoch": 2.13, "learning_rate": 1.4542364248383384e-05, "loss": 0.6094, "step": 192135 }, { "epoch": 2.13, "learning_rate": 1.454144152124487e-05, "loss": 0.592, "step": 192140 }, { "epoch": 2.13, "learning_rate": 1.4540518794106358e-05, "loss": 0.5972, "step": 192145 }, { "epoch": 2.13, "learning_rate": 1.4539596066967846e-05, "loss": 0.6018, "step": 192150 }, { "epoch": 2.13, "learning_rate": 1.4538673339829334e-05, "loss": 0.5924, "step": 192155 }, { "epoch": 2.13, "learning_rate": 1.453775061269082e-05, "loss": 0.5929, "step": 192160 }, { "epoch": 2.13, "learning_rate": 1.453682788555231e-05, "loss": 0.6201, "step": 192165 }, { "epoch": 2.13, "learning_rate": 1.4535905158413795e-05, "loss": 0.6668, "step": 192170 }, { "epoch": 2.13, "learning_rate": 1.4534982431275285e-05, "loss": 0.5575, "step": 192175 }, { "epoch": 2.13, "learning_rate": 1.4534059704136771e-05, "loss": 0.5479, "step": 192180 }, { "epoch": 2.13, "learning_rate": 1.4533136976998257e-05, "loss": 0.5749, "step": 192185 }, { "epoch": 2.13, "learning_rate": 1.4532214249859747e-05, "loss": 0.594, "step": 192190 }, { "epoch": 2.13, "learning_rate": 1.4531291522721233e-05, "loss": 0.5917, "step": 192195 }, { "epoch": 2.13, "learning_rate": 1.4530368795582722e-05, "loss": 0.6478, "step": 192200 }, { "epoch": 2.13, "learning_rate": 1.4529446068444209e-05, "loss": 0.6098, "step": 192205 }, { "epoch": 2.13, "learning_rate": 1.4528523341305696e-05, "loss": 0.5747, "step": 192210 }, { "epoch": 2.13, "learning_rate": 1.4527600614167183e-05, "loss": 0.6625, "step": 192215 }, { "epoch": 2.13, "learning_rate": 1.4526677887028672e-05, "loss": 0.6473, "step": 192220 }, { "epoch": 2.13, "learning_rate": 1.4525755159890158e-05, "loss": 0.6387, "step": 192225 }, { "epoch": 2.13, "learning_rate": 1.4524832432751648e-05, "loss": 0.6776, "step": 192230 }, { "epoch": 2.13, "learning_rate": 1.4523909705613134e-05, "loss": 0.5584, "step": 192235 }, { "epoch": 2.13, "learning_rate": 1.4522986978474624e-05, "loss": 0.6052, "step": 192240 }, { "epoch": 2.13, "learning_rate": 1.452206425133611e-05, "loss": 0.5867, "step": 192245 }, { "epoch": 2.13, "learning_rate": 1.45211415241976e-05, "loss": 0.5946, "step": 192250 }, { "epoch": 2.13, "learning_rate": 1.4520218797059085e-05, "loss": 0.6183, "step": 192255 }, { "epoch": 2.13, "learning_rate": 1.4519296069920572e-05, "loss": 0.6411, "step": 192260 }, { "epoch": 2.13, "learning_rate": 1.451837334278206e-05, "loss": 0.5998, "step": 192265 }, { "epoch": 2.13, "learning_rate": 1.4517450615643546e-05, "loss": 0.5732, "step": 192270 }, { "epoch": 2.13, "learning_rate": 1.4516527888505035e-05, "loss": 0.567, "step": 192275 }, { "epoch": 2.13, "learning_rate": 1.4515605161366521e-05, "loss": 0.5947, "step": 192280 }, { "epoch": 2.13, "learning_rate": 1.451468243422801e-05, "loss": 0.5624, "step": 192285 }, { "epoch": 2.13, "learning_rate": 1.4513759707089497e-05, "loss": 0.6014, "step": 192290 }, { "epoch": 2.13, "learning_rate": 1.4512836979950987e-05, "loss": 0.6041, "step": 192295 }, { "epoch": 2.13, "learning_rate": 1.4511914252812473e-05, "loss": 0.642, "step": 192300 }, { "epoch": 2.13, "learning_rate": 1.4510991525673962e-05, "loss": 0.6017, "step": 192305 }, { "epoch": 2.13, "learning_rate": 1.4510068798535448e-05, "loss": 0.6356, "step": 192310 }, { "epoch": 2.13, "learning_rate": 1.4509146071396936e-05, "loss": 0.6371, "step": 192315 }, { "epoch": 2.13, "learning_rate": 1.4508223344258422e-05, "loss": 0.607, "step": 192320 }, { "epoch": 2.13, "learning_rate": 1.4507300617119912e-05, "loss": 0.6265, "step": 192325 }, { "epoch": 2.13, "learning_rate": 1.4506377889981398e-05, "loss": 0.5705, "step": 192330 }, { "epoch": 2.13, "learning_rate": 1.4505455162842884e-05, "loss": 0.5534, "step": 192335 }, { "epoch": 2.13, "learning_rate": 1.4504532435704374e-05, "loss": 0.6192, "step": 192340 }, { "epoch": 2.13, "learning_rate": 1.450360970856586e-05, "loss": 0.5937, "step": 192345 }, { "epoch": 2.13, "learning_rate": 1.450268698142735e-05, "loss": 0.5782, "step": 192350 }, { "epoch": 2.13, "learning_rate": 1.4501764254288836e-05, "loss": 0.6422, "step": 192355 }, { "epoch": 2.13, "learning_rate": 1.4500841527150325e-05, "loss": 0.6093, "step": 192360 }, { "epoch": 2.13, "learning_rate": 1.4499918800011811e-05, "loss": 0.638, "step": 192365 }, { "epoch": 2.13, "learning_rate": 1.44989960728733e-05, "loss": 0.6061, "step": 192370 }, { "epoch": 2.13, "learning_rate": 1.4498073345734785e-05, "loss": 0.5775, "step": 192375 }, { "epoch": 2.13, "learning_rate": 1.4497150618596275e-05, "loss": 0.6168, "step": 192380 }, { "epoch": 2.13, "learning_rate": 1.4496227891457761e-05, "loss": 0.5723, "step": 192385 }, { "epoch": 2.13, "learning_rate": 1.449530516431925e-05, "loss": 0.5813, "step": 192390 }, { "epoch": 2.13, "learning_rate": 1.4494382437180737e-05, "loss": 0.5528, "step": 192395 }, { "epoch": 2.13, "learning_rate": 1.4493459710042226e-05, "loss": 0.6505, "step": 192400 }, { "epoch": 2.13, "learning_rate": 1.4492536982903712e-05, "loss": 0.5982, "step": 192405 }, { "epoch": 2.13, "learning_rate": 1.4491614255765199e-05, "loss": 0.6122, "step": 192410 }, { "epoch": 2.13, "learning_rate": 1.4490691528626688e-05, "loss": 0.6086, "step": 192415 }, { "epoch": 2.13, "learning_rate": 1.4489768801488174e-05, "loss": 0.5988, "step": 192420 }, { "epoch": 2.13, "learning_rate": 1.4488846074349662e-05, "loss": 0.6013, "step": 192425 }, { "epoch": 2.13, "learning_rate": 1.448792334721115e-05, "loss": 0.63, "step": 192430 }, { "epoch": 2.13, "learning_rate": 1.4487000620072638e-05, "loss": 0.6441, "step": 192435 }, { "epoch": 2.13, "learning_rate": 1.4486077892934124e-05, "loss": 0.5618, "step": 192440 }, { "epoch": 2.13, "learning_rate": 1.4485155165795614e-05, "loss": 0.6107, "step": 192445 }, { "epoch": 2.13, "learning_rate": 1.44842324386571e-05, "loss": 0.5994, "step": 192450 }, { "epoch": 2.13, "learning_rate": 1.448330971151859e-05, "loss": 0.5712, "step": 192455 }, { "epoch": 2.13, "learning_rate": 1.4482386984380075e-05, "loss": 0.5681, "step": 192460 }, { "epoch": 2.13, "learning_rate": 1.4481464257241565e-05, "loss": 0.6166, "step": 192465 }, { "epoch": 2.13, "learning_rate": 1.4480541530103051e-05, "loss": 0.5967, "step": 192470 }, { "epoch": 2.13, "learning_rate": 1.4479618802964539e-05, "loss": 0.6379, "step": 192475 }, { "epoch": 2.13, "learning_rate": 1.4478696075826027e-05, "loss": 0.5757, "step": 192480 }, { "epoch": 2.13, "learning_rate": 1.4477773348687513e-05, "loss": 0.5521, "step": 192485 }, { "epoch": 2.13, "learning_rate": 1.4476850621549e-05, "loss": 0.614, "step": 192490 }, { "epoch": 2.13, "learning_rate": 1.4475927894410487e-05, "loss": 0.5885, "step": 192495 }, { "epoch": 2.13, "learning_rate": 1.4475005167271976e-05, "loss": 0.5864, "step": 192500 }, { "epoch": 2.13, "learning_rate": 1.4474082440133463e-05, "loss": 0.6236, "step": 192505 }, { "epoch": 2.13, "learning_rate": 1.4473159712994952e-05, "loss": 0.5886, "step": 192510 }, { "epoch": 2.13, "learning_rate": 1.4472236985856438e-05, "loss": 0.6197, "step": 192515 }, { "epoch": 2.13, "learning_rate": 1.4471314258717928e-05, "loss": 0.6033, "step": 192520 }, { "epoch": 2.13, "learning_rate": 1.4470391531579414e-05, "loss": 0.63, "step": 192525 }, { "epoch": 2.13, "learning_rate": 1.4469468804440902e-05, "loss": 0.5963, "step": 192530 }, { "epoch": 2.13, "learning_rate": 1.446854607730239e-05, "loss": 0.6304, "step": 192535 }, { "epoch": 2.13, "learning_rate": 1.4467623350163878e-05, "loss": 0.6104, "step": 192540 }, { "epoch": 2.13, "learning_rate": 1.4466700623025364e-05, "loss": 0.5923, "step": 192545 }, { "epoch": 2.13, "learning_rate": 1.4465777895886853e-05, "loss": 0.6495, "step": 192550 }, { "epoch": 2.13, "learning_rate": 1.446485516874834e-05, "loss": 0.5981, "step": 192555 }, { "epoch": 2.13, "learning_rate": 1.4463932441609829e-05, "loss": 0.5307, "step": 192560 }, { "epoch": 2.13, "learning_rate": 1.4463009714471315e-05, "loss": 0.6187, "step": 192565 }, { "epoch": 2.13, "learning_rate": 1.4462086987332801e-05, "loss": 0.5906, "step": 192570 }, { "epoch": 2.13, "learning_rate": 1.446116426019429e-05, "loss": 0.5978, "step": 192575 }, { "epoch": 2.13, "learning_rate": 1.4460241533055777e-05, "loss": 0.5781, "step": 192580 }, { "epoch": 2.13, "learning_rate": 1.4459318805917267e-05, "loss": 0.5611, "step": 192585 }, { "epoch": 2.13, "learning_rate": 1.4458396078778753e-05, "loss": 0.639, "step": 192590 }, { "epoch": 2.13, "learning_rate": 1.445747335164024e-05, "loss": 0.597, "step": 192595 }, { "epoch": 2.13, "learning_rate": 1.4456550624501727e-05, "loss": 0.6816, "step": 192600 }, { "epoch": 2.13, "learning_rate": 1.4455627897363216e-05, "loss": 0.6178, "step": 192605 }, { "epoch": 2.13, "learning_rate": 1.4454705170224702e-05, "loss": 0.5943, "step": 192610 }, { "epoch": 2.13, "learning_rate": 1.4453782443086192e-05, "loss": 0.579, "step": 192615 }, { "epoch": 2.13, "learning_rate": 1.4452859715947678e-05, "loss": 0.6348, "step": 192620 }, { "epoch": 2.13, "learning_rate": 1.4451936988809168e-05, "loss": 0.613, "step": 192625 }, { "epoch": 2.13, "learning_rate": 1.4451014261670654e-05, "loss": 0.5826, "step": 192630 }, { "epoch": 2.13, "learning_rate": 1.4450091534532143e-05, "loss": 0.6297, "step": 192635 }, { "epoch": 2.13, "learning_rate": 1.444916880739363e-05, "loss": 0.6268, "step": 192640 }, { "epoch": 2.13, "learning_rate": 1.4448246080255116e-05, "loss": 0.6108, "step": 192645 }, { "epoch": 2.13, "learning_rate": 1.4447323353116603e-05, "loss": 0.6615, "step": 192650 }, { "epoch": 2.13, "learning_rate": 1.444640062597809e-05, "loss": 0.5855, "step": 192655 }, { "epoch": 2.13, "learning_rate": 1.4445477898839579e-05, "loss": 0.6155, "step": 192660 }, { "epoch": 2.13, "learning_rate": 1.4444555171701065e-05, "loss": 0.593, "step": 192665 }, { "epoch": 2.13, "learning_rate": 1.4443632444562555e-05, "loss": 0.6291, "step": 192670 }, { "epoch": 2.13, "learning_rate": 1.4442709717424041e-05, "loss": 0.5793, "step": 192675 }, { "epoch": 2.13, "learning_rate": 1.444178699028553e-05, "loss": 0.6076, "step": 192680 }, { "epoch": 2.13, "learning_rate": 1.4440864263147017e-05, "loss": 0.6364, "step": 192685 }, { "epoch": 2.13, "learning_rate": 1.4439941536008506e-05, "loss": 0.6207, "step": 192690 }, { "epoch": 2.13, "learning_rate": 1.4439018808869992e-05, "loss": 0.6286, "step": 192695 }, { "epoch": 2.13, "learning_rate": 1.443809608173148e-05, "loss": 0.6146, "step": 192700 }, { "epoch": 2.13, "learning_rate": 1.4437173354592966e-05, "loss": 0.5746, "step": 192705 }, { "epoch": 2.13, "learning_rate": 1.4436250627454456e-05, "loss": 0.6047, "step": 192710 }, { "epoch": 2.13, "learning_rate": 1.4435327900315942e-05, "loss": 0.6239, "step": 192715 }, { "epoch": 2.13, "learning_rate": 1.4434405173177428e-05, "loss": 0.5934, "step": 192720 }, { "epoch": 2.13, "learning_rate": 1.4433482446038918e-05, "loss": 0.6094, "step": 192725 }, { "epoch": 2.13, "learning_rate": 1.4432559718900404e-05, "loss": 0.6714, "step": 192730 }, { "epoch": 2.13, "learning_rate": 1.4431636991761893e-05, "loss": 0.6383, "step": 192735 }, { "epoch": 2.13, "learning_rate": 1.443071426462338e-05, "loss": 0.6221, "step": 192740 }, { "epoch": 2.13, "learning_rate": 1.442979153748487e-05, "loss": 0.6338, "step": 192745 }, { "epoch": 2.13, "learning_rate": 1.4428868810346355e-05, "loss": 0.6504, "step": 192750 }, { "epoch": 2.13, "learning_rate": 1.4427946083207843e-05, "loss": 0.5981, "step": 192755 }, { "epoch": 2.13, "learning_rate": 1.4427023356069331e-05, "loss": 0.5856, "step": 192760 }, { "epoch": 2.13, "learning_rate": 1.4426100628930819e-05, "loss": 0.5527, "step": 192765 }, { "epoch": 2.13, "learning_rate": 1.4425177901792305e-05, "loss": 0.6179, "step": 192770 }, { "epoch": 2.13, "learning_rate": 1.4424255174653795e-05, "loss": 0.5924, "step": 192775 }, { "epoch": 2.13, "learning_rate": 1.442333244751528e-05, "loss": 0.571, "step": 192780 }, { "epoch": 2.13, "learning_rate": 1.442240972037677e-05, "loss": 0.5978, "step": 192785 }, { "epoch": 2.13, "learning_rate": 1.4421486993238256e-05, "loss": 0.5808, "step": 192790 }, { "epoch": 2.13, "learning_rate": 1.4420564266099743e-05, "loss": 0.6092, "step": 192795 }, { "epoch": 2.13, "learning_rate": 1.4419641538961232e-05, "loss": 0.5952, "step": 192800 }, { "epoch": 2.13, "learning_rate": 1.4418718811822718e-05, "loss": 0.5645, "step": 192805 }, { "epoch": 2.13, "learning_rate": 1.4417796084684206e-05, "loss": 0.6045, "step": 192810 }, { "epoch": 2.13, "learning_rate": 1.4416873357545694e-05, "loss": 0.6124, "step": 192815 }, { "epoch": 2.14, "learning_rate": 1.4415950630407182e-05, "loss": 0.5899, "step": 192820 }, { "epoch": 2.14, "learning_rate": 1.4415027903268668e-05, "loss": 0.5466, "step": 192825 }, { "epoch": 2.14, "learning_rate": 1.4414105176130158e-05, "loss": 0.6226, "step": 192830 }, { "epoch": 2.14, "learning_rate": 1.4413182448991644e-05, "loss": 0.6417, "step": 192835 }, { "epoch": 2.14, "learning_rate": 1.4412259721853133e-05, "loss": 0.638, "step": 192840 }, { "epoch": 2.14, "learning_rate": 1.441133699471462e-05, "loss": 0.569, "step": 192845 }, { "epoch": 2.14, "learning_rate": 1.4410414267576109e-05, "loss": 0.6003, "step": 192850 }, { "epoch": 2.14, "learning_rate": 1.4409491540437595e-05, "loss": 0.5649, "step": 192855 }, { "epoch": 2.14, "learning_rate": 1.4408568813299083e-05, "loss": 0.6638, "step": 192860 }, { "epoch": 2.14, "learning_rate": 1.440764608616057e-05, "loss": 0.5681, "step": 192865 }, { "epoch": 2.14, "learning_rate": 1.4406723359022057e-05, "loss": 0.5765, "step": 192870 }, { "epoch": 2.14, "learning_rate": 1.4405800631883545e-05, "loss": 0.5697, "step": 192875 }, { "epoch": 2.14, "learning_rate": 1.4404877904745031e-05, "loss": 0.618, "step": 192880 }, { "epoch": 2.14, "learning_rate": 1.440395517760652e-05, "loss": 0.6064, "step": 192885 }, { "epoch": 2.14, "learning_rate": 1.4403032450468007e-05, "loss": 0.6547, "step": 192890 }, { "epoch": 2.14, "learning_rate": 1.4402109723329496e-05, "loss": 0.5726, "step": 192895 }, { "epoch": 2.14, "learning_rate": 1.4401186996190982e-05, "loss": 0.6135, "step": 192900 }, { "epoch": 2.14, "learning_rate": 1.4400264269052472e-05, "loss": 0.5344, "step": 192905 }, { "epoch": 2.14, "learning_rate": 1.4399341541913958e-05, "loss": 0.6178, "step": 192910 }, { "epoch": 2.14, "learning_rate": 1.4398418814775448e-05, "loss": 0.6126, "step": 192915 }, { "epoch": 2.14, "learning_rate": 1.4397496087636934e-05, "loss": 0.5739, "step": 192920 }, { "epoch": 2.14, "learning_rate": 1.4396573360498422e-05, "loss": 0.5839, "step": 192925 }, { "epoch": 2.14, "learning_rate": 1.4395650633359908e-05, "loss": 0.5554, "step": 192930 }, { "epoch": 2.14, "learning_rate": 1.4394727906221397e-05, "loss": 0.596, "step": 192935 }, { "epoch": 2.14, "learning_rate": 1.4393805179082883e-05, "loss": 0.5957, "step": 192940 }, { "epoch": 2.14, "learning_rate": 1.439288245194437e-05, "loss": 0.5882, "step": 192945 }, { "epoch": 2.14, "learning_rate": 1.4391959724805859e-05, "loss": 0.6289, "step": 192950 }, { "epoch": 2.14, "learning_rate": 1.4391036997667345e-05, "loss": 0.5833, "step": 192955 }, { "epoch": 2.14, "learning_rate": 1.4390114270528835e-05, "loss": 0.6018, "step": 192960 }, { "epoch": 2.14, "learning_rate": 1.4389191543390321e-05, "loss": 0.6279, "step": 192965 }, { "epoch": 2.14, "learning_rate": 1.438826881625181e-05, "loss": 0.5745, "step": 192970 }, { "epoch": 2.14, "learning_rate": 1.4387346089113297e-05, "loss": 0.5666, "step": 192975 }, { "epoch": 2.14, "learning_rate": 1.4386423361974785e-05, "loss": 0.5778, "step": 192980 }, { "epoch": 2.14, "learning_rate": 1.438550063483627e-05, "loss": 0.6185, "step": 192985 }, { "epoch": 2.14, "learning_rate": 1.438457790769776e-05, "loss": 0.6122, "step": 192990 }, { "epoch": 2.14, "learning_rate": 1.4383655180559246e-05, "loss": 0.604, "step": 192995 }, { "epoch": 2.14, "learning_rate": 1.4382732453420736e-05, "loss": 0.5671, "step": 193000 }, { "epoch": 2.14, "eval_loss": 0.5771419405937195, "eval_runtime": 69.1954, "eval_samples_per_second": 28.904, "eval_steps_per_second": 14.452, "step": 193000 }, { "epoch": 2.14, "learning_rate": 1.4381809726282222e-05, "loss": 0.586, "step": 193005 }, { "epoch": 2.14, "learning_rate": 1.4380886999143712e-05, "loss": 0.5705, "step": 193010 }, { "epoch": 2.14, "learning_rate": 1.4379964272005198e-05, "loss": 0.5993, "step": 193015 }, { "epoch": 2.14, "learning_rate": 1.4379041544866684e-05, "loss": 0.6069, "step": 193020 }, { "epoch": 2.14, "learning_rate": 1.4378118817728173e-05, "loss": 0.626, "step": 193025 }, { "epoch": 2.14, "learning_rate": 1.437719609058966e-05, "loss": 0.6097, "step": 193030 }, { "epoch": 2.14, "learning_rate": 1.4376273363451147e-05, "loss": 0.5713, "step": 193035 }, { "epoch": 2.14, "learning_rate": 1.4375350636312634e-05, "loss": 0.566, "step": 193040 }, { "epoch": 2.14, "learning_rate": 1.4374427909174123e-05, "loss": 0.5718, "step": 193045 }, { "epoch": 2.14, "learning_rate": 1.437350518203561e-05, "loss": 0.5616, "step": 193050 }, { "epoch": 2.14, "learning_rate": 1.4372582454897099e-05, "loss": 0.6477, "step": 193055 }, { "epoch": 2.14, "learning_rate": 1.4371659727758585e-05, "loss": 0.6238, "step": 193060 }, { "epoch": 2.14, "learning_rate": 1.4370737000620075e-05, "loss": 0.6666, "step": 193065 }, { "epoch": 2.14, "learning_rate": 1.436981427348156e-05, "loss": 0.6155, "step": 193070 }, { "epoch": 2.14, "learning_rate": 1.436889154634305e-05, "loss": 0.5755, "step": 193075 }, { "epoch": 2.14, "learning_rate": 1.4367968819204536e-05, "loss": 0.6206, "step": 193080 }, { "epoch": 2.14, "learning_rate": 1.4367046092066024e-05, "loss": 0.5191, "step": 193085 }, { "epoch": 2.14, "learning_rate": 1.436612336492751e-05, "loss": 0.605, "step": 193090 }, { "epoch": 2.14, "learning_rate": 1.4365200637788998e-05, "loss": 0.6565, "step": 193095 }, { "epoch": 2.14, "learning_rate": 1.4364277910650486e-05, "loss": 0.5682, "step": 193100 }, { "epoch": 2.14, "learning_rate": 1.4363355183511972e-05, "loss": 0.5878, "step": 193105 }, { "epoch": 2.14, "learning_rate": 1.4362432456373462e-05, "loss": 0.614, "step": 193110 }, { "epoch": 2.14, "learning_rate": 1.4361509729234948e-05, "loss": 0.6115, "step": 193115 }, { "epoch": 2.14, "learning_rate": 1.4360587002096438e-05, "loss": 0.5778, "step": 193120 }, { "epoch": 2.14, "learning_rate": 1.4359664274957924e-05, "loss": 0.6036, "step": 193125 }, { "epoch": 2.14, "learning_rate": 1.4358741547819413e-05, "loss": 0.6116, "step": 193130 }, { "epoch": 2.14, "learning_rate": 1.43578188206809e-05, "loss": 0.6405, "step": 193135 }, { "epoch": 2.14, "learning_rate": 1.4356896093542387e-05, "loss": 0.6375, "step": 193140 }, { "epoch": 2.14, "learning_rate": 1.4355973366403875e-05, "loss": 0.5684, "step": 193145 }, { "epoch": 2.14, "learning_rate": 1.4355050639265363e-05, "loss": 0.656, "step": 193150 }, { "epoch": 2.14, "learning_rate": 1.4354127912126849e-05, "loss": 0.6221, "step": 193155 }, { "epoch": 2.14, "learning_rate": 1.4353205184988339e-05, "loss": 0.6037, "step": 193160 }, { "epoch": 2.14, "learning_rate": 1.4352282457849825e-05, "loss": 0.6252, "step": 193165 }, { "epoch": 2.14, "learning_rate": 1.4351359730711311e-05, "loss": 0.6449, "step": 193170 }, { "epoch": 2.14, "learning_rate": 1.43504370035728e-05, "loss": 0.6073, "step": 193175 }, { "epoch": 2.14, "learning_rate": 1.4349514276434287e-05, "loss": 0.6352, "step": 193180 }, { "epoch": 2.14, "learning_rate": 1.4348591549295776e-05, "loss": 0.6269, "step": 193185 }, { "epoch": 2.14, "learning_rate": 1.4347668822157262e-05, "loss": 0.6071, "step": 193190 }, { "epoch": 2.14, "learning_rate": 1.434674609501875e-05, "loss": 0.5985, "step": 193195 }, { "epoch": 2.14, "learning_rate": 1.4345823367880238e-05, "loss": 0.5978, "step": 193200 }, { "epoch": 2.14, "learning_rate": 1.4344900640741726e-05, "loss": 0.5767, "step": 193205 }, { "epoch": 2.14, "learning_rate": 1.4343977913603212e-05, "loss": 0.5954, "step": 193210 }, { "epoch": 2.14, "learning_rate": 1.4343055186464702e-05, "loss": 0.5861, "step": 193215 }, { "epoch": 2.14, "learning_rate": 1.4342132459326188e-05, "loss": 0.6312, "step": 193220 }, { "epoch": 2.14, "learning_rate": 1.4341209732187677e-05, "loss": 0.6122, "step": 193225 }, { "epoch": 2.14, "learning_rate": 1.4340287005049163e-05, "loss": 0.6001, "step": 193230 }, { "epoch": 2.14, "learning_rate": 1.4339364277910653e-05, "loss": 0.5967, "step": 193235 }, { "epoch": 2.14, "learning_rate": 1.4338441550772139e-05, "loss": 0.5743, "step": 193240 }, { "epoch": 2.14, "learning_rate": 1.4337518823633625e-05, "loss": 0.5913, "step": 193245 }, { "epoch": 2.14, "learning_rate": 1.4336596096495115e-05, "loss": 0.5926, "step": 193250 }, { "epoch": 2.14, "learning_rate": 1.4335673369356601e-05, "loss": 0.5163, "step": 193255 }, { "epoch": 2.14, "learning_rate": 1.4334750642218089e-05, "loss": 0.5337, "step": 193260 }, { "epoch": 2.14, "learning_rate": 1.4333827915079575e-05, "loss": 0.5507, "step": 193265 }, { "epoch": 2.14, "learning_rate": 1.4332905187941065e-05, "loss": 0.6442, "step": 193270 }, { "epoch": 2.14, "learning_rate": 1.433198246080255e-05, "loss": 0.5505, "step": 193275 }, { "epoch": 2.14, "learning_rate": 1.433105973366404e-05, "loss": 0.6242, "step": 193280 }, { "epoch": 2.14, "learning_rate": 1.4330137006525526e-05, "loss": 0.5933, "step": 193285 }, { "epoch": 2.14, "learning_rate": 1.4329214279387016e-05, "loss": 0.5601, "step": 193290 }, { "epoch": 2.14, "learning_rate": 1.4328291552248502e-05, "loss": 0.6696, "step": 193295 }, { "epoch": 2.14, "learning_rate": 1.4327368825109992e-05, "loss": 0.5751, "step": 193300 }, { "epoch": 2.14, "learning_rate": 1.4326446097971478e-05, "loss": 0.6493, "step": 193305 }, { "epoch": 2.14, "learning_rate": 1.4325523370832966e-05, "loss": 0.6309, "step": 193310 }, { "epoch": 2.14, "learning_rate": 1.4324600643694452e-05, "loss": 0.5921, "step": 193315 }, { "epoch": 2.14, "learning_rate": 1.4323677916555938e-05, "loss": 0.5534, "step": 193320 }, { "epoch": 2.14, "learning_rate": 1.4322755189417427e-05, "loss": 0.5997, "step": 193325 }, { "epoch": 2.14, "learning_rate": 1.4321832462278914e-05, "loss": 0.5801, "step": 193330 }, { "epoch": 2.14, "learning_rate": 1.4320909735140403e-05, "loss": 0.5629, "step": 193335 }, { "epoch": 2.14, "learning_rate": 1.431998700800189e-05, "loss": 0.6346, "step": 193340 }, { "epoch": 2.14, "learning_rate": 1.4319064280863379e-05, "loss": 0.5927, "step": 193345 }, { "epoch": 2.14, "learning_rate": 1.4318141553724865e-05, "loss": 0.6072, "step": 193350 }, { "epoch": 2.14, "learning_rate": 1.4317218826586355e-05, "loss": 0.5762, "step": 193355 }, { "epoch": 2.14, "learning_rate": 1.431629609944784e-05, "loss": 0.6095, "step": 193360 }, { "epoch": 2.14, "learning_rate": 1.4315373372309329e-05, "loss": 0.6105, "step": 193365 }, { "epoch": 2.14, "learning_rate": 1.4314450645170815e-05, "loss": 0.5312, "step": 193370 }, { "epoch": 2.14, "learning_rate": 1.4313527918032304e-05, "loss": 0.6818, "step": 193375 }, { "epoch": 2.14, "learning_rate": 1.431260519089379e-05, "loss": 0.6414, "step": 193380 }, { "epoch": 2.14, "learning_rate": 1.431168246375528e-05, "loss": 0.593, "step": 193385 }, { "epoch": 2.14, "learning_rate": 1.4310759736616766e-05, "loss": 0.6602, "step": 193390 }, { "epoch": 2.14, "learning_rate": 1.4309837009478256e-05, "loss": 0.5965, "step": 193395 }, { "epoch": 2.14, "learning_rate": 1.4308914282339742e-05, "loss": 0.5925, "step": 193400 }, { "epoch": 2.14, "learning_rate": 1.4307991555201228e-05, "loss": 0.5768, "step": 193405 }, { "epoch": 2.14, "learning_rate": 1.4307068828062717e-05, "loss": 0.6365, "step": 193410 }, { "epoch": 2.14, "learning_rate": 1.4306146100924204e-05, "loss": 0.5769, "step": 193415 }, { "epoch": 2.14, "learning_rate": 1.4305223373785691e-05, "loss": 0.621, "step": 193420 }, { "epoch": 2.14, "learning_rate": 1.430430064664718e-05, "loss": 0.547, "step": 193425 }, { "epoch": 2.14, "learning_rate": 1.4303377919508667e-05, "loss": 0.5814, "step": 193430 }, { "epoch": 2.14, "learning_rate": 1.4302455192370153e-05, "loss": 0.5393, "step": 193435 }, { "epoch": 2.14, "learning_rate": 1.4301532465231643e-05, "loss": 0.5545, "step": 193440 }, { "epoch": 2.14, "learning_rate": 1.4300609738093129e-05, "loss": 0.621, "step": 193445 }, { "epoch": 2.14, "learning_rate": 1.4299687010954619e-05, "loss": 0.5705, "step": 193450 }, { "epoch": 2.14, "learning_rate": 1.4298764283816105e-05, "loss": 0.631, "step": 193455 }, { "epoch": 2.14, "learning_rate": 1.4297841556677594e-05, "loss": 0.6192, "step": 193460 }, { "epoch": 2.14, "learning_rate": 1.429691882953908e-05, "loss": 0.5814, "step": 193465 }, { "epoch": 2.14, "learning_rate": 1.4295996102400568e-05, "loss": 0.6324, "step": 193470 }, { "epoch": 2.14, "learning_rate": 1.4295073375262054e-05, "loss": 0.5731, "step": 193475 }, { "epoch": 2.14, "learning_rate": 1.4294150648123542e-05, "loss": 0.6344, "step": 193480 }, { "epoch": 2.14, "learning_rate": 1.429322792098503e-05, "loss": 0.6443, "step": 193485 }, { "epoch": 2.14, "learning_rate": 1.4292305193846516e-05, "loss": 0.634, "step": 193490 }, { "epoch": 2.14, "learning_rate": 1.4291382466708006e-05, "loss": 0.5639, "step": 193495 }, { "epoch": 2.14, "learning_rate": 1.4290459739569492e-05, "loss": 0.5722, "step": 193500 }, { "epoch": 2.14, "learning_rate": 1.4289537012430982e-05, "loss": 0.606, "step": 193505 }, { "epoch": 2.14, "learning_rate": 1.4288614285292468e-05, "loss": 0.5799, "step": 193510 }, { "epoch": 2.14, "learning_rate": 1.4287691558153957e-05, "loss": 0.5754, "step": 193515 }, { "epoch": 2.14, "learning_rate": 1.4286768831015443e-05, "loss": 0.6292, "step": 193520 }, { "epoch": 2.14, "learning_rate": 1.4285846103876931e-05, "loss": 0.6027, "step": 193525 }, { "epoch": 2.14, "learning_rate": 1.4284923376738419e-05, "loss": 0.548, "step": 193530 }, { "epoch": 2.14, "learning_rate": 1.4284000649599907e-05, "loss": 0.5547, "step": 193535 }, { "epoch": 2.14, "learning_rate": 1.4283077922461393e-05, "loss": 0.6133, "step": 193540 }, { "epoch": 2.14, "learning_rate": 1.4282155195322883e-05, "loss": 0.5835, "step": 193545 }, { "epoch": 2.14, "learning_rate": 1.4281232468184369e-05, "loss": 0.6042, "step": 193550 }, { "epoch": 2.14, "learning_rate": 1.4280309741045855e-05, "loss": 0.627, "step": 193555 }, { "epoch": 2.14, "learning_rate": 1.4279387013907344e-05, "loss": 0.6325, "step": 193560 }, { "epoch": 2.14, "learning_rate": 1.427846428676883e-05, "loss": 0.6059, "step": 193565 }, { "epoch": 2.14, "learning_rate": 1.427754155963032e-05, "loss": 0.5885, "step": 193570 }, { "epoch": 2.14, "learning_rate": 1.4276618832491806e-05, "loss": 0.5783, "step": 193575 }, { "epoch": 2.14, "learning_rate": 1.4275696105353296e-05, "loss": 0.6112, "step": 193580 }, { "epoch": 2.14, "learning_rate": 1.4274773378214782e-05, "loss": 0.5789, "step": 193585 }, { "epoch": 2.14, "learning_rate": 1.427385065107627e-05, "loss": 0.6129, "step": 193590 }, { "epoch": 2.14, "learning_rate": 1.4272927923937756e-05, "loss": 0.5629, "step": 193595 }, { "epoch": 2.14, "learning_rate": 1.4272005196799246e-05, "loss": 0.5922, "step": 193600 }, { "epoch": 2.14, "learning_rate": 1.4271082469660732e-05, "loss": 0.6715, "step": 193605 }, { "epoch": 2.14, "learning_rate": 1.4270159742522221e-05, "loss": 0.5863, "step": 193610 }, { "epoch": 2.14, "learning_rate": 1.4269237015383707e-05, "loss": 0.6163, "step": 193615 }, { "epoch": 2.14, "learning_rate": 1.4268314288245197e-05, "loss": 0.5675, "step": 193620 }, { "epoch": 2.14, "learning_rate": 1.4267391561106683e-05, "loss": 0.5757, "step": 193625 }, { "epoch": 2.14, "learning_rate": 1.426646883396817e-05, "loss": 0.5868, "step": 193630 }, { "epoch": 2.14, "learning_rate": 1.4265546106829659e-05, "loss": 0.5871, "step": 193635 }, { "epoch": 2.14, "learning_rate": 1.4264623379691145e-05, "loss": 0.5881, "step": 193640 }, { "epoch": 2.14, "learning_rate": 1.4263700652552633e-05, "loss": 0.6331, "step": 193645 }, { "epoch": 2.14, "learning_rate": 1.4262777925414119e-05, "loss": 0.5247, "step": 193650 }, { "epoch": 2.14, "learning_rate": 1.4261855198275609e-05, "loss": 0.6681, "step": 193655 }, { "epoch": 2.14, "learning_rate": 1.4260932471137095e-05, "loss": 0.6139, "step": 193660 }, { "epoch": 2.14, "learning_rate": 1.4260009743998584e-05, "loss": 0.5774, "step": 193665 }, { "epoch": 2.14, "learning_rate": 1.425908701686007e-05, "loss": 0.6868, "step": 193670 }, { "epoch": 2.14, "learning_rate": 1.425816428972156e-05, "loss": 0.5495, "step": 193675 }, { "epoch": 2.14, "learning_rate": 1.4257241562583046e-05, "loss": 0.5456, "step": 193680 }, { "epoch": 2.14, "learning_rate": 1.4256318835444536e-05, "loss": 0.59, "step": 193685 }, { "epoch": 2.14, "learning_rate": 1.4255396108306022e-05, "loss": 0.6106, "step": 193690 }, { "epoch": 2.14, "learning_rate": 1.425447338116751e-05, "loss": 0.6555, "step": 193695 }, { "epoch": 2.14, "learning_rate": 1.4253550654028996e-05, "loss": 0.6057, "step": 193700 }, { "epoch": 2.14, "learning_rate": 1.4252627926890482e-05, "loss": 0.5736, "step": 193705 }, { "epoch": 2.14, "learning_rate": 1.4251705199751971e-05, "loss": 0.6055, "step": 193710 }, { "epoch": 2.14, "learning_rate": 1.4250782472613458e-05, "loss": 0.6157, "step": 193715 }, { "epoch": 2.15, "learning_rate": 1.4249859745474947e-05, "loss": 0.5361, "step": 193720 }, { "epoch": 2.15, "learning_rate": 1.4248937018336433e-05, "loss": 0.6044, "step": 193725 }, { "epoch": 2.15, "learning_rate": 1.4248014291197923e-05, "loss": 0.6406, "step": 193730 }, { "epoch": 2.15, "learning_rate": 1.4247091564059409e-05, "loss": 0.6326, "step": 193735 }, { "epoch": 2.15, "learning_rate": 1.4246168836920899e-05, "loss": 0.5999, "step": 193740 }, { "epoch": 2.15, "learning_rate": 1.4245246109782385e-05, "loss": 0.6393, "step": 193745 }, { "epoch": 2.15, "learning_rate": 1.4244323382643873e-05, "loss": 0.5932, "step": 193750 }, { "epoch": 2.15, "learning_rate": 1.4243400655505359e-05, "loss": 0.6119, "step": 193755 }, { "epoch": 2.15, "learning_rate": 1.4242477928366848e-05, "loss": 0.6746, "step": 193760 }, { "epoch": 2.15, "learning_rate": 1.4241555201228334e-05, "loss": 0.6172, "step": 193765 }, { "epoch": 2.15, "learning_rate": 1.4240632474089824e-05, "loss": 0.6299, "step": 193770 }, { "epoch": 2.15, "learning_rate": 1.423970974695131e-05, "loss": 0.5871, "step": 193775 }, { "epoch": 2.15, "learning_rate": 1.4238787019812796e-05, "loss": 0.5937, "step": 193780 }, { "epoch": 2.15, "learning_rate": 1.4237864292674286e-05, "loss": 0.5942, "step": 193785 }, { "epoch": 2.15, "learning_rate": 1.4236941565535772e-05, "loss": 0.5833, "step": 193790 }, { "epoch": 2.15, "learning_rate": 1.4236018838397262e-05, "loss": 0.6051, "step": 193795 }, { "epoch": 2.15, "learning_rate": 1.4235096111258748e-05, "loss": 0.6139, "step": 193800 }, { "epoch": 2.15, "learning_rate": 1.4234173384120236e-05, "loss": 0.5999, "step": 193805 }, { "epoch": 2.15, "learning_rate": 1.4233250656981723e-05, "loss": 0.6134, "step": 193810 }, { "epoch": 2.15, "learning_rate": 1.4232327929843211e-05, "loss": 0.6058, "step": 193815 }, { "epoch": 2.15, "learning_rate": 1.4231405202704697e-05, "loss": 0.5956, "step": 193820 }, { "epoch": 2.15, "learning_rate": 1.4230482475566187e-05, "loss": 0.6237, "step": 193825 }, { "epoch": 2.15, "learning_rate": 1.4229559748427673e-05, "loss": 0.5826, "step": 193830 }, { "epoch": 2.15, "learning_rate": 1.4228637021289163e-05, "loss": 0.5937, "step": 193835 }, { "epoch": 2.15, "learning_rate": 1.4227714294150649e-05, "loss": 0.6216, "step": 193840 }, { "epoch": 2.15, "learning_rate": 1.4226791567012138e-05, "loss": 0.6581, "step": 193845 }, { "epoch": 2.15, "learning_rate": 1.4225868839873624e-05, "loss": 0.611, "step": 193850 }, { "epoch": 2.15, "learning_rate": 1.422494611273511e-05, "loss": 0.5982, "step": 193855 }, { "epoch": 2.15, "learning_rate": 1.4224023385596598e-05, "loss": 0.613, "step": 193860 }, { "epoch": 2.15, "learning_rate": 1.4223100658458086e-05, "loss": 0.6026, "step": 193865 }, { "epoch": 2.15, "learning_rate": 1.4222177931319574e-05, "loss": 0.5936, "step": 193870 }, { "epoch": 2.15, "learning_rate": 1.422125520418106e-05, "loss": 0.61, "step": 193875 }, { "epoch": 2.15, "learning_rate": 1.422033247704255e-05, "loss": 0.5695, "step": 193880 }, { "epoch": 2.15, "learning_rate": 1.4219409749904036e-05, "loss": 0.5665, "step": 193885 }, { "epoch": 2.15, "learning_rate": 1.4218487022765526e-05, "loss": 0.579, "step": 193890 }, { "epoch": 2.15, "learning_rate": 1.4217564295627012e-05, "loss": 0.6402, "step": 193895 }, { "epoch": 2.15, "learning_rate": 1.4216641568488501e-05, "loss": 0.5777, "step": 193900 }, { "epoch": 2.15, "learning_rate": 1.4215718841349987e-05, "loss": 0.6354, "step": 193905 }, { "epoch": 2.15, "learning_rate": 1.4214796114211475e-05, "loss": 0.6586, "step": 193910 }, { "epoch": 2.15, "learning_rate": 1.4213873387072963e-05, "loss": 0.5593, "step": 193915 }, { "epoch": 2.15, "learning_rate": 1.4212950659934451e-05, "loss": 0.5755, "step": 193920 }, { "epoch": 2.15, "learning_rate": 1.4212027932795937e-05, "loss": 0.6076, "step": 193925 }, { "epoch": 2.15, "learning_rate": 1.4211105205657423e-05, "loss": 0.5439, "step": 193930 }, { "epoch": 2.15, "learning_rate": 1.4210182478518913e-05, "loss": 0.6287, "step": 193935 }, { "epoch": 2.15, "learning_rate": 1.4209259751380399e-05, "loss": 0.6058, "step": 193940 }, { "epoch": 2.15, "learning_rate": 1.4208337024241888e-05, "loss": 0.5713, "step": 193945 }, { "epoch": 2.15, "learning_rate": 1.4207414297103375e-05, "loss": 0.5854, "step": 193950 }, { "epoch": 2.15, "learning_rate": 1.4206491569964864e-05, "loss": 0.6075, "step": 193955 }, { "epoch": 2.15, "learning_rate": 1.420556884282635e-05, "loss": 0.5797, "step": 193960 }, { "epoch": 2.15, "learning_rate": 1.420464611568784e-05, "loss": 0.6196, "step": 193965 }, { "epoch": 2.15, "learning_rate": 1.4203723388549326e-05, "loss": 0.5877, "step": 193970 }, { "epoch": 2.15, "learning_rate": 1.4202800661410814e-05, "loss": 0.5296, "step": 193975 }, { "epoch": 2.15, "learning_rate": 1.42018779342723e-05, "loss": 0.6222, "step": 193980 }, { "epoch": 2.15, "learning_rate": 1.420095520713379e-05, "loss": 0.6153, "step": 193985 }, { "epoch": 2.15, "learning_rate": 1.4200032479995276e-05, "loss": 0.5961, "step": 193990 }, { "epoch": 2.15, "learning_rate": 1.4199109752856765e-05, "loss": 0.5808, "step": 193995 }, { "epoch": 2.15, "learning_rate": 1.4198187025718251e-05, "loss": 0.6051, "step": 194000 }, { "epoch": 2.15, "eval_loss": 0.5972157120704651, "eval_runtime": 69.2227, "eval_samples_per_second": 28.892, "eval_steps_per_second": 14.446, "step": 194000 }, { "epoch": 2.15, "learning_rate": 1.4197264298579738e-05, "loss": 0.5925, "step": 194005 }, { "epoch": 2.15, "learning_rate": 1.4196341571441227e-05, "loss": 0.566, "step": 194010 }, { "epoch": 2.15, "learning_rate": 1.4195418844302713e-05, "loss": 0.5751, "step": 194015 }, { "epoch": 2.15, "learning_rate": 1.4194496117164203e-05, "loss": 0.5752, "step": 194020 }, { "epoch": 2.15, "learning_rate": 1.4193573390025689e-05, "loss": 0.6667, "step": 194025 }, { "epoch": 2.15, "learning_rate": 1.4192650662887177e-05, "loss": 0.5828, "step": 194030 }, { "epoch": 2.15, "learning_rate": 1.4191727935748663e-05, "loss": 0.5735, "step": 194035 }, { "epoch": 2.15, "learning_rate": 1.4190805208610153e-05, "loss": 0.632, "step": 194040 }, { "epoch": 2.15, "learning_rate": 1.4189882481471639e-05, "loss": 0.5705, "step": 194045 }, { "epoch": 2.15, "learning_rate": 1.4188959754333128e-05, "loss": 0.6108, "step": 194050 }, { "epoch": 2.15, "learning_rate": 1.4188037027194614e-05, "loss": 0.5874, "step": 194055 }, { "epoch": 2.15, "learning_rate": 1.4187114300056104e-05, "loss": 0.5859, "step": 194060 }, { "epoch": 2.15, "learning_rate": 1.418619157291759e-05, "loss": 0.5001, "step": 194065 }, { "epoch": 2.15, "learning_rate": 1.418526884577908e-05, "loss": 0.6211, "step": 194070 }, { "epoch": 2.15, "learning_rate": 1.4184346118640566e-05, "loss": 0.5818, "step": 194075 }, { "epoch": 2.15, "learning_rate": 1.4183423391502052e-05, "loss": 0.5989, "step": 194080 }, { "epoch": 2.15, "learning_rate": 1.418250066436354e-05, "loss": 0.5744, "step": 194085 }, { "epoch": 2.15, "learning_rate": 1.4181577937225028e-05, "loss": 0.5799, "step": 194090 }, { "epoch": 2.15, "learning_rate": 1.4180655210086515e-05, "loss": 0.5846, "step": 194095 }, { "epoch": 2.15, "learning_rate": 1.4179732482948002e-05, "loss": 0.578, "step": 194100 }, { "epoch": 2.15, "learning_rate": 1.4178809755809491e-05, "loss": 0.623, "step": 194105 }, { "epoch": 2.15, "learning_rate": 1.4177887028670977e-05, "loss": 0.612, "step": 194110 }, { "epoch": 2.15, "learning_rate": 1.4176964301532467e-05, "loss": 0.5769, "step": 194115 }, { "epoch": 2.15, "learning_rate": 1.4176041574393953e-05, "loss": 0.5634, "step": 194120 }, { "epoch": 2.15, "learning_rate": 1.4175118847255443e-05, "loss": 0.5751, "step": 194125 }, { "epoch": 2.15, "learning_rate": 1.4174196120116929e-05, "loss": 0.5559, "step": 194130 }, { "epoch": 2.15, "learning_rate": 1.4173273392978417e-05, "loss": 0.6109, "step": 194135 }, { "epoch": 2.15, "learning_rate": 1.4172350665839903e-05, "loss": 0.5878, "step": 194140 }, { "epoch": 2.15, "learning_rate": 1.4171427938701392e-05, "loss": 0.5858, "step": 194145 }, { "epoch": 2.15, "learning_rate": 1.4170505211562878e-05, "loss": 0.5998, "step": 194150 }, { "epoch": 2.15, "learning_rate": 1.4169582484424365e-05, "loss": 0.5978, "step": 194155 }, { "epoch": 2.15, "learning_rate": 1.4168659757285854e-05, "loss": 0.5627, "step": 194160 }, { "epoch": 2.15, "learning_rate": 1.416773703014734e-05, "loss": 0.5909, "step": 194165 }, { "epoch": 2.15, "learning_rate": 1.416681430300883e-05, "loss": 0.5282, "step": 194170 }, { "epoch": 2.15, "learning_rate": 1.4165891575870316e-05, "loss": 0.6157, "step": 194175 }, { "epoch": 2.15, "learning_rate": 1.4164968848731806e-05, "loss": 0.6037, "step": 194180 }, { "epoch": 2.15, "learning_rate": 1.4164046121593292e-05, "loss": 0.5594, "step": 194185 }, { "epoch": 2.15, "learning_rate": 1.416312339445478e-05, "loss": 0.5445, "step": 194190 }, { "epoch": 2.15, "learning_rate": 1.4162200667316267e-05, "loss": 0.5683, "step": 194195 }, { "epoch": 2.15, "learning_rate": 1.4161277940177755e-05, "loss": 0.5571, "step": 194200 }, { "epoch": 2.15, "learning_rate": 1.4160355213039241e-05, "loss": 0.5879, "step": 194205 }, { "epoch": 2.15, "learning_rate": 1.4159432485900731e-05, "loss": 0.5956, "step": 194210 }, { "epoch": 2.15, "learning_rate": 1.4158509758762217e-05, "loss": 0.5468, "step": 194215 }, { "epoch": 2.15, "learning_rate": 1.4157587031623707e-05, "loss": 0.5878, "step": 194220 }, { "epoch": 2.15, "learning_rate": 1.4156664304485193e-05, "loss": 0.5925, "step": 194225 }, { "epoch": 2.15, "learning_rate": 1.4155741577346682e-05, "loss": 0.6506, "step": 194230 }, { "epoch": 2.15, "learning_rate": 1.4154818850208168e-05, "loss": 0.6093, "step": 194235 }, { "epoch": 2.15, "learning_rate": 1.4153896123069655e-05, "loss": 0.5555, "step": 194240 }, { "epoch": 2.15, "learning_rate": 1.4152973395931144e-05, "loss": 0.5974, "step": 194245 }, { "epoch": 2.15, "learning_rate": 1.415205066879263e-05, "loss": 0.5819, "step": 194250 }, { "epoch": 2.15, "learning_rate": 1.4151127941654118e-05, "loss": 0.6399, "step": 194255 }, { "epoch": 2.15, "learning_rate": 1.4150205214515604e-05, "loss": 0.6293, "step": 194260 }, { "epoch": 2.15, "learning_rate": 1.4149282487377094e-05, "loss": 0.5773, "step": 194265 }, { "epoch": 2.15, "learning_rate": 1.414835976023858e-05, "loss": 0.6255, "step": 194270 }, { "epoch": 2.15, "learning_rate": 1.414743703310007e-05, "loss": 0.5731, "step": 194275 }, { "epoch": 2.15, "learning_rate": 1.4146514305961556e-05, "loss": 0.6047, "step": 194280 }, { "epoch": 2.15, "learning_rate": 1.4145591578823045e-05, "loss": 0.5863, "step": 194285 }, { "epoch": 2.15, "learning_rate": 1.4144668851684531e-05, "loss": 0.5757, "step": 194290 }, { "epoch": 2.15, "learning_rate": 1.414374612454602e-05, "loss": 0.6355, "step": 194295 }, { "epoch": 2.15, "learning_rate": 1.4142823397407507e-05, "loss": 0.6212, "step": 194300 }, { "epoch": 2.15, "learning_rate": 1.4141900670268995e-05, "loss": 0.5839, "step": 194305 }, { "epoch": 2.15, "learning_rate": 1.4140977943130481e-05, "loss": 0.6058, "step": 194310 }, { "epoch": 2.15, "learning_rate": 1.4140055215991967e-05, "loss": 0.5942, "step": 194315 }, { "epoch": 2.15, "learning_rate": 1.4139132488853457e-05, "loss": 0.5631, "step": 194320 }, { "epoch": 2.15, "learning_rate": 1.4138209761714943e-05, "loss": 0.5921, "step": 194325 }, { "epoch": 2.15, "learning_rate": 1.4137287034576433e-05, "loss": 0.6411, "step": 194330 }, { "epoch": 2.15, "learning_rate": 1.4136364307437919e-05, "loss": 0.6044, "step": 194335 }, { "epoch": 2.15, "learning_rate": 1.4135441580299408e-05, "loss": 0.58, "step": 194340 }, { "epoch": 2.15, "learning_rate": 1.4134518853160894e-05, "loss": 0.5703, "step": 194345 }, { "epoch": 2.15, "learning_rate": 1.4133596126022384e-05, "loss": 0.6122, "step": 194350 }, { "epoch": 2.15, "learning_rate": 1.413267339888387e-05, "loss": 0.5589, "step": 194355 }, { "epoch": 2.15, "learning_rate": 1.4131750671745358e-05, "loss": 0.5562, "step": 194360 }, { "epoch": 2.15, "learning_rate": 1.4130827944606844e-05, "loss": 0.6214, "step": 194365 }, { "epoch": 2.15, "learning_rate": 1.4129905217468334e-05, "loss": 0.6307, "step": 194370 }, { "epoch": 2.15, "learning_rate": 1.412898249032982e-05, "loss": 0.628, "step": 194375 }, { "epoch": 2.15, "learning_rate": 1.412805976319131e-05, "loss": 0.5588, "step": 194380 }, { "epoch": 2.15, "learning_rate": 1.4127137036052795e-05, "loss": 0.6152, "step": 194385 }, { "epoch": 2.15, "learning_rate": 1.4126214308914282e-05, "loss": 0.5954, "step": 194390 }, { "epoch": 2.15, "learning_rate": 1.4125291581775771e-05, "loss": 0.5702, "step": 194395 }, { "epoch": 2.15, "learning_rate": 1.4124368854637257e-05, "loss": 0.5797, "step": 194400 }, { "epoch": 2.15, "learning_rate": 1.4123446127498747e-05, "loss": 0.63, "step": 194405 }, { "epoch": 2.15, "learning_rate": 1.4122523400360233e-05, "loss": 0.6357, "step": 194410 }, { "epoch": 2.15, "learning_rate": 1.4121600673221721e-05, "loss": 0.602, "step": 194415 }, { "epoch": 2.15, "learning_rate": 1.4120677946083207e-05, "loss": 0.6011, "step": 194420 }, { "epoch": 2.15, "learning_rate": 1.4119755218944697e-05, "loss": 0.591, "step": 194425 }, { "epoch": 2.15, "learning_rate": 1.4118832491806183e-05, "loss": 0.5909, "step": 194430 }, { "epoch": 2.15, "learning_rate": 1.4117909764667672e-05, "loss": 0.5433, "step": 194435 }, { "epoch": 2.15, "learning_rate": 1.4116987037529158e-05, "loss": 0.5584, "step": 194440 }, { "epoch": 2.15, "learning_rate": 1.4116064310390648e-05, "loss": 0.6052, "step": 194445 }, { "epoch": 2.15, "learning_rate": 1.4115141583252134e-05, "loss": 0.6123, "step": 194450 }, { "epoch": 2.15, "learning_rate": 1.4114218856113624e-05, "loss": 0.5737, "step": 194455 }, { "epoch": 2.15, "learning_rate": 1.411329612897511e-05, "loss": 0.6019, "step": 194460 }, { "epoch": 2.15, "learning_rate": 1.4112373401836596e-05, "loss": 0.6452, "step": 194465 }, { "epoch": 2.15, "learning_rate": 1.4111450674698084e-05, "loss": 0.6165, "step": 194470 }, { "epoch": 2.15, "learning_rate": 1.4110527947559572e-05, "loss": 0.658, "step": 194475 }, { "epoch": 2.15, "learning_rate": 1.410960522042106e-05, "loss": 0.6202, "step": 194480 }, { "epoch": 2.15, "learning_rate": 1.4108682493282546e-05, "loss": 0.6031, "step": 194485 }, { "epoch": 2.15, "learning_rate": 1.4107759766144035e-05, "loss": 0.5862, "step": 194490 }, { "epoch": 2.15, "learning_rate": 1.4106837039005521e-05, "loss": 0.5464, "step": 194495 }, { "epoch": 2.15, "learning_rate": 1.4105914311867011e-05, "loss": 0.5958, "step": 194500 }, { "epoch": 2.15, "learning_rate": 1.4104991584728497e-05, "loss": 0.5555, "step": 194505 }, { "epoch": 2.15, "learning_rate": 1.4104068857589987e-05, "loss": 0.6419, "step": 194510 }, { "epoch": 2.15, "learning_rate": 1.4103146130451473e-05, "loss": 0.6185, "step": 194515 }, { "epoch": 2.15, "learning_rate": 1.410222340331296e-05, "loss": 0.6362, "step": 194520 }, { "epoch": 2.15, "learning_rate": 1.4101300676174448e-05, "loss": 0.569, "step": 194525 }, { "epoch": 2.15, "learning_rate": 1.4100377949035936e-05, "loss": 0.6123, "step": 194530 }, { "epoch": 2.15, "learning_rate": 1.4099455221897422e-05, "loss": 0.6121, "step": 194535 }, { "epoch": 2.15, "learning_rate": 1.4098532494758909e-05, "loss": 0.6019, "step": 194540 }, { "epoch": 2.15, "learning_rate": 1.4097609767620398e-05, "loss": 0.6376, "step": 194545 }, { "epoch": 2.15, "learning_rate": 1.4096687040481884e-05, "loss": 0.5922, "step": 194550 }, { "epoch": 2.15, "learning_rate": 1.4095764313343374e-05, "loss": 0.5973, "step": 194555 }, { "epoch": 2.15, "learning_rate": 1.409484158620486e-05, "loss": 0.5873, "step": 194560 }, { "epoch": 2.15, "learning_rate": 1.409391885906635e-05, "loss": 0.5875, "step": 194565 }, { "epoch": 2.15, "learning_rate": 1.4092996131927836e-05, "loss": 0.6096, "step": 194570 }, { "epoch": 2.15, "learning_rate": 1.4092073404789324e-05, "loss": 0.575, "step": 194575 }, { "epoch": 2.15, "learning_rate": 1.4091150677650811e-05, "loss": 0.5739, "step": 194580 }, { "epoch": 2.15, "learning_rate": 1.40902279505123e-05, "loss": 0.6193, "step": 194585 }, { "epoch": 2.15, "learning_rate": 1.4089305223373785e-05, "loss": 0.59, "step": 194590 }, { "epoch": 2.15, "learning_rate": 1.4088382496235275e-05, "loss": 0.6571, "step": 194595 }, { "epoch": 2.15, "learning_rate": 1.4087459769096761e-05, "loss": 0.5439, "step": 194600 }, { "epoch": 2.15, "learning_rate": 1.408653704195825e-05, "loss": 0.6173, "step": 194605 }, { "epoch": 2.15, "learning_rate": 1.4085614314819737e-05, "loss": 0.571, "step": 194610 }, { "epoch": 2.15, "learning_rate": 1.4084691587681223e-05, "loss": 0.6063, "step": 194615 }, { "epoch": 2.15, "learning_rate": 1.4083768860542712e-05, "loss": 0.6157, "step": 194620 }, { "epoch": 2.16, "learning_rate": 1.4082846133404199e-05, "loss": 0.5621, "step": 194625 }, { "epoch": 2.16, "learning_rate": 1.4081923406265688e-05, "loss": 0.5506, "step": 194630 }, { "epoch": 2.16, "learning_rate": 1.4081000679127174e-05, "loss": 0.5629, "step": 194635 }, { "epoch": 2.16, "learning_rate": 1.4080077951988662e-05, "loss": 0.6525, "step": 194640 }, { "epoch": 2.16, "learning_rate": 1.4079155224850148e-05, "loss": 0.6436, "step": 194645 }, { "epoch": 2.16, "learning_rate": 1.4078232497711638e-05, "loss": 0.5956, "step": 194650 }, { "epoch": 2.16, "learning_rate": 1.4077309770573124e-05, "loss": 0.5796, "step": 194655 }, { "epoch": 2.16, "learning_rate": 1.4076387043434614e-05, "loss": 0.6183, "step": 194660 }, { "epoch": 2.16, "learning_rate": 1.40754643162961e-05, "loss": 0.5807, "step": 194665 }, { "epoch": 2.16, "learning_rate": 1.407454158915759e-05, "loss": 0.608, "step": 194670 }, { "epoch": 2.16, "learning_rate": 1.4073618862019075e-05, "loss": 0.618, "step": 194675 }, { "epoch": 2.16, "learning_rate": 1.4072696134880565e-05, "loss": 0.6964, "step": 194680 }, { "epoch": 2.16, "learning_rate": 1.4071773407742051e-05, "loss": 0.659, "step": 194685 }, { "epoch": 2.16, "learning_rate": 1.4070850680603537e-05, "loss": 0.5835, "step": 194690 }, { "epoch": 2.16, "learning_rate": 1.4069927953465025e-05, "loss": 0.5534, "step": 194695 }, { "epoch": 2.16, "learning_rate": 1.4069005226326511e-05, "loss": 0.5798, "step": 194700 }, { "epoch": 2.16, "learning_rate": 1.4068082499188e-05, "loss": 0.5783, "step": 194705 }, { "epoch": 2.16, "learning_rate": 1.4067159772049487e-05, "loss": 0.5852, "step": 194710 }, { "epoch": 2.16, "learning_rate": 1.4066237044910977e-05, "loss": 0.5883, "step": 194715 }, { "epoch": 2.16, "learning_rate": 1.4065314317772463e-05, "loss": 0.6369, "step": 194720 }, { "epoch": 2.16, "learning_rate": 1.4064391590633952e-05, "loss": 0.5665, "step": 194725 }, { "epoch": 2.16, "learning_rate": 1.4063468863495438e-05, "loss": 0.6208, "step": 194730 }, { "epoch": 2.16, "learning_rate": 1.4062546136356928e-05, "loss": 0.6047, "step": 194735 }, { "epoch": 2.16, "learning_rate": 1.4061623409218414e-05, "loss": 0.5931, "step": 194740 }, { "epoch": 2.16, "learning_rate": 1.4060700682079902e-05, "loss": 0.5631, "step": 194745 }, { "epoch": 2.16, "learning_rate": 1.4059777954941388e-05, "loss": 0.5475, "step": 194750 }, { "epoch": 2.16, "learning_rate": 1.4058855227802878e-05, "loss": 0.6222, "step": 194755 }, { "epoch": 2.16, "learning_rate": 1.4057932500664364e-05, "loss": 0.5982, "step": 194760 }, { "epoch": 2.16, "learning_rate": 1.405700977352585e-05, "loss": 0.6107, "step": 194765 }, { "epoch": 2.16, "learning_rate": 1.405608704638734e-05, "loss": 0.603, "step": 194770 }, { "epoch": 2.16, "learning_rate": 1.4055164319248826e-05, "loss": 0.5513, "step": 194775 }, { "epoch": 2.16, "learning_rate": 1.4054241592110315e-05, "loss": 0.6037, "step": 194780 }, { "epoch": 2.16, "learning_rate": 1.4053318864971801e-05, "loss": 0.6153, "step": 194785 }, { "epoch": 2.16, "learning_rate": 1.4052396137833291e-05, "loss": 0.6045, "step": 194790 }, { "epoch": 2.16, "learning_rate": 1.4051473410694777e-05, "loss": 0.5943, "step": 194795 }, { "epoch": 2.16, "learning_rate": 1.4050550683556265e-05, "loss": 0.6189, "step": 194800 }, { "epoch": 2.16, "learning_rate": 1.4049627956417751e-05, "loss": 0.583, "step": 194805 }, { "epoch": 2.16, "learning_rate": 1.404870522927924e-05, "loss": 0.6008, "step": 194810 }, { "epoch": 2.16, "learning_rate": 1.4047782502140727e-05, "loss": 0.6012, "step": 194815 }, { "epoch": 2.16, "learning_rate": 1.4046859775002216e-05, "loss": 0.605, "step": 194820 }, { "epoch": 2.16, "learning_rate": 1.4045937047863702e-05, "loss": 0.6002, "step": 194825 }, { "epoch": 2.16, "learning_rate": 1.4045014320725192e-05, "loss": 0.5961, "step": 194830 }, { "epoch": 2.16, "learning_rate": 1.4044091593586678e-05, "loss": 0.5991, "step": 194835 }, { "epoch": 2.16, "learning_rate": 1.4043168866448164e-05, "loss": 0.5705, "step": 194840 }, { "epoch": 2.16, "learning_rate": 1.4042246139309654e-05, "loss": 0.5572, "step": 194845 }, { "epoch": 2.16, "learning_rate": 1.404132341217114e-05, "loss": 0.6107, "step": 194850 }, { "epoch": 2.16, "learning_rate": 1.4040400685032628e-05, "loss": 0.5941, "step": 194855 }, { "epoch": 2.16, "learning_rate": 1.4039477957894116e-05, "loss": 0.6231, "step": 194860 }, { "epoch": 2.16, "learning_rate": 1.4038555230755604e-05, "loss": 0.5764, "step": 194865 }, { "epoch": 2.16, "learning_rate": 1.403763250361709e-05, "loss": 0.5771, "step": 194870 }, { "epoch": 2.16, "learning_rate": 1.403670977647858e-05, "loss": 0.6085, "step": 194875 }, { "epoch": 2.16, "learning_rate": 1.4035787049340065e-05, "loss": 0.5736, "step": 194880 }, { "epoch": 2.16, "learning_rate": 1.4034864322201555e-05, "loss": 0.6421, "step": 194885 }, { "epoch": 2.16, "learning_rate": 1.4033941595063041e-05, "loss": 0.5415, "step": 194890 }, { "epoch": 2.16, "learning_rate": 1.403301886792453e-05, "loss": 0.6171, "step": 194895 }, { "epoch": 2.16, "learning_rate": 1.4032096140786017e-05, "loss": 0.6025, "step": 194900 }, { "epoch": 2.16, "learning_rate": 1.4031173413647505e-05, "loss": 0.6007, "step": 194905 }, { "epoch": 2.16, "learning_rate": 1.4030250686508992e-05, "loss": 0.5755, "step": 194910 }, { "epoch": 2.16, "learning_rate": 1.4029327959370479e-05, "loss": 0.5609, "step": 194915 }, { "epoch": 2.16, "learning_rate": 1.4028405232231966e-05, "loss": 0.6022, "step": 194920 }, { "epoch": 2.16, "learning_rate": 1.4027482505093453e-05, "loss": 0.6034, "step": 194925 }, { "epoch": 2.16, "learning_rate": 1.4026559777954942e-05, "loss": 0.5938, "step": 194930 }, { "epoch": 2.16, "learning_rate": 1.4025637050816428e-05, "loss": 0.5639, "step": 194935 }, { "epoch": 2.16, "learning_rate": 1.4024714323677918e-05, "loss": 0.6027, "step": 194940 }, { "epoch": 2.16, "learning_rate": 1.4023791596539404e-05, "loss": 0.5908, "step": 194945 }, { "epoch": 2.16, "learning_rate": 1.4022868869400894e-05, "loss": 0.6125, "step": 194950 }, { "epoch": 2.16, "learning_rate": 1.402194614226238e-05, "loss": 0.6159, "step": 194955 }, { "epoch": 2.16, "learning_rate": 1.4021023415123868e-05, "loss": 0.5939, "step": 194960 }, { "epoch": 2.16, "learning_rate": 1.4020100687985355e-05, "loss": 0.609, "step": 194965 }, { "epoch": 2.16, "learning_rate": 1.4019177960846843e-05, "loss": 0.6366, "step": 194970 }, { "epoch": 2.16, "learning_rate": 1.401825523370833e-05, "loss": 0.5957, "step": 194975 }, { "epoch": 2.16, "learning_rate": 1.4017332506569819e-05, "loss": 0.5372, "step": 194980 }, { "epoch": 2.16, "learning_rate": 1.4016409779431305e-05, "loss": 0.5652, "step": 194985 }, { "epoch": 2.16, "learning_rate": 1.4015487052292791e-05, "loss": 0.6101, "step": 194990 }, { "epoch": 2.16, "learning_rate": 1.401456432515428e-05, "loss": 0.633, "step": 194995 }, { "epoch": 2.16, "learning_rate": 1.4013641598015767e-05, "loss": 0.5852, "step": 195000 }, { "epoch": 2.16, "eval_loss": 0.5917216539382935, "eval_runtime": 69.143, "eval_samples_per_second": 28.926, "eval_steps_per_second": 14.463, "step": 195000 }, { "epoch": 2.16, "learning_rate": 1.4012718870877257e-05, "loss": 0.6233, "step": 195005 }, { "epoch": 2.16, "learning_rate": 1.4011796143738743e-05, "loss": 0.6689, "step": 195010 }, { "epoch": 2.16, "learning_rate": 1.4010873416600232e-05, "loss": 0.6147, "step": 195015 }, { "epoch": 2.16, "learning_rate": 1.4009950689461718e-05, "loss": 0.5863, "step": 195020 }, { "epoch": 2.16, "learning_rate": 1.4009027962323206e-05, "loss": 0.6254, "step": 195025 }, { "epoch": 2.16, "learning_rate": 1.4008105235184692e-05, "loss": 0.6186, "step": 195030 }, { "epoch": 2.16, "learning_rate": 1.4007182508046182e-05, "loss": 0.6385, "step": 195035 }, { "epoch": 2.16, "learning_rate": 1.4006259780907668e-05, "loss": 0.5885, "step": 195040 }, { "epoch": 2.16, "learning_rate": 1.4005337053769158e-05, "loss": 0.5916, "step": 195045 }, { "epoch": 2.16, "learning_rate": 1.4004414326630644e-05, "loss": 0.6011, "step": 195050 }, { "epoch": 2.16, "learning_rate": 1.4003491599492133e-05, "loss": 0.6261, "step": 195055 }, { "epoch": 2.16, "learning_rate": 1.400256887235362e-05, "loss": 0.6015, "step": 195060 }, { "epoch": 2.16, "learning_rate": 1.4001646145215109e-05, "loss": 0.6289, "step": 195065 }, { "epoch": 2.16, "learning_rate": 1.4000723418076595e-05, "loss": 0.6094, "step": 195070 }, { "epoch": 2.16, "learning_rate": 1.3999800690938081e-05, "loss": 0.6127, "step": 195075 }, { "epoch": 2.16, "learning_rate": 1.399887796379957e-05, "loss": 0.6256, "step": 195080 }, { "epoch": 2.16, "learning_rate": 1.3997955236661055e-05, "loss": 0.5719, "step": 195085 }, { "epoch": 2.16, "learning_rate": 1.3997032509522545e-05, "loss": 0.6156, "step": 195090 }, { "epoch": 2.16, "learning_rate": 1.3996109782384031e-05, "loss": 0.6132, "step": 195095 }, { "epoch": 2.16, "learning_rate": 1.399518705524552e-05, "loss": 0.5895, "step": 195100 }, { "epoch": 2.16, "learning_rate": 1.3994264328107007e-05, "loss": 0.6157, "step": 195105 }, { "epoch": 2.16, "learning_rate": 1.3993341600968496e-05, "loss": 0.6454, "step": 195110 }, { "epoch": 2.16, "learning_rate": 1.3992418873829982e-05, "loss": 0.5985, "step": 195115 }, { "epoch": 2.16, "learning_rate": 1.3991496146691472e-05, "loss": 0.6392, "step": 195120 }, { "epoch": 2.16, "learning_rate": 1.3990573419552958e-05, "loss": 0.5994, "step": 195125 }, { "epoch": 2.16, "learning_rate": 1.3989650692414446e-05, "loss": 0.5922, "step": 195130 }, { "epoch": 2.16, "learning_rate": 1.3988727965275932e-05, "loss": 0.5858, "step": 195135 }, { "epoch": 2.16, "learning_rate": 1.3987805238137422e-05, "loss": 0.5626, "step": 195140 }, { "epoch": 2.16, "learning_rate": 1.3986882510998908e-05, "loss": 0.6109, "step": 195145 }, { "epoch": 2.16, "learning_rate": 1.3985959783860394e-05, "loss": 0.5287, "step": 195150 }, { "epoch": 2.16, "learning_rate": 1.3985037056721884e-05, "loss": 0.599, "step": 195155 }, { "epoch": 2.16, "learning_rate": 1.398411432958337e-05, "loss": 0.6153, "step": 195160 }, { "epoch": 2.16, "learning_rate": 1.398319160244486e-05, "loss": 0.6395, "step": 195165 }, { "epoch": 2.16, "learning_rate": 1.3982268875306345e-05, "loss": 0.6017, "step": 195170 }, { "epoch": 2.16, "learning_rate": 1.3981346148167835e-05, "loss": 0.5574, "step": 195175 }, { "epoch": 2.16, "learning_rate": 1.3980423421029321e-05, "loss": 0.5264, "step": 195180 }, { "epoch": 2.16, "learning_rate": 1.3979500693890809e-05, "loss": 0.6103, "step": 195185 }, { "epoch": 2.16, "learning_rate": 1.3978577966752297e-05, "loss": 0.6245, "step": 195190 }, { "epoch": 2.16, "learning_rate": 1.3977655239613785e-05, "loss": 0.5533, "step": 195195 }, { "epoch": 2.16, "learning_rate": 1.397673251247527e-05, "loss": 0.5986, "step": 195200 }, { "epoch": 2.16, "learning_rate": 1.397580978533676e-05, "loss": 0.6036, "step": 195205 }, { "epoch": 2.16, "learning_rate": 1.3974887058198246e-05, "loss": 0.5954, "step": 195210 }, { "epoch": 2.16, "learning_rate": 1.3973964331059736e-05, "loss": 0.6155, "step": 195215 }, { "epoch": 2.16, "learning_rate": 1.3973041603921222e-05, "loss": 0.6253, "step": 195220 }, { "epoch": 2.16, "learning_rate": 1.3972118876782708e-05, "loss": 0.6219, "step": 195225 }, { "epoch": 2.16, "learning_rate": 1.3971196149644198e-05, "loss": 0.6019, "step": 195230 }, { "epoch": 2.16, "learning_rate": 1.3970273422505684e-05, "loss": 0.5826, "step": 195235 }, { "epoch": 2.16, "learning_rate": 1.3969350695367172e-05, "loss": 0.5502, "step": 195240 }, { "epoch": 2.16, "learning_rate": 1.396842796822866e-05, "loss": 0.6494, "step": 195245 }, { "epoch": 2.16, "learning_rate": 1.3967505241090148e-05, "loss": 0.6285, "step": 195250 }, { "epoch": 2.16, "learning_rate": 1.3966582513951634e-05, "loss": 0.607, "step": 195255 }, { "epoch": 2.16, "learning_rate": 1.3965659786813123e-05, "loss": 0.5989, "step": 195260 }, { "epoch": 2.16, "learning_rate": 1.396473705967461e-05, "loss": 0.5567, "step": 195265 }, { "epoch": 2.16, "learning_rate": 1.3963814332536099e-05, "loss": 0.5973, "step": 195270 }, { "epoch": 2.16, "learning_rate": 1.3962891605397585e-05, "loss": 0.6154, "step": 195275 }, { "epoch": 2.16, "learning_rate": 1.3961968878259075e-05, "loss": 0.5682, "step": 195280 }, { "epoch": 2.16, "learning_rate": 1.396104615112056e-05, "loss": 0.5481, "step": 195285 }, { "epoch": 2.16, "learning_rate": 1.3960123423982049e-05, "loss": 0.5952, "step": 195290 }, { "epoch": 2.16, "learning_rate": 1.3959200696843536e-05, "loss": 0.6683, "step": 195295 }, { "epoch": 2.16, "learning_rate": 1.3958277969705023e-05, "loss": 0.6106, "step": 195300 }, { "epoch": 2.16, "learning_rate": 1.395735524256651e-05, "loss": 0.5235, "step": 195305 }, { "epoch": 2.16, "learning_rate": 1.3956432515427997e-05, "loss": 0.6133, "step": 195310 }, { "epoch": 2.16, "learning_rate": 1.3955509788289486e-05, "loss": 0.544, "step": 195315 }, { "epoch": 2.16, "learning_rate": 1.3954587061150972e-05, "loss": 0.6016, "step": 195320 }, { "epoch": 2.16, "learning_rate": 1.3953664334012462e-05, "loss": 0.5852, "step": 195325 }, { "epoch": 2.16, "learning_rate": 1.3952741606873948e-05, "loss": 0.6222, "step": 195330 }, { "epoch": 2.16, "learning_rate": 1.3951818879735438e-05, "loss": 0.5704, "step": 195335 }, { "epoch": 2.16, "learning_rate": 1.3950896152596924e-05, "loss": 0.5636, "step": 195340 }, { "epoch": 2.16, "learning_rate": 1.3949973425458413e-05, "loss": 0.608, "step": 195345 }, { "epoch": 2.16, "learning_rate": 1.39490506983199e-05, "loss": 0.6745, "step": 195350 }, { "epoch": 2.16, "learning_rate": 1.3948127971181387e-05, "loss": 0.6082, "step": 195355 }, { "epoch": 2.16, "learning_rate": 1.3947205244042873e-05, "loss": 0.6282, "step": 195360 }, { "epoch": 2.16, "learning_rate": 1.3946282516904363e-05, "loss": 0.5959, "step": 195365 }, { "epoch": 2.16, "learning_rate": 1.3945359789765849e-05, "loss": 0.639, "step": 195370 }, { "epoch": 2.16, "learning_rate": 1.3944437062627335e-05, "loss": 0.6054, "step": 195375 }, { "epoch": 2.16, "learning_rate": 1.3943514335488825e-05, "loss": 0.5706, "step": 195380 }, { "epoch": 2.16, "learning_rate": 1.3942591608350311e-05, "loss": 0.6191, "step": 195385 }, { "epoch": 2.16, "learning_rate": 1.39416688812118e-05, "loss": 0.5884, "step": 195390 }, { "epoch": 2.16, "learning_rate": 1.3940746154073287e-05, "loss": 0.6161, "step": 195395 }, { "epoch": 2.16, "learning_rate": 1.3939823426934776e-05, "loss": 0.6059, "step": 195400 }, { "epoch": 2.16, "learning_rate": 1.3938900699796262e-05, "loss": 0.5772, "step": 195405 }, { "epoch": 2.16, "learning_rate": 1.393797797265775e-05, "loss": 0.633, "step": 195410 }, { "epoch": 2.16, "learning_rate": 1.3937055245519236e-05, "loss": 0.5804, "step": 195415 }, { "epoch": 2.16, "learning_rate": 1.3936132518380726e-05, "loss": 0.5814, "step": 195420 }, { "epoch": 2.16, "learning_rate": 1.3935209791242212e-05, "loss": 0.6029, "step": 195425 }, { "epoch": 2.16, "learning_rate": 1.3934287064103702e-05, "loss": 0.5964, "step": 195430 }, { "epoch": 2.16, "learning_rate": 1.3933364336965188e-05, "loss": 0.6832, "step": 195435 }, { "epoch": 2.16, "learning_rate": 1.3932441609826677e-05, "loss": 0.6773, "step": 195440 }, { "epoch": 2.16, "learning_rate": 1.3931518882688163e-05, "loss": 0.6032, "step": 195445 }, { "epoch": 2.16, "learning_rate": 1.393059615554965e-05, "loss": 0.6272, "step": 195450 }, { "epoch": 2.16, "learning_rate": 1.392967342841114e-05, "loss": 0.5598, "step": 195455 }, { "epoch": 2.16, "learning_rate": 1.3928750701272625e-05, "loss": 0.6062, "step": 195460 }, { "epoch": 2.16, "learning_rate": 1.3927827974134113e-05, "loss": 0.6227, "step": 195465 }, { "epoch": 2.16, "learning_rate": 1.39269052469956e-05, "loss": 0.6345, "step": 195470 }, { "epoch": 2.16, "learning_rate": 1.3925982519857089e-05, "loss": 0.6014, "step": 195475 }, { "epoch": 2.16, "learning_rate": 1.3925059792718575e-05, "loss": 0.6456, "step": 195480 }, { "epoch": 2.16, "learning_rate": 1.3924137065580065e-05, "loss": 0.6119, "step": 195485 }, { "epoch": 2.16, "learning_rate": 1.392321433844155e-05, "loss": 0.626, "step": 195490 }, { "epoch": 2.16, "learning_rate": 1.392229161130304e-05, "loss": 0.5372, "step": 195495 }, { "epoch": 2.16, "learning_rate": 1.3921368884164526e-05, "loss": 0.5514, "step": 195500 }, { "epoch": 2.16, "learning_rate": 1.3920446157026016e-05, "loss": 0.5845, "step": 195505 }, { "epoch": 2.16, "learning_rate": 1.3919523429887502e-05, "loss": 0.575, "step": 195510 }, { "epoch": 2.16, "learning_rate": 1.391860070274899e-05, "loss": 0.5871, "step": 195515 }, { "epoch": 2.16, "learning_rate": 1.3917677975610476e-05, "loss": 0.6295, "step": 195520 }, { "epoch": 2.16, "learning_rate": 1.3916755248471964e-05, "loss": 0.5663, "step": 195525 }, { "epoch": 2.17, "learning_rate": 1.3915832521333452e-05, "loss": 0.6008, "step": 195530 }, { "epoch": 2.17, "learning_rate": 1.3914909794194938e-05, "loss": 0.5608, "step": 195535 }, { "epoch": 2.17, "learning_rate": 1.3913987067056428e-05, "loss": 0.6252, "step": 195540 }, { "epoch": 2.17, "learning_rate": 1.3913064339917914e-05, "loss": 0.6022, "step": 195545 }, { "epoch": 2.17, "learning_rate": 1.3912141612779403e-05, "loss": 0.5976, "step": 195550 }, { "epoch": 2.17, "learning_rate": 1.391121888564089e-05, "loss": 0.5683, "step": 195555 }, { "epoch": 2.17, "learning_rate": 1.3910296158502379e-05, "loss": 0.6331, "step": 195560 }, { "epoch": 2.17, "learning_rate": 1.3909373431363865e-05, "loss": 0.6447, "step": 195565 }, { "epoch": 2.17, "learning_rate": 1.3908450704225353e-05, "loss": 0.6257, "step": 195570 }, { "epoch": 2.17, "learning_rate": 1.390752797708684e-05, "loss": 0.6103, "step": 195575 }, { "epoch": 2.17, "learning_rate": 1.3906605249948329e-05, "loss": 0.5764, "step": 195580 }, { "epoch": 2.17, "learning_rate": 1.3905682522809815e-05, "loss": 0.5682, "step": 195585 }, { "epoch": 2.17, "learning_rate": 1.3904759795671304e-05, "loss": 0.6411, "step": 195590 }, { "epoch": 2.17, "learning_rate": 1.390383706853279e-05, "loss": 0.5606, "step": 195595 }, { "epoch": 2.17, "learning_rate": 1.3902914341394277e-05, "loss": 0.6038, "step": 195600 }, { "epoch": 2.17, "learning_rate": 1.3901991614255766e-05, "loss": 0.6031, "step": 195605 }, { "epoch": 2.17, "learning_rate": 1.3901068887117252e-05, "loss": 0.565, "step": 195610 }, { "epoch": 2.17, "learning_rate": 1.3900146159978742e-05, "loss": 0.5807, "step": 195615 }, { "epoch": 2.17, "learning_rate": 1.3899223432840228e-05, "loss": 0.6104, "step": 195620 }, { "epoch": 2.17, "learning_rate": 1.3898300705701718e-05, "loss": 0.589, "step": 195625 }, { "epoch": 2.17, "learning_rate": 1.3897377978563204e-05, "loss": 0.6064, "step": 195630 }, { "epoch": 2.17, "learning_rate": 1.3896455251424692e-05, "loss": 0.5802, "step": 195635 }, { "epoch": 2.17, "learning_rate": 1.3895532524286178e-05, "loss": 0.6036, "step": 195640 }, { "epoch": 2.17, "learning_rate": 1.3894609797147667e-05, "loss": 0.5669, "step": 195645 }, { "epoch": 2.17, "learning_rate": 1.3893687070009153e-05, "loss": 0.5728, "step": 195650 }, { "epoch": 2.17, "learning_rate": 1.3892764342870643e-05, "loss": 0.6166, "step": 195655 }, { "epoch": 2.17, "learning_rate": 1.3891841615732129e-05, "loss": 0.62, "step": 195660 }, { "epoch": 2.17, "learning_rate": 1.3890918888593619e-05, "loss": 0.5665, "step": 195665 }, { "epoch": 2.17, "learning_rate": 1.3889996161455105e-05, "loss": 0.6175, "step": 195670 }, { "epoch": 2.17, "learning_rate": 1.3889073434316591e-05, "loss": 0.6311, "step": 195675 }, { "epoch": 2.17, "learning_rate": 1.388815070717808e-05, "loss": 0.6007, "step": 195680 }, { "epoch": 2.17, "learning_rate": 1.3887227980039567e-05, "loss": 0.6052, "step": 195685 }, { "epoch": 2.17, "learning_rate": 1.3886305252901055e-05, "loss": 0.6184, "step": 195690 }, { "epoch": 2.17, "learning_rate": 1.388538252576254e-05, "loss": 0.6276, "step": 195695 }, { "epoch": 2.17, "learning_rate": 1.388445979862403e-05, "loss": 0.5786, "step": 195700 }, { "epoch": 2.17, "learning_rate": 1.3883537071485516e-05, "loss": 0.5948, "step": 195705 }, { "epoch": 2.17, "learning_rate": 1.3882614344347006e-05, "loss": 0.664, "step": 195710 }, { "epoch": 2.17, "learning_rate": 1.3881691617208492e-05, "loss": 0.5723, "step": 195715 }, { "epoch": 2.17, "learning_rate": 1.3880768890069982e-05, "loss": 0.5981, "step": 195720 }, { "epoch": 2.17, "learning_rate": 1.3879846162931468e-05, "loss": 0.5891, "step": 195725 }, { "epoch": 2.17, "learning_rate": 1.3878923435792957e-05, "loss": 0.5621, "step": 195730 }, { "epoch": 2.17, "learning_rate": 1.3878000708654443e-05, "loss": 0.6173, "step": 195735 }, { "epoch": 2.17, "learning_rate": 1.3877077981515931e-05, "loss": 0.5693, "step": 195740 }, { "epoch": 2.17, "learning_rate": 1.3876155254377417e-05, "loss": 0.6627, "step": 195745 }, { "epoch": 2.17, "learning_rate": 1.3875232527238904e-05, "loss": 0.6116, "step": 195750 }, { "epoch": 2.17, "learning_rate": 1.3874309800100393e-05, "loss": 0.5612, "step": 195755 }, { "epoch": 2.17, "learning_rate": 1.387338707296188e-05, "loss": 0.6281, "step": 195760 }, { "epoch": 2.17, "learning_rate": 1.3872464345823369e-05, "loss": 0.6054, "step": 195765 }, { "epoch": 2.17, "learning_rate": 1.3871541618684855e-05, "loss": 0.5946, "step": 195770 }, { "epoch": 2.17, "learning_rate": 1.3870618891546345e-05, "loss": 0.5836, "step": 195775 }, { "epoch": 2.17, "learning_rate": 1.386969616440783e-05, "loss": 0.6659, "step": 195780 }, { "epoch": 2.17, "learning_rate": 1.386877343726932e-05, "loss": 0.5982, "step": 195785 }, { "epoch": 2.17, "learning_rate": 1.3867850710130806e-05, "loss": 0.6195, "step": 195790 }, { "epoch": 2.17, "learning_rate": 1.3866927982992294e-05, "loss": 0.5573, "step": 195795 }, { "epoch": 2.17, "learning_rate": 1.386600525585378e-05, "loss": 0.5918, "step": 195800 }, { "epoch": 2.17, "learning_rate": 1.386508252871527e-05, "loss": 0.5646, "step": 195805 }, { "epoch": 2.17, "learning_rate": 1.3864159801576756e-05, "loss": 0.6334, "step": 195810 }, { "epoch": 2.17, "learning_rate": 1.3863237074438246e-05, "loss": 0.6027, "step": 195815 }, { "epoch": 2.17, "learning_rate": 1.3862314347299732e-05, "loss": 0.5792, "step": 195820 }, { "epoch": 2.17, "learning_rate": 1.3861391620161218e-05, "loss": 0.6467, "step": 195825 }, { "epoch": 2.17, "learning_rate": 1.3860468893022708e-05, "loss": 0.5652, "step": 195830 }, { "epoch": 2.17, "learning_rate": 1.3859546165884194e-05, "loss": 0.5966, "step": 195835 }, { "epoch": 2.17, "learning_rate": 1.3858623438745683e-05, "loss": 0.6494, "step": 195840 }, { "epoch": 2.17, "learning_rate": 1.385770071160717e-05, "loss": 0.5855, "step": 195845 }, { "epoch": 2.17, "learning_rate": 1.3856777984468657e-05, "loss": 0.61, "step": 195850 }, { "epoch": 2.17, "learning_rate": 1.3855855257330145e-05, "loss": 0.5714, "step": 195855 }, { "epoch": 2.17, "learning_rate": 1.3854932530191633e-05, "loss": 0.5996, "step": 195860 }, { "epoch": 2.17, "learning_rate": 1.3854009803053119e-05, "loss": 0.604, "step": 195865 }, { "epoch": 2.17, "learning_rate": 1.3853087075914609e-05, "loss": 0.5781, "step": 195870 }, { "epoch": 2.17, "learning_rate": 1.3852164348776095e-05, "loss": 0.5913, "step": 195875 }, { "epoch": 2.17, "learning_rate": 1.3851241621637584e-05, "loss": 0.539, "step": 195880 }, { "epoch": 2.17, "learning_rate": 1.385031889449907e-05, "loss": 0.5958, "step": 195885 }, { "epoch": 2.17, "learning_rate": 1.384939616736056e-05, "loss": 0.6711, "step": 195890 }, { "epoch": 2.17, "learning_rate": 1.3848473440222046e-05, "loss": 0.5983, "step": 195895 }, { "epoch": 2.17, "learning_rate": 1.3847550713083534e-05, "loss": 0.5806, "step": 195900 }, { "epoch": 2.17, "learning_rate": 1.384662798594502e-05, "loss": 0.5943, "step": 195905 }, { "epoch": 2.17, "learning_rate": 1.3845705258806508e-05, "loss": 0.6014, "step": 195910 }, { "epoch": 2.17, "learning_rate": 1.3844782531667996e-05, "loss": 0.6314, "step": 195915 }, { "epoch": 2.17, "learning_rate": 1.3843859804529482e-05, "loss": 0.6009, "step": 195920 }, { "epoch": 2.17, "learning_rate": 1.3842937077390972e-05, "loss": 0.5866, "step": 195925 }, { "epoch": 2.17, "learning_rate": 1.3842014350252458e-05, "loss": 0.6134, "step": 195930 }, { "epoch": 2.17, "learning_rate": 1.3841091623113947e-05, "loss": 0.5897, "step": 195935 }, { "epoch": 2.17, "learning_rate": 1.3840168895975433e-05, "loss": 0.6312, "step": 195940 }, { "epoch": 2.17, "learning_rate": 1.3839246168836923e-05, "loss": 0.5905, "step": 195945 }, { "epoch": 2.17, "learning_rate": 1.3838323441698409e-05, "loss": 0.5809, "step": 195950 }, { "epoch": 2.17, "learning_rate": 1.3837400714559897e-05, "loss": 0.5958, "step": 195955 }, { "epoch": 2.17, "learning_rate": 1.3836477987421385e-05, "loss": 0.6433, "step": 195960 }, { "epoch": 2.17, "learning_rate": 1.3835555260282873e-05, "loss": 0.5764, "step": 195965 }, { "epoch": 2.17, "learning_rate": 1.3834632533144359e-05, "loss": 0.5988, "step": 195970 }, { "epoch": 2.17, "learning_rate": 1.3833709806005848e-05, "loss": 0.5743, "step": 195975 }, { "epoch": 2.17, "learning_rate": 1.3832787078867334e-05, "loss": 0.6291, "step": 195980 }, { "epoch": 2.17, "learning_rate": 1.383186435172882e-05, "loss": 0.5726, "step": 195985 }, { "epoch": 2.17, "learning_rate": 1.383094162459031e-05, "loss": 0.6102, "step": 195990 }, { "epoch": 2.17, "learning_rate": 1.3830018897451796e-05, "loss": 0.5631, "step": 195995 }, { "epoch": 2.17, "learning_rate": 1.3829096170313286e-05, "loss": 0.5757, "step": 196000 }, { "epoch": 2.17, "eval_loss": 0.5818880200386047, "eval_runtime": 69.1431, "eval_samples_per_second": 28.926, "eval_steps_per_second": 14.463, "step": 196000 }, { "epoch": 2.17, "learning_rate": 1.3828173443174772e-05, "loss": 0.6259, "step": 196005 }, { "epoch": 2.17, "learning_rate": 1.3827250716036262e-05, "loss": 0.5583, "step": 196010 }, { "epoch": 2.17, "learning_rate": 1.3826327988897748e-05, "loss": 0.6091, "step": 196015 }, { "epoch": 2.17, "learning_rate": 1.3825405261759236e-05, "loss": 0.6094, "step": 196020 }, { "epoch": 2.17, "learning_rate": 1.3824482534620722e-05, "loss": 0.6187, "step": 196025 }, { "epoch": 2.17, "learning_rate": 1.3823559807482211e-05, "loss": 0.5654, "step": 196030 }, { "epoch": 2.17, "learning_rate": 1.3822637080343697e-05, "loss": 0.5683, "step": 196035 }, { "epoch": 2.17, "learning_rate": 1.3821714353205187e-05, "loss": 0.5415, "step": 196040 }, { "epoch": 2.17, "learning_rate": 1.3820791626066673e-05, "loss": 0.6227, "step": 196045 }, { "epoch": 2.17, "learning_rate": 1.3819868898928163e-05, "loss": 0.6305, "step": 196050 }, { "epoch": 2.17, "learning_rate": 1.3818946171789649e-05, "loss": 0.5717, "step": 196055 }, { "epoch": 2.17, "learning_rate": 1.3818023444651135e-05, "loss": 0.6211, "step": 196060 }, { "epoch": 2.17, "learning_rate": 1.3817100717512625e-05, "loss": 0.6077, "step": 196065 }, { "epoch": 2.17, "learning_rate": 1.381617799037411e-05, "loss": 0.613, "step": 196070 }, { "epoch": 2.17, "learning_rate": 1.3815255263235599e-05, "loss": 0.5893, "step": 196075 }, { "epoch": 2.17, "learning_rate": 1.3814332536097085e-05, "loss": 0.6434, "step": 196080 }, { "epoch": 2.17, "learning_rate": 1.3813409808958574e-05, "loss": 0.6074, "step": 196085 }, { "epoch": 2.17, "learning_rate": 1.381248708182006e-05, "loss": 0.5986, "step": 196090 }, { "epoch": 2.17, "learning_rate": 1.381156435468155e-05, "loss": 0.5912, "step": 196095 }, { "epoch": 2.17, "learning_rate": 1.3810641627543036e-05, "loss": 0.5903, "step": 196100 }, { "epoch": 2.17, "learning_rate": 1.3809718900404526e-05, "loss": 0.5825, "step": 196105 }, { "epoch": 2.17, "learning_rate": 1.3808796173266012e-05, "loss": 0.6165, "step": 196110 }, { "epoch": 2.17, "learning_rate": 1.3807873446127501e-05, "loss": 0.5788, "step": 196115 }, { "epoch": 2.17, "learning_rate": 1.3806950718988987e-05, "loss": 0.5721, "step": 196120 }, { "epoch": 2.17, "learning_rate": 1.3806027991850475e-05, "loss": 0.5973, "step": 196125 }, { "epoch": 2.17, "learning_rate": 1.3805105264711961e-05, "loss": 0.5884, "step": 196130 }, { "epoch": 2.17, "learning_rate": 1.3804182537573448e-05, "loss": 0.6112, "step": 196135 }, { "epoch": 2.17, "learning_rate": 1.3803259810434937e-05, "loss": 0.597, "step": 196140 }, { "epoch": 2.17, "learning_rate": 1.3802337083296423e-05, "loss": 0.6305, "step": 196145 }, { "epoch": 2.17, "learning_rate": 1.3801414356157913e-05, "loss": 0.6403, "step": 196150 }, { "epoch": 2.17, "learning_rate": 1.3800491629019399e-05, "loss": 0.6518, "step": 196155 }, { "epoch": 2.17, "learning_rate": 1.3799568901880889e-05, "loss": 0.5451, "step": 196160 }, { "epoch": 2.17, "learning_rate": 1.3798646174742375e-05, "loss": 0.6009, "step": 196165 }, { "epoch": 2.17, "learning_rate": 1.3797723447603864e-05, "loss": 0.6547, "step": 196170 }, { "epoch": 2.17, "learning_rate": 1.379680072046535e-05, "loss": 0.553, "step": 196175 }, { "epoch": 2.17, "learning_rate": 1.3795877993326838e-05, "loss": 0.5739, "step": 196180 }, { "epoch": 2.17, "learning_rate": 1.3794955266188324e-05, "loss": 0.5233, "step": 196185 }, { "epoch": 2.17, "learning_rate": 1.3794032539049814e-05, "loss": 0.6065, "step": 196190 }, { "epoch": 2.17, "learning_rate": 1.37931098119113e-05, "loss": 0.6009, "step": 196195 }, { "epoch": 2.17, "learning_rate": 1.379218708477279e-05, "loss": 0.6471, "step": 196200 }, { "epoch": 2.17, "learning_rate": 1.3791264357634276e-05, "loss": 0.6221, "step": 196205 }, { "epoch": 2.17, "learning_rate": 1.3790341630495762e-05, "loss": 0.6223, "step": 196210 }, { "epoch": 2.17, "learning_rate": 1.3789418903357252e-05, "loss": 0.6127, "step": 196215 }, { "epoch": 2.17, "learning_rate": 1.3788496176218738e-05, "loss": 0.5871, "step": 196220 }, { "epoch": 2.17, "learning_rate": 1.3787573449080227e-05, "loss": 0.5938, "step": 196225 }, { "epoch": 2.17, "learning_rate": 1.3786650721941713e-05, "loss": 0.5764, "step": 196230 }, { "epoch": 2.17, "learning_rate": 1.3785727994803201e-05, "loss": 0.5998, "step": 196235 }, { "epoch": 2.17, "learning_rate": 1.3784805267664689e-05, "loss": 0.6308, "step": 196240 }, { "epoch": 2.17, "learning_rate": 1.3783882540526177e-05, "loss": 0.578, "step": 196245 }, { "epoch": 2.17, "learning_rate": 1.3782959813387663e-05, "loss": 0.6161, "step": 196250 }, { "epoch": 2.17, "learning_rate": 1.3782037086249153e-05, "loss": 0.5898, "step": 196255 }, { "epoch": 2.17, "learning_rate": 1.3781114359110639e-05, "loss": 0.6033, "step": 196260 }, { "epoch": 2.17, "learning_rate": 1.3780191631972128e-05, "loss": 0.6108, "step": 196265 }, { "epoch": 2.17, "learning_rate": 1.3779268904833614e-05, "loss": 0.5606, "step": 196270 }, { "epoch": 2.17, "learning_rate": 1.3778346177695104e-05, "loss": 0.5948, "step": 196275 }, { "epoch": 2.17, "learning_rate": 1.377742345055659e-05, "loss": 0.6119, "step": 196280 }, { "epoch": 2.17, "learning_rate": 1.3776500723418076e-05, "loss": 0.5825, "step": 196285 }, { "epoch": 2.17, "learning_rate": 1.3775577996279566e-05, "loss": 0.6468, "step": 196290 }, { "epoch": 2.17, "learning_rate": 1.3774655269141052e-05, "loss": 0.5139, "step": 196295 }, { "epoch": 2.17, "learning_rate": 1.377373254200254e-05, "loss": 0.6286, "step": 196300 }, { "epoch": 2.17, "learning_rate": 1.3772809814864026e-05, "loss": 0.6066, "step": 196305 }, { "epoch": 2.17, "learning_rate": 1.3771887087725516e-05, "loss": 0.594, "step": 196310 }, { "epoch": 2.17, "learning_rate": 1.3770964360587002e-05, "loss": 0.5535, "step": 196315 }, { "epoch": 2.17, "learning_rate": 1.3770041633448491e-05, "loss": 0.5754, "step": 196320 }, { "epoch": 2.17, "learning_rate": 1.3769118906309977e-05, "loss": 0.5521, "step": 196325 }, { "epoch": 2.17, "learning_rate": 1.3768196179171467e-05, "loss": 0.5701, "step": 196330 }, { "epoch": 2.17, "learning_rate": 1.3767273452032953e-05, "loss": 0.5753, "step": 196335 }, { "epoch": 2.17, "learning_rate": 1.3766350724894441e-05, "loss": 0.6238, "step": 196340 }, { "epoch": 2.17, "learning_rate": 1.3765427997755929e-05, "loss": 0.6235, "step": 196345 }, { "epoch": 2.17, "learning_rate": 1.3764505270617417e-05, "loss": 0.6721, "step": 196350 }, { "epoch": 2.17, "learning_rate": 1.3763582543478903e-05, "loss": 0.5595, "step": 196355 }, { "epoch": 2.17, "learning_rate": 1.3762659816340389e-05, "loss": 0.6191, "step": 196360 }, { "epoch": 2.17, "learning_rate": 1.3761737089201879e-05, "loss": 0.6193, "step": 196365 }, { "epoch": 2.17, "learning_rate": 1.3760814362063365e-05, "loss": 0.6292, "step": 196370 }, { "epoch": 2.17, "learning_rate": 1.3759891634924854e-05, "loss": 0.6078, "step": 196375 }, { "epoch": 2.17, "learning_rate": 1.375896890778634e-05, "loss": 0.5668, "step": 196380 }, { "epoch": 2.17, "learning_rate": 1.375804618064783e-05, "loss": 0.5678, "step": 196385 }, { "epoch": 2.17, "learning_rate": 1.3757123453509316e-05, "loss": 0.5814, "step": 196390 }, { "epoch": 2.17, "learning_rate": 1.3756200726370806e-05, "loss": 0.5884, "step": 196395 }, { "epoch": 2.17, "learning_rate": 1.3755277999232292e-05, "loss": 0.5631, "step": 196400 }, { "epoch": 2.17, "learning_rate": 1.375435527209378e-05, "loss": 0.5758, "step": 196405 }, { "epoch": 2.17, "learning_rate": 1.3753432544955266e-05, "loss": 0.5867, "step": 196410 }, { "epoch": 2.17, "learning_rate": 1.3752509817816755e-05, "loss": 0.5772, "step": 196415 }, { "epoch": 2.17, "learning_rate": 1.3751587090678241e-05, "loss": 0.6702, "step": 196420 }, { "epoch": 2.17, "learning_rate": 1.3750664363539731e-05, "loss": 0.6078, "step": 196425 }, { "epoch": 2.18, "learning_rate": 1.3749741636401217e-05, "loss": 0.5586, "step": 196430 }, { "epoch": 2.18, "learning_rate": 1.3748818909262703e-05, "loss": 0.5968, "step": 196435 }, { "epoch": 2.18, "learning_rate": 1.3747896182124193e-05, "loss": 0.5476, "step": 196440 }, { "epoch": 2.18, "learning_rate": 1.3746973454985679e-05, "loss": 0.6145, "step": 196445 }, { "epoch": 2.18, "learning_rate": 1.3746050727847169e-05, "loss": 0.5562, "step": 196450 }, { "epoch": 2.18, "learning_rate": 1.3745128000708655e-05, "loss": 0.666, "step": 196455 }, { "epoch": 2.18, "learning_rate": 1.3744205273570143e-05, "loss": 0.6205, "step": 196460 }, { "epoch": 2.18, "learning_rate": 1.3743282546431629e-05, "loss": 0.5582, "step": 196465 }, { "epoch": 2.18, "learning_rate": 1.3742359819293118e-05, "loss": 0.6113, "step": 196470 }, { "epoch": 2.18, "learning_rate": 1.3741437092154604e-05, "loss": 0.6419, "step": 196475 }, { "epoch": 2.18, "learning_rate": 1.3740514365016094e-05, "loss": 0.632, "step": 196480 }, { "epoch": 2.18, "learning_rate": 1.373959163787758e-05, "loss": 0.6193, "step": 196485 }, { "epoch": 2.18, "learning_rate": 1.373866891073907e-05, "loss": 0.6376, "step": 196490 }, { "epoch": 2.18, "learning_rate": 1.3737746183600556e-05, "loss": 0.6193, "step": 196495 }, { "epoch": 2.18, "learning_rate": 1.3736823456462045e-05, "loss": 0.6016, "step": 196500 }, { "epoch": 2.18, "learning_rate": 1.3735900729323531e-05, "loss": 0.5911, "step": 196505 }, { "epoch": 2.18, "learning_rate": 1.3734978002185018e-05, "loss": 0.6251, "step": 196510 }, { "epoch": 2.18, "learning_rate": 1.3734055275046506e-05, "loss": 0.5711, "step": 196515 }, { "epoch": 2.18, "learning_rate": 1.3733132547907993e-05, "loss": 0.6274, "step": 196520 }, { "epoch": 2.18, "learning_rate": 1.3732209820769481e-05, "loss": 0.6007, "step": 196525 }, { "epoch": 2.18, "learning_rate": 1.3731287093630967e-05, "loss": 0.6088, "step": 196530 }, { "epoch": 2.18, "learning_rate": 1.3730364366492457e-05, "loss": 0.5743, "step": 196535 }, { "epoch": 2.18, "learning_rate": 1.3729441639353943e-05, "loss": 0.5949, "step": 196540 }, { "epoch": 2.18, "learning_rate": 1.3728518912215433e-05, "loss": 0.5702, "step": 196545 }, { "epoch": 2.18, "learning_rate": 1.3727596185076919e-05, "loss": 0.5848, "step": 196550 }, { "epoch": 2.18, "learning_rate": 1.3726673457938408e-05, "loss": 0.5306, "step": 196555 }, { "epoch": 2.18, "learning_rate": 1.3725750730799894e-05, "loss": 0.5422, "step": 196560 }, { "epoch": 2.18, "learning_rate": 1.3724828003661382e-05, "loss": 0.592, "step": 196565 }, { "epoch": 2.18, "learning_rate": 1.3723905276522868e-05, "loss": 0.5537, "step": 196570 }, { "epoch": 2.18, "learning_rate": 1.3722982549384358e-05, "loss": 0.6005, "step": 196575 }, { "epoch": 2.18, "learning_rate": 1.3722059822245844e-05, "loss": 0.5781, "step": 196580 }, { "epoch": 2.18, "learning_rate": 1.372113709510733e-05, "loss": 0.5914, "step": 196585 }, { "epoch": 2.18, "learning_rate": 1.372021436796882e-05, "loss": 0.5921, "step": 196590 }, { "epoch": 2.18, "learning_rate": 1.3719291640830306e-05, "loss": 0.6242, "step": 196595 }, { "epoch": 2.18, "learning_rate": 1.3718368913691796e-05, "loss": 0.6443, "step": 196600 }, { "epoch": 2.18, "learning_rate": 1.3717446186553282e-05, "loss": 0.6708, "step": 196605 }, { "epoch": 2.18, "learning_rate": 1.3716523459414771e-05, "loss": 0.5833, "step": 196610 }, { "epoch": 2.18, "learning_rate": 1.3715600732276257e-05, "loss": 0.6334, "step": 196615 }, { "epoch": 2.18, "learning_rate": 1.3714678005137745e-05, "loss": 0.6369, "step": 196620 }, { "epoch": 2.18, "learning_rate": 1.3713755277999233e-05, "loss": 0.5526, "step": 196625 }, { "epoch": 2.18, "learning_rate": 1.3712832550860721e-05, "loss": 0.5633, "step": 196630 }, { "epoch": 2.18, "learning_rate": 1.3711909823722207e-05, "loss": 0.5554, "step": 196635 }, { "epoch": 2.18, "learning_rate": 1.3710987096583697e-05, "loss": 0.6159, "step": 196640 }, { "epoch": 2.18, "learning_rate": 1.3710064369445183e-05, "loss": 0.6124, "step": 196645 }, { "epoch": 2.18, "learning_rate": 1.3709141642306672e-05, "loss": 0.6208, "step": 196650 }, { "epoch": 2.18, "learning_rate": 1.3708218915168158e-05, "loss": 0.5576, "step": 196655 }, { "epoch": 2.18, "learning_rate": 1.3707296188029645e-05, "loss": 0.6032, "step": 196660 }, { "epoch": 2.18, "learning_rate": 1.3706373460891134e-05, "loss": 0.5934, "step": 196665 }, { "epoch": 2.18, "learning_rate": 1.370545073375262e-05, "loss": 0.5796, "step": 196670 }, { "epoch": 2.18, "learning_rate": 1.370452800661411e-05, "loss": 0.6375, "step": 196675 }, { "epoch": 2.18, "learning_rate": 1.3703605279475596e-05, "loss": 0.6233, "step": 196680 }, { "epoch": 2.18, "learning_rate": 1.3702682552337084e-05, "loss": 0.5575, "step": 196685 }, { "epoch": 2.18, "learning_rate": 1.370175982519857e-05, "loss": 0.5991, "step": 196690 }, { "epoch": 2.18, "learning_rate": 1.370083709806006e-05, "loss": 0.5566, "step": 196695 }, { "epoch": 2.18, "learning_rate": 1.3699914370921546e-05, "loss": 0.5924, "step": 196700 }, { "epoch": 2.18, "learning_rate": 1.3698991643783035e-05, "loss": 0.606, "step": 196705 }, { "epoch": 2.18, "learning_rate": 1.3698068916644521e-05, "loss": 0.5798, "step": 196710 }, { "epoch": 2.18, "learning_rate": 1.3697146189506011e-05, "loss": 0.6142, "step": 196715 }, { "epoch": 2.18, "learning_rate": 1.3696223462367497e-05, "loss": 0.6631, "step": 196720 }, { "epoch": 2.18, "learning_rate": 1.3695300735228985e-05, "loss": 0.521, "step": 196725 }, { "epoch": 2.18, "learning_rate": 1.3694378008090473e-05, "loss": 0.5898, "step": 196730 }, { "epoch": 2.18, "learning_rate": 1.369345528095196e-05, "loss": 0.6025, "step": 196735 }, { "epoch": 2.18, "learning_rate": 1.3692532553813447e-05, "loss": 0.5713, "step": 196740 }, { "epoch": 2.18, "learning_rate": 1.3691609826674933e-05, "loss": 0.6103, "step": 196745 }, { "epoch": 2.18, "learning_rate": 1.3690687099536423e-05, "loss": 0.6626, "step": 196750 }, { "epoch": 2.18, "learning_rate": 1.3689764372397909e-05, "loss": 0.6097, "step": 196755 }, { "epoch": 2.18, "learning_rate": 1.3688841645259398e-05, "loss": 0.6197, "step": 196760 }, { "epoch": 2.18, "learning_rate": 1.3687918918120884e-05, "loss": 0.6091, "step": 196765 }, { "epoch": 2.18, "learning_rate": 1.3686996190982374e-05, "loss": 0.5737, "step": 196770 }, { "epoch": 2.18, "learning_rate": 1.368607346384386e-05, "loss": 0.6442, "step": 196775 }, { "epoch": 2.18, "learning_rate": 1.368515073670535e-05, "loss": 0.6013, "step": 196780 }, { "epoch": 2.18, "learning_rate": 1.3684228009566836e-05, "loss": 0.5932, "step": 196785 }, { "epoch": 2.18, "learning_rate": 1.3683305282428324e-05, "loss": 0.5484, "step": 196790 }, { "epoch": 2.18, "learning_rate": 1.368238255528981e-05, "loss": 0.6448, "step": 196795 }, { "epoch": 2.18, "learning_rate": 1.36814598281513e-05, "loss": 0.5999, "step": 196800 }, { "epoch": 2.18, "learning_rate": 1.3680537101012785e-05, "loss": 0.5992, "step": 196805 }, { "epoch": 2.18, "learning_rate": 1.3679614373874275e-05, "loss": 0.6056, "step": 196810 }, { "epoch": 2.18, "learning_rate": 1.3678691646735761e-05, "loss": 0.6716, "step": 196815 }, { "epoch": 2.18, "learning_rate": 1.3677768919597247e-05, "loss": 0.6238, "step": 196820 }, { "epoch": 2.18, "learning_rate": 1.3676846192458737e-05, "loss": 0.5709, "step": 196825 }, { "epoch": 2.18, "learning_rate": 1.3675923465320223e-05, "loss": 0.6546, "step": 196830 }, { "epoch": 2.18, "learning_rate": 1.3675000738181713e-05, "loss": 0.557, "step": 196835 }, { "epoch": 2.18, "learning_rate": 1.3674078011043199e-05, "loss": 0.6322, "step": 196840 }, { "epoch": 2.18, "learning_rate": 1.3673155283904687e-05, "loss": 0.5813, "step": 196845 }, { "epoch": 2.18, "learning_rate": 1.3672232556766173e-05, "loss": 0.6241, "step": 196850 }, { "epoch": 2.18, "learning_rate": 1.3671309829627662e-05, "loss": 0.5853, "step": 196855 }, { "epoch": 2.18, "learning_rate": 1.3670387102489148e-05, "loss": 0.5889, "step": 196860 }, { "epoch": 2.18, "learning_rate": 1.3669464375350638e-05, "loss": 0.6503, "step": 196865 }, { "epoch": 2.18, "learning_rate": 1.3668541648212124e-05, "loss": 0.6432, "step": 196870 }, { "epoch": 2.18, "learning_rate": 1.3667618921073614e-05, "loss": 0.5919, "step": 196875 }, { "epoch": 2.18, "learning_rate": 1.36666961939351e-05, "loss": 0.5896, "step": 196880 }, { "epoch": 2.18, "learning_rate": 1.366577346679659e-05, "loss": 0.5712, "step": 196885 }, { "epoch": 2.18, "learning_rate": 1.3664850739658076e-05, "loss": 0.6417, "step": 196890 }, { "epoch": 2.18, "learning_rate": 1.3663928012519562e-05, "loss": 0.6123, "step": 196895 }, { "epoch": 2.18, "learning_rate": 1.366300528538105e-05, "loss": 0.5485, "step": 196900 }, { "epoch": 2.18, "learning_rate": 1.3662082558242537e-05, "loss": 0.6388, "step": 196905 }, { "epoch": 2.18, "learning_rate": 1.3661159831104025e-05, "loss": 0.6183, "step": 196910 }, { "epoch": 2.18, "learning_rate": 1.3660237103965511e-05, "loss": 0.5356, "step": 196915 }, { "epoch": 2.18, "learning_rate": 1.3659314376827001e-05, "loss": 0.5266, "step": 196920 }, { "epoch": 2.18, "learning_rate": 1.3658391649688487e-05, "loss": 0.6078, "step": 196925 }, { "epoch": 2.18, "learning_rate": 1.3657468922549977e-05, "loss": 0.5759, "step": 196930 }, { "epoch": 2.18, "learning_rate": 1.3656546195411463e-05, "loss": 0.6141, "step": 196935 }, { "epoch": 2.18, "learning_rate": 1.3655623468272952e-05, "loss": 0.5749, "step": 196940 }, { "epoch": 2.18, "learning_rate": 1.3654700741134438e-05, "loss": 0.6249, "step": 196945 }, { "epoch": 2.18, "learning_rate": 1.3653778013995926e-05, "loss": 0.5831, "step": 196950 }, { "epoch": 2.18, "learning_rate": 1.3652855286857414e-05, "loss": 0.5345, "step": 196955 }, { "epoch": 2.18, "learning_rate": 1.3651932559718902e-05, "loss": 0.6083, "step": 196960 }, { "epoch": 2.18, "learning_rate": 1.3651009832580388e-05, "loss": 0.6414, "step": 196965 }, { "epoch": 2.18, "learning_rate": 1.3650087105441874e-05, "loss": 0.5955, "step": 196970 }, { "epoch": 2.18, "learning_rate": 1.3649164378303364e-05, "loss": 0.6461, "step": 196975 }, { "epoch": 2.18, "learning_rate": 1.364824165116485e-05, "loss": 0.6192, "step": 196980 }, { "epoch": 2.18, "learning_rate": 1.364731892402634e-05, "loss": 0.5953, "step": 196985 }, { "epoch": 2.18, "learning_rate": 1.3646396196887826e-05, "loss": 0.6104, "step": 196990 }, { "epoch": 2.18, "learning_rate": 1.3645473469749315e-05, "loss": 0.5663, "step": 196995 }, { "epoch": 2.18, "learning_rate": 1.3644550742610801e-05, "loss": 0.6557, "step": 197000 }, { "epoch": 2.18, "eval_loss": 0.5654886364936829, "eval_runtime": 69.1757, "eval_samples_per_second": 28.912, "eval_steps_per_second": 14.456, "step": 197000 }, { "epoch": 2.18, "learning_rate": 1.364362801547229e-05, "loss": 0.5993, "step": 197005 }, { "epoch": 2.18, "learning_rate": 1.3642705288333777e-05, "loss": 0.6664, "step": 197010 }, { "epoch": 2.18, "learning_rate": 1.3641782561195265e-05, "loss": 0.5822, "step": 197015 }, { "epoch": 2.18, "learning_rate": 1.3640859834056751e-05, "loss": 0.5922, "step": 197020 }, { "epoch": 2.18, "learning_rate": 1.363993710691824e-05, "loss": 0.6145, "step": 197025 }, { "epoch": 2.18, "learning_rate": 1.3639014379779727e-05, "loss": 0.6327, "step": 197030 }, { "epoch": 2.18, "learning_rate": 1.3638091652641216e-05, "loss": 0.5656, "step": 197035 }, { "epoch": 2.18, "learning_rate": 1.3637168925502703e-05, "loss": 0.5746, "step": 197040 }, { "epoch": 2.18, "learning_rate": 1.3636246198364189e-05, "loss": 0.563, "step": 197045 }, { "epoch": 2.18, "learning_rate": 1.3635323471225678e-05, "loss": 0.5456, "step": 197050 }, { "epoch": 2.18, "learning_rate": 1.3634400744087164e-05, "loss": 0.6494, "step": 197055 }, { "epoch": 2.18, "learning_rate": 1.3633478016948654e-05, "loss": 0.5764, "step": 197060 }, { "epoch": 2.18, "learning_rate": 1.363255528981014e-05, "loss": 0.6345, "step": 197065 }, { "epoch": 2.18, "learning_rate": 1.3631632562671628e-05, "loss": 0.5964, "step": 197070 }, { "epoch": 2.18, "learning_rate": 1.3630709835533114e-05, "loss": 0.5895, "step": 197075 }, { "epoch": 2.18, "learning_rate": 1.3629787108394604e-05, "loss": 0.5948, "step": 197080 }, { "epoch": 2.18, "learning_rate": 1.362886438125609e-05, "loss": 0.6067, "step": 197085 }, { "epoch": 2.18, "learning_rate": 1.362794165411758e-05, "loss": 0.5702, "step": 197090 }, { "epoch": 2.18, "learning_rate": 1.3627018926979065e-05, "loss": 0.6162, "step": 197095 }, { "epoch": 2.18, "learning_rate": 1.3626096199840555e-05, "loss": 0.568, "step": 197100 }, { "epoch": 2.18, "learning_rate": 1.3625173472702041e-05, "loss": 0.6379, "step": 197105 }, { "epoch": 2.18, "learning_rate": 1.362425074556353e-05, "loss": 0.5743, "step": 197110 }, { "epoch": 2.18, "learning_rate": 1.3623328018425017e-05, "loss": 0.545, "step": 197115 }, { "epoch": 2.18, "learning_rate": 1.3622405291286503e-05, "loss": 0.624, "step": 197120 }, { "epoch": 2.18, "learning_rate": 1.3621482564147991e-05, "loss": 0.6401, "step": 197125 }, { "epoch": 2.18, "learning_rate": 1.3620559837009477e-05, "loss": 0.5802, "step": 197130 }, { "epoch": 2.18, "learning_rate": 1.3619637109870967e-05, "loss": 0.6192, "step": 197135 }, { "epoch": 2.18, "learning_rate": 1.3618714382732453e-05, "loss": 0.5515, "step": 197140 }, { "epoch": 2.18, "learning_rate": 1.3617791655593942e-05, "loss": 0.6266, "step": 197145 }, { "epoch": 2.18, "learning_rate": 1.3616868928455428e-05, "loss": 0.5806, "step": 197150 }, { "epoch": 2.18, "learning_rate": 1.3615946201316918e-05, "loss": 0.6098, "step": 197155 }, { "epoch": 2.18, "learning_rate": 1.3615023474178404e-05, "loss": 0.6022, "step": 197160 }, { "epoch": 2.18, "learning_rate": 1.3614100747039894e-05, "loss": 0.5848, "step": 197165 }, { "epoch": 2.18, "learning_rate": 1.361317801990138e-05, "loss": 0.6314, "step": 197170 }, { "epoch": 2.18, "learning_rate": 1.3612255292762868e-05, "loss": 0.6193, "step": 197175 }, { "epoch": 2.18, "learning_rate": 1.3611332565624354e-05, "loss": 0.6666, "step": 197180 }, { "epoch": 2.18, "learning_rate": 1.3610409838485843e-05, "loss": 0.6701, "step": 197185 }, { "epoch": 2.18, "learning_rate": 1.360948711134733e-05, "loss": 0.5843, "step": 197190 }, { "epoch": 2.18, "learning_rate": 1.3608564384208816e-05, "loss": 0.6466, "step": 197195 }, { "epoch": 2.18, "learning_rate": 1.3607641657070305e-05, "loss": 0.5665, "step": 197200 }, { "epoch": 2.18, "learning_rate": 1.3606718929931791e-05, "loss": 0.6072, "step": 197205 }, { "epoch": 2.18, "learning_rate": 1.3605796202793281e-05, "loss": 0.6487, "step": 197210 }, { "epoch": 2.18, "learning_rate": 1.3604873475654767e-05, "loss": 0.5633, "step": 197215 }, { "epoch": 2.18, "learning_rate": 1.3603950748516257e-05, "loss": 0.5971, "step": 197220 }, { "epoch": 2.18, "learning_rate": 1.3603028021377743e-05, "loss": 0.5577, "step": 197225 }, { "epoch": 2.18, "learning_rate": 1.360210529423923e-05, "loss": 0.5526, "step": 197230 }, { "epoch": 2.18, "learning_rate": 1.3601182567100717e-05, "loss": 0.6057, "step": 197235 }, { "epoch": 2.18, "learning_rate": 1.3600259839962206e-05, "loss": 0.6403, "step": 197240 }, { "epoch": 2.18, "learning_rate": 1.3599337112823692e-05, "loss": 0.5842, "step": 197245 }, { "epoch": 2.18, "learning_rate": 1.3598414385685182e-05, "loss": 0.5861, "step": 197250 }, { "epoch": 2.18, "learning_rate": 1.3597491658546668e-05, "loss": 0.6164, "step": 197255 }, { "epoch": 2.18, "learning_rate": 1.3596568931408158e-05, "loss": 0.6213, "step": 197260 }, { "epoch": 2.18, "learning_rate": 1.3595646204269644e-05, "loss": 0.5752, "step": 197265 }, { "epoch": 2.18, "learning_rate": 1.359472347713113e-05, "loss": 0.6464, "step": 197270 }, { "epoch": 2.18, "learning_rate": 1.359380074999262e-05, "loss": 0.5251, "step": 197275 }, { "epoch": 2.18, "learning_rate": 1.3592878022854106e-05, "loss": 0.5752, "step": 197280 }, { "epoch": 2.18, "learning_rate": 1.3591955295715594e-05, "loss": 0.5866, "step": 197285 }, { "epoch": 2.18, "learning_rate": 1.3591032568577081e-05, "loss": 0.5975, "step": 197290 }, { "epoch": 2.18, "learning_rate": 1.359010984143857e-05, "loss": 0.5806, "step": 197295 }, { "epoch": 2.18, "learning_rate": 1.3589187114300055e-05, "loss": 0.5906, "step": 197300 }, { "epoch": 2.18, "learning_rate": 1.3588264387161545e-05, "loss": 0.5626, "step": 197305 }, { "epoch": 2.18, "learning_rate": 1.3587341660023031e-05, "loss": 0.6237, "step": 197310 }, { "epoch": 2.18, "learning_rate": 1.358641893288452e-05, "loss": 0.6336, "step": 197315 }, { "epoch": 2.18, "learning_rate": 1.3585496205746007e-05, "loss": 0.6426, "step": 197320 }, { "epoch": 2.18, "learning_rate": 1.3584573478607496e-05, "loss": 0.6103, "step": 197325 }, { "epoch": 2.18, "learning_rate": 1.3583650751468982e-05, "loss": 0.6018, "step": 197330 }, { "epoch": 2.19, "learning_rate": 1.358272802433047e-05, "loss": 0.6083, "step": 197335 }, { "epoch": 2.19, "learning_rate": 1.3581805297191958e-05, "loss": 0.5992, "step": 197340 }, { "epoch": 2.19, "learning_rate": 1.3580882570053444e-05, "loss": 0.633, "step": 197345 }, { "epoch": 2.19, "learning_rate": 1.3579959842914932e-05, "loss": 0.6444, "step": 197350 }, { "epoch": 2.19, "learning_rate": 1.3579037115776418e-05, "loss": 0.5896, "step": 197355 }, { "epoch": 2.19, "learning_rate": 1.3578114388637908e-05, "loss": 0.6091, "step": 197360 }, { "epoch": 2.19, "learning_rate": 1.3577191661499394e-05, "loss": 0.6157, "step": 197365 }, { "epoch": 2.19, "learning_rate": 1.3576268934360884e-05, "loss": 0.6001, "step": 197370 }, { "epoch": 2.19, "learning_rate": 1.357534620722237e-05, "loss": 0.618, "step": 197375 }, { "epoch": 2.19, "learning_rate": 1.357442348008386e-05, "loss": 0.6365, "step": 197380 }, { "epoch": 2.19, "learning_rate": 1.3573500752945345e-05, "loss": 0.5964, "step": 197385 }, { "epoch": 2.19, "learning_rate": 1.3572578025806835e-05, "loss": 0.5822, "step": 197390 }, { "epoch": 2.19, "learning_rate": 1.3571655298668321e-05, "loss": 0.5718, "step": 197395 }, { "epoch": 2.19, "learning_rate": 1.3570732571529809e-05, "loss": 0.636, "step": 197400 }, { "epoch": 2.19, "learning_rate": 1.3569809844391295e-05, "loss": 0.6117, "step": 197405 }, { "epoch": 2.19, "learning_rate": 1.3568887117252785e-05, "loss": 0.5762, "step": 197410 }, { "epoch": 2.19, "learning_rate": 1.356796439011427e-05, "loss": 0.589, "step": 197415 }, { "epoch": 2.19, "learning_rate": 1.3567041662975757e-05, "loss": 0.6164, "step": 197420 }, { "epoch": 2.19, "learning_rate": 1.3566118935837247e-05, "loss": 0.6911, "step": 197425 }, { "epoch": 2.19, "learning_rate": 1.3565196208698733e-05, "loss": 0.6192, "step": 197430 }, { "epoch": 2.19, "learning_rate": 1.3564273481560222e-05, "loss": 0.6226, "step": 197435 }, { "epoch": 2.19, "learning_rate": 1.3563350754421708e-05, "loss": 0.5795, "step": 197440 }, { "epoch": 2.19, "learning_rate": 1.3562428027283198e-05, "loss": 0.5948, "step": 197445 }, { "epoch": 2.19, "learning_rate": 1.3561505300144684e-05, "loss": 0.5726, "step": 197450 }, { "epoch": 2.19, "learning_rate": 1.3560582573006172e-05, "loss": 0.5969, "step": 197455 }, { "epoch": 2.19, "learning_rate": 1.3559659845867658e-05, "loss": 0.5959, "step": 197460 }, { "epoch": 2.19, "learning_rate": 1.3558737118729148e-05, "loss": 0.5883, "step": 197465 }, { "epoch": 2.19, "learning_rate": 1.3557814391590634e-05, "loss": 0.6019, "step": 197470 }, { "epoch": 2.19, "learning_rate": 1.3556891664452123e-05, "loss": 0.634, "step": 197475 }, { "epoch": 2.19, "learning_rate": 1.355596893731361e-05, "loss": 0.6132, "step": 197480 }, { "epoch": 2.19, "learning_rate": 1.3555046210175099e-05, "loss": 0.5656, "step": 197485 }, { "epoch": 2.19, "learning_rate": 1.3554123483036585e-05, "loss": 0.6556, "step": 197490 }, { "epoch": 2.19, "learning_rate": 1.3553200755898071e-05, "loss": 0.5944, "step": 197495 }, { "epoch": 2.19, "learning_rate": 1.3552278028759561e-05, "loss": 0.6359, "step": 197500 }, { "epoch": 2.19, "learning_rate": 1.3551355301621047e-05, "loss": 0.5909, "step": 197505 }, { "epoch": 2.19, "learning_rate": 1.3550432574482535e-05, "loss": 0.6094, "step": 197510 }, { "epoch": 2.19, "learning_rate": 1.3549509847344021e-05, "loss": 0.6128, "step": 197515 }, { "epoch": 2.19, "learning_rate": 1.354858712020551e-05, "loss": 0.6163, "step": 197520 }, { "epoch": 2.19, "learning_rate": 1.3547664393066997e-05, "loss": 0.5713, "step": 197525 }, { "epoch": 2.19, "learning_rate": 1.3546741665928486e-05, "loss": 0.5571, "step": 197530 }, { "epoch": 2.19, "learning_rate": 1.3545818938789972e-05, "loss": 0.5859, "step": 197535 }, { "epoch": 2.19, "learning_rate": 1.3544896211651462e-05, "loss": 0.5847, "step": 197540 }, { "epoch": 2.19, "learning_rate": 1.3543973484512948e-05, "loss": 0.581, "step": 197545 }, { "epoch": 2.19, "learning_rate": 1.3543050757374438e-05, "loss": 0.5703, "step": 197550 }, { "epoch": 2.19, "learning_rate": 1.3542128030235924e-05, "loss": 0.6462, "step": 197555 }, { "epoch": 2.19, "learning_rate": 1.3541205303097412e-05, "loss": 0.5931, "step": 197560 }, { "epoch": 2.19, "learning_rate": 1.3540282575958898e-05, "loss": 0.5645, "step": 197565 }, { "epoch": 2.19, "learning_rate": 1.3539359848820387e-05, "loss": 0.6385, "step": 197570 }, { "epoch": 2.19, "learning_rate": 1.3538437121681874e-05, "loss": 0.5648, "step": 197575 }, { "epoch": 2.19, "learning_rate": 1.353751439454336e-05, "loss": 0.6388, "step": 197580 }, { "epoch": 2.19, "learning_rate": 1.353659166740485e-05, "loss": 0.6475, "step": 197585 }, { "epoch": 2.19, "learning_rate": 1.3535668940266335e-05, "loss": 0.6083, "step": 197590 }, { "epoch": 2.19, "learning_rate": 1.3534746213127825e-05, "loss": 0.5853, "step": 197595 }, { "epoch": 2.19, "learning_rate": 1.3533823485989311e-05, "loss": 0.6007, "step": 197600 }, { "epoch": 2.19, "learning_rate": 1.35329007588508e-05, "loss": 0.6357, "step": 197605 }, { "epoch": 2.19, "learning_rate": 1.3531978031712287e-05, "loss": 0.5778, "step": 197610 }, { "epoch": 2.19, "learning_rate": 1.3531055304573775e-05, "loss": 0.5186, "step": 197615 }, { "epoch": 2.19, "learning_rate": 1.3530132577435262e-05, "loss": 0.6102, "step": 197620 }, { "epoch": 2.19, "learning_rate": 1.352920985029675e-05, "loss": 0.6322, "step": 197625 }, { "epoch": 2.19, "learning_rate": 1.3528287123158236e-05, "loss": 0.5827, "step": 197630 }, { "epoch": 2.19, "learning_rate": 1.3527364396019726e-05, "loss": 0.6057, "step": 197635 }, { "epoch": 2.19, "learning_rate": 1.3526441668881212e-05, "loss": 0.6155, "step": 197640 }, { "epoch": 2.19, "learning_rate": 1.3525518941742702e-05, "loss": 0.5979, "step": 197645 }, { "epoch": 2.19, "learning_rate": 1.3524596214604188e-05, "loss": 0.625, "step": 197650 }, { "epoch": 2.19, "learning_rate": 1.3523673487465674e-05, "loss": 0.6016, "step": 197655 }, { "epoch": 2.19, "learning_rate": 1.3522750760327164e-05, "loss": 0.6295, "step": 197660 }, { "epoch": 2.19, "learning_rate": 1.352182803318865e-05, "loss": 0.5419, "step": 197665 }, { "epoch": 2.19, "learning_rate": 1.3520905306050138e-05, "loss": 0.6421, "step": 197670 }, { "epoch": 2.19, "learning_rate": 1.3519982578911625e-05, "loss": 0.5582, "step": 197675 }, { "epoch": 2.19, "learning_rate": 1.3519059851773113e-05, "loss": 0.5475, "step": 197680 }, { "epoch": 2.19, "learning_rate": 1.35181371246346e-05, "loss": 0.6231, "step": 197685 }, { "epoch": 2.19, "learning_rate": 1.3517214397496089e-05, "loss": 0.5379, "step": 197690 }, { "epoch": 2.19, "learning_rate": 1.3516291670357575e-05, "loss": 0.6051, "step": 197695 }, { "epoch": 2.19, "learning_rate": 1.3515368943219065e-05, "loss": 0.6084, "step": 197700 }, { "epoch": 2.19, "learning_rate": 1.351444621608055e-05, "loss": 0.6374, "step": 197705 }, { "epoch": 2.19, "learning_rate": 1.351352348894204e-05, "loss": 0.5982, "step": 197710 }, { "epoch": 2.19, "learning_rate": 1.3512600761803527e-05, "loss": 0.6797, "step": 197715 }, { "epoch": 2.19, "learning_rate": 1.3511678034665014e-05, "loss": 0.5948, "step": 197720 }, { "epoch": 2.19, "learning_rate": 1.3510755307526502e-05, "loss": 0.6441, "step": 197725 }, { "epoch": 2.19, "learning_rate": 1.3509832580387988e-05, "loss": 0.5905, "step": 197730 }, { "epoch": 2.19, "learning_rate": 1.3508909853249476e-05, "loss": 0.6455, "step": 197735 }, { "epoch": 2.19, "learning_rate": 1.3507987126110962e-05, "loss": 0.5678, "step": 197740 }, { "epoch": 2.19, "learning_rate": 1.3507064398972452e-05, "loss": 0.5846, "step": 197745 }, { "epoch": 2.19, "learning_rate": 1.3506141671833938e-05, "loss": 0.5991, "step": 197750 }, { "epoch": 2.19, "learning_rate": 1.3505218944695428e-05, "loss": 0.5666, "step": 197755 }, { "epoch": 2.19, "learning_rate": 1.3504296217556914e-05, "loss": 0.5971, "step": 197760 }, { "epoch": 2.19, "learning_rate": 1.3503373490418403e-05, "loss": 0.5988, "step": 197765 }, { "epoch": 2.19, "learning_rate": 1.350245076327989e-05, "loss": 0.6498, "step": 197770 }, { "epoch": 2.19, "learning_rate": 1.3501528036141379e-05, "loss": 0.6192, "step": 197775 }, { "epoch": 2.19, "learning_rate": 1.3500605309002865e-05, "loss": 0.5313, "step": 197780 }, { "epoch": 2.19, "learning_rate": 1.3499682581864353e-05, "loss": 0.6041, "step": 197785 }, { "epoch": 2.19, "learning_rate": 1.3498759854725839e-05, "loss": 0.6558, "step": 197790 }, { "epoch": 2.19, "learning_rate": 1.3497837127587329e-05, "loss": 0.61, "step": 197795 }, { "epoch": 2.19, "learning_rate": 1.3496914400448815e-05, "loss": 0.5777, "step": 197800 }, { "epoch": 2.19, "learning_rate": 1.3495991673310301e-05, "loss": 0.6313, "step": 197805 }, { "epoch": 2.19, "learning_rate": 1.349506894617179e-05, "loss": 0.596, "step": 197810 }, { "epoch": 2.19, "learning_rate": 1.3494146219033277e-05, "loss": 0.5513, "step": 197815 }, { "epoch": 2.19, "learning_rate": 1.3493223491894766e-05, "loss": 0.6277, "step": 197820 }, { "epoch": 2.19, "learning_rate": 1.3492300764756252e-05, "loss": 0.5569, "step": 197825 }, { "epoch": 2.19, "learning_rate": 1.3491378037617742e-05, "loss": 0.6284, "step": 197830 }, { "epoch": 2.19, "learning_rate": 1.3490455310479228e-05, "loss": 0.592, "step": 197835 }, { "epoch": 2.19, "learning_rate": 1.3489532583340716e-05, "loss": 0.5931, "step": 197840 }, { "epoch": 2.19, "learning_rate": 1.3488609856202202e-05, "loss": 0.6059, "step": 197845 }, { "epoch": 2.19, "learning_rate": 1.3487687129063692e-05, "loss": 0.618, "step": 197850 }, { "epoch": 2.19, "learning_rate": 1.3486764401925178e-05, "loss": 0.5927, "step": 197855 }, { "epoch": 2.19, "learning_rate": 1.3485841674786667e-05, "loss": 0.6546, "step": 197860 }, { "epoch": 2.19, "learning_rate": 1.3484918947648153e-05, "loss": 0.5508, "step": 197865 }, { "epoch": 2.19, "learning_rate": 1.3483996220509643e-05, "loss": 0.6032, "step": 197870 }, { "epoch": 2.19, "learning_rate": 1.348307349337113e-05, "loss": 0.5637, "step": 197875 }, { "epoch": 2.19, "learning_rate": 1.3482150766232615e-05, "loss": 0.5941, "step": 197880 }, { "epoch": 2.19, "learning_rate": 1.3481228039094105e-05, "loss": 0.5998, "step": 197885 }, { "epoch": 2.19, "learning_rate": 1.3480305311955591e-05, "loss": 0.5666, "step": 197890 }, { "epoch": 2.19, "learning_rate": 1.3479382584817079e-05, "loss": 0.6101, "step": 197895 }, { "epoch": 2.19, "learning_rate": 1.3478459857678567e-05, "loss": 0.6069, "step": 197900 }, { "epoch": 2.19, "learning_rate": 1.3477537130540055e-05, "loss": 0.5527, "step": 197905 }, { "epoch": 2.19, "learning_rate": 1.347661440340154e-05, "loss": 0.5577, "step": 197910 }, { "epoch": 2.19, "learning_rate": 1.347569167626303e-05, "loss": 0.6339, "step": 197915 }, { "epoch": 2.19, "learning_rate": 1.3474768949124516e-05, "loss": 0.592, "step": 197920 }, { "epoch": 2.19, "learning_rate": 1.3473846221986006e-05, "loss": 0.6272, "step": 197925 }, { "epoch": 2.19, "learning_rate": 1.3472923494847492e-05, "loss": 0.6029, "step": 197930 }, { "epoch": 2.19, "learning_rate": 1.3472000767708982e-05, "loss": 0.6146, "step": 197935 }, { "epoch": 2.19, "learning_rate": 1.3471078040570468e-05, "loss": 0.5877, "step": 197940 }, { "epoch": 2.19, "learning_rate": 1.3470155313431956e-05, "loss": 0.612, "step": 197945 }, { "epoch": 2.19, "learning_rate": 1.3469232586293442e-05, "loss": 0.6219, "step": 197950 }, { "epoch": 2.19, "learning_rate": 1.346830985915493e-05, "loss": 0.587, "step": 197955 }, { "epoch": 2.19, "learning_rate": 1.3467387132016418e-05, "loss": 0.5476, "step": 197960 }, { "epoch": 2.19, "learning_rate": 1.3466464404877904e-05, "loss": 0.5798, "step": 197965 }, { "epoch": 2.19, "learning_rate": 1.3465541677739393e-05, "loss": 0.5666, "step": 197970 }, { "epoch": 2.19, "learning_rate": 1.346461895060088e-05, "loss": 0.5846, "step": 197975 }, { "epoch": 2.19, "learning_rate": 1.3463696223462369e-05, "loss": 0.6474, "step": 197980 }, { "epoch": 2.19, "learning_rate": 1.3462773496323855e-05, "loss": 0.6189, "step": 197985 }, { "epoch": 2.19, "learning_rate": 1.3461850769185345e-05, "loss": 0.5922, "step": 197990 }, { "epoch": 2.19, "learning_rate": 1.346092804204683e-05, "loss": 0.623, "step": 197995 }, { "epoch": 2.19, "learning_rate": 1.3460005314908319e-05, "loss": 0.6055, "step": 198000 }, { "epoch": 2.19, "eval_loss": 0.5820457935333252, "eval_runtime": 69.4574, "eval_samples_per_second": 28.795, "eval_steps_per_second": 14.397, "step": 198000 }, { "epoch": 2.19, "learning_rate": 1.3459082587769806e-05, "loss": 0.5941, "step": 198005 }, { "epoch": 2.19, "learning_rate": 1.3458159860631294e-05, "loss": 0.6348, "step": 198010 }, { "epoch": 2.19, "learning_rate": 1.345723713349278e-05, "loss": 0.6167, "step": 198015 }, { "epoch": 2.19, "learning_rate": 1.345631440635427e-05, "loss": 0.5749, "step": 198020 }, { "epoch": 2.19, "learning_rate": 1.3455391679215756e-05, "loss": 0.644, "step": 198025 }, { "epoch": 2.19, "learning_rate": 1.3454468952077242e-05, "loss": 0.5878, "step": 198030 }, { "epoch": 2.19, "learning_rate": 1.3453546224938732e-05, "loss": 0.6319, "step": 198035 }, { "epoch": 2.19, "learning_rate": 1.3452623497800218e-05, "loss": 0.6046, "step": 198040 }, { "epoch": 2.19, "learning_rate": 1.3451700770661708e-05, "loss": 0.6208, "step": 198045 }, { "epoch": 2.19, "learning_rate": 1.3450778043523194e-05, "loss": 0.5847, "step": 198050 }, { "epoch": 2.19, "learning_rate": 1.3449855316384683e-05, "loss": 0.6157, "step": 198055 }, { "epoch": 2.19, "learning_rate": 1.344893258924617e-05, "loss": 0.5667, "step": 198060 }, { "epoch": 2.19, "learning_rate": 1.3448009862107657e-05, "loss": 0.606, "step": 198065 }, { "epoch": 2.19, "learning_rate": 1.3447087134969143e-05, "loss": 0.6163, "step": 198070 }, { "epoch": 2.19, "learning_rate": 1.3446164407830633e-05, "loss": 0.585, "step": 198075 }, { "epoch": 2.19, "learning_rate": 1.3445241680692119e-05, "loss": 0.5961, "step": 198080 }, { "epoch": 2.19, "learning_rate": 1.3444318953553609e-05, "loss": 0.5699, "step": 198085 }, { "epoch": 2.19, "learning_rate": 1.3443396226415095e-05, "loss": 0.5986, "step": 198090 }, { "epoch": 2.19, "learning_rate": 1.3442473499276584e-05, "loss": 0.576, "step": 198095 }, { "epoch": 2.19, "learning_rate": 1.344155077213807e-05, "loss": 0.6028, "step": 198100 }, { "epoch": 2.19, "learning_rate": 1.3440628044999557e-05, "loss": 0.6214, "step": 198105 }, { "epoch": 2.19, "learning_rate": 1.3439705317861046e-05, "loss": 0.5953, "step": 198110 }, { "epoch": 2.19, "learning_rate": 1.3438782590722532e-05, "loss": 0.6098, "step": 198115 }, { "epoch": 2.19, "learning_rate": 1.343785986358402e-05, "loss": 0.6716, "step": 198120 }, { "epoch": 2.19, "learning_rate": 1.3436937136445506e-05, "loss": 0.5887, "step": 198125 }, { "epoch": 2.19, "learning_rate": 1.3436014409306996e-05, "loss": 0.5942, "step": 198130 }, { "epoch": 2.19, "learning_rate": 1.3435091682168482e-05, "loss": 0.6455, "step": 198135 }, { "epoch": 2.19, "learning_rate": 1.3434168955029972e-05, "loss": 0.5837, "step": 198140 }, { "epoch": 2.19, "learning_rate": 1.3433246227891458e-05, "loss": 0.6207, "step": 198145 }, { "epoch": 2.19, "learning_rate": 1.3432323500752947e-05, "loss": 0.603, "step": 198150 }, { "epoch": 2.19, "learning_rate": 1.3431400773614433e-05, "loss": 0.6598, "step": 198155 }, { "epoch": 2.19, "learning_rate": 1.3430478046475923e-05, "loss": 0.5863, "step": 198160 }, { "epoch": 2.19, "learning_rate": 1.342955531933741e-05, "loss": 0.5523, "step": 198165 }, { "epoch": 2.19, "learning_rate": 1.3428632592198897e-05, "loss": 0.645, "step": 198170 }, { "epoch": 2.19, "learning_rate": 1.3427709865060383e-05, "loss": 0.5678, "step": 198175 }, { "epoch": 2.19, "learning_rate": 1.342678713792187e-05, "loss": 0.5818, "step": 198180 }, { "epoch": 2.19, "learning_rate": 1.3425864410783359e-05, "loss": 0.5855, "step": 198185 }, { "epoch": 2.19, "learning_rate": 1.3424941683644845e-05, "loss": 0.5996, "step": 198190 }, { "epoch": 2.19, "learning_rate": 1.3424018956506335e-05, "loss": 0.6155, "step": 198195 }, { "epoch": 2.19, "learning_rate": 1.342309622936782e-05, "loss": 0.5629, "step": 198200 }, { "epoch": 2.19, "learning_rate": 1.342217350222931e-05, "loss": 0.6098, "step": 198205 }, { "epoch": 2.19, "learning_rate": 1.3421250775090796e-05, "loss": 0.629, "step": 198210 }, { "epoch": 2.19, "learning_rate": 1.3420328047952286e-05, "loss": 0.6098, "step": 198215 }, { "epoch": 2.19, "learning_rate": 1.3419405320813772e-05, "loss": 0.5911, "step": 198220 }, { "epoch": 2.19, "learning_rate": 1.341848259367526e-05, "loss": 0.645, "step": 198225 }, { "epoch": 2.19, "learning_rate": 1.3417559866536746e-05, "loss": 0.6192, "step": 198230 }, { "epoch": 2.2, "learning_rate": 1.3416637139398236e-05, "loss": 0.5755, "step": 198235 }, { "epoch": 2.2, "learning_rate": 1.3415714412259722e-05, "loss": 0.6397, "step": 198240 }, { "epoch": 2.2, "learning_rate": 1.3414791685121211e-05, "loss": 0.5074, "step": 198245 }, { "epoch": 2.2, "learning_rate": 1.3413868957982698e-05, "loss": 0.6531, "step": 198250 }, { "epoch": 2.2, "learning_rate": 1.3412946230844184e-05, "loss": 0.5728, "step": 198255 }, { "epoch": 2.2, "learning_rate": 1.3412023503705673e-05, "loss": 0.5656, "step": 198260 }, { "epoch": 2.2, "learning_rate": 1.341110077656716e-05, "loss": 0.6312, "step": 198265 }, { "epoch": 2.2, "learning_rate": 1.3410178049428649e-05, "loss": 0.6039, "step": 198270 }, { "epoch": 2.2, "learning_rate": 1.3409255322290135e-05, "loss": 0.6073, "step": 198275 }, { "epoch": 2.2, "learning_rate": 1.3408332595151623e-05, "loss": 0.558, "step": 198280 }, { "epoch": 2.2, "learning_rate": 1.340740986801311e-05, "loss": 0.604, "step": 198285 }, { "epoch": 2.2, "learning_rate": 1.3406487140874599e-05, "loss": 0.6118, "step": 198290 }, { "epoch": 2.2, "learning_rate": 1.3405564413736085e-05, "loss": 0.5942, "step": 198295 }, { "epoch": 2.2, "learning_rate": 1.3404641686597574e-05, "loss": 0.5641, "step": 198300 }, { "epoch": 2.2, "learning_rate": 1.340371895945906e-05, "loss": 0.589, "step": 198305 }, { "epoch": 2.2, "learning_rate": 1.340279623232055e-05, "loss": 0.568, "step": 198310 }, { "epoch": 2.2, "learning_rate": 1.3401873505182036e-05, "loss": 0.5536, "step": 198315 }, { "epoch": 2.2, "learning_rate": 1.3400950778043526e-05, "loss": 0.623, "step": 198320 }, { "epoch": 2.2, "learning_rate": 1.3400028050905012e-05, "loss": 0.6092, "step": 198325 }, { "epoch": 2.2, "learning_rate": 1.3399105323766498e-05, "loss": 0.6118, "step": 198330 }, { "epoch": 2.2, "learning_rate": 1.3398182596627986e-05, "loss": 0.5567, "step": 198335 }, { "epoch": 2.2, "learning_rate": 1.3397259869489474e-05, "loss": 0.5639, "step": 198340 }, { "epoch": 2.2, "learning_rate": 1.3396337142350962e-05, "loss": 0.585, "step": 198345 }, { "epoch": 2.2, "learning_rate": 1.3395414415212448e-05, "loss": 0.5899, "step": 198350 }, { "epoch": 2.2, "learning_rate": 1.3394491688073937e-05, "loss": 0.6388, "step": 198355 }, { "epoch": 2.2, "learning_rate": 1.3393568960935423e-05, "loss": 0.5382, "step": 198360 }, { "epoch": 2.2, "learning_rate": 1.3392646233796913e-05, "loss": 0.5807, "step": 198365 }, { "epoch": 2.2, "learning_rate": 1.3391723506658399e-05, "loss": 0.6469, "step": 198370 }, { "epoch": 2.2, "learning_rate": 1.3390800779519889e-05, "loss": 0.5861, "step": 198375 }, { "epoch": 2.2, "learning_rate": 1.3389878052381375e-05, "loss": 0.5938, "step": 198380 }, { "epoch": 2.2, "learning_rate": 1.3388955325242863e-05, "loss": 0.5655, "step": 198385 }, { "epoch": 2.2, "learning_rate": 1.338803259810435e-05, "loss": 0.5919, "step": 198390 }, { "epoch": 2.2, "learning_rate": 1.3387109870965838e-05, "loss": 0.5962, "step": 198395 }, { "epoch": 2.2, "learning_rate": 1.3386187143827325e-05, "loss": 0.5883, "step": 198400 }, { "epoch": 2.2, "learning_rate": 1.3385264416688814e-05, "loss": 0.6292, "step": 198405 }, { "epoch": 2.2, "learning_rate": 1.33843416895503e-05, "loss": 0.5529, "step": 198410 }, { "epoch": 2.2, "learning_rate": 1.3383418962411786e-05, "loss": 0.5904, "step": 198415 }, { "epoch": 2.2, "learning_rate": 1.3382496235273276e-05, "loss": 0.6056, "step": 198420 }, { "epoch": 2.2, "learning_rate": 1.3381573508134762e-05, "loss": 0.6217, "step": 198425 }, { "epoch": 2.2, "learning_rate": 1.3380650780996252e-05, "loss": 0.5534, "step": 198430 }, { "epoch": 2.2, "learning_rate": 1.3379728053857738e-05, "loss": 0.584, "step": 198435 }, { "epoch": 2.2, "learning_rate": 1.3378805326719227e-05, "loss": 0.5986, "step": 198440 }, { "epoch": 2.2, "learning_rate": 1.3377882599580713e-05, "loss": 0.6692, "step": 198445 }, { "epoch": 2.2, "learning_rate": 1.3376959872442201e-05, "loss": 0.6022, "step": 198450 }, { "epoch": 2.2, "learning_rate": 1.3376037145303687e-05, "loss": 0.567, "step": 198455 }, { "epoch": 2.2, "learning_rate": 1.3375114418165177e-05, "loss": 0.6128, "step": 198460 }, { "epoch": 2.2, "learning_rate": 1.3374191691026663e-05, "loss": 0.5616, "step": 198465 }, { "epoch": 2.2, "learning_rate": 1.3373268963888153e-05, "loss": 0.5926, "step": 198470 }, { "epoch": 2.2, "learning_rate": 1.3372346236749639e-05, "loss": 0.5869, "step": 198475 }, { "epoch": 2.2, "learning_rate": 1.3371423509611128e-05, "loss": 0.6205, "step": 198480 }, { "epoch": 2.2, "learning_rate": 1.3370500782472615e-05, "loss": 0.632, "step": 198485 }, { "epoch": 2.2, "learning_rate": 1.33695780553341e-05, "loss": 0.6021, "step": 198490 }, { "epoch": 2.2, "learning_rate": 1.336865532819559e-05, "loss": 0.5996, "step": 198495 }, { "epoch": 2.2, "learning_rate": 1.3367732601057076e-05, "loss": 0.5976, "step": 198500 }, { "epoch": 2.2, "learning_rate": 1.3366809873918564e-05, "loss": 0.6566, "step": 198505 }, { "epoch": 2.2, "learning_rate": 1.336588714678005e-05, "loss": 0.5622, "step": 198510 }, { "epoch": 2.2, "learning_rate": 1.336496441964154e-05, "loss": 0.5593, "step": 198515 }, { "epoch": 2.2, "learning_rate": 1.3364041692503026e-05, "loss": 0.6388, "step": 198520 }, { "epoch": 2.2, "learning_rate": 1.3363118965364516e-05, "loss": 0.6281, "step": 198525 }, { "epoch": 2.2, "learning_rate": 1.3362196238226002e-05, "loss": 0.5184, "step": 198530 }, { "epoch": 2.2, "learning_rate": 1.3361273511087491e-05, "loss": 0.5673, "step": 198535 }, { "epoch": 2.2, "learning_rate": 1.3360350783948977e-05, "loss": 0.5746, "step": 198540 }, { "epoch": 2.2, "learning_rate": 1.3359428056810467e-05, "loss": 0.5963, "step": 198545 }, { "epoch": 2.2, "learning_rate": 1.3358505329671953e-05, "loss": 0.6265, "step": 198550 }, { "epoch": 2.2, "learning_rate": 1.3357582602533441e-05, "loss": 0.5865, "step": 198555 }, { "epoch": 2.2, "learning_rate": 1.3356659875394927e-05, "loss": 0.534, "step": 198560 }, { "epoch": 2.2, "learning_rate": 1.3355737148256415e-05, "loss": 0.6241, "step": 198565 }, { "epoch": 2.2, "learning_rate": 1.3354814421117903e-05, "loss": 0.5979, "step": 198570 }, { "epoch": 2.2, "learning_rate": 1.3353891693979389e-05, "loss": 0.6066, "step": 198575 }, { "epoch": 2.2, "learning_rate": 1.3352968966840879e-05, "loss": 0.5539, "step": 198580 }, { "epoch": 2.2, "learning_rate": 1.3352046239702365e-05, "loss": 0.5715, "step": 198585 }, { "epoch": 2.2, "learning_rate": 1.3351123512563854e-05, "loss": 0.5883, "step": 198590 }, { "epoch": 2.2, "learning_rate": 1.335020078542534e-05, "loss": 0.5881, "step": 198595 }, { "epoch": 2.2, "learning_rate": 1.334927805828683e-05, "loss": 0.6098, "step": 198600 }, { "epoch": 2.2, "learning_rate": 1.3348355331148316e-05, "loss": 0.57, "step": 198605 }, { "epoch": 2.2, "learning_rate": 1.3347432604009804e-05, "loss": 0.6419, "step": 198610 }, { "epoch": 2.2, "learning_rate": 1.334650987687129e-05, "loss": 0.5852, "step": 198615 }, { "epoch": 2.2, "learning_rate": 1.334558714973278e-05, "loss": 0.5021, "step": 198620 }, { "epoch": 2.2, "learning_rate": 1.3344664422594266e-05, "loss": 0.5626, "step": 198625 }, { "epoch": 2.2, "learning_rate": 1.3343741695455755e-05, "loss": 0.5992, "step": 198630 }, { "epoch": 2.2, "learning_rate": 1.3342818968317242e-05, "loss": 0.6264, "step": 198635 }, { "epoch": 2.2, "learning_rate": 1.3341896241178728e-05, "loss": 0.6614, "step": 198640 }, { "epoch": 2.2, "learning_rate": 1.3340973514040217e-05, "loss": 0.5994, "step": 198645 }, { "epoch": 2.2, "learning_rate": 1.3340050786901703e-05, "loss": 0.5718, "step": 198650 }, { "epoch": 2.2, "learning_rate": 1.3339128059763193e-05, "loss": 0.5291, "step": 198655 }, { "epoch": 2.2, "learning_rate": 1.3338205332624679e-05, "loss": 0.5909, "step": 198660 }, { "epoch": 2.2, "learning_rate": 1.3337282605486167e-05, "loss": 0.5855, "step": 198665 }, { "epoch": 2.2, "learning_rate": 1.3336359878347655e-05, "loss": 0.6241, "step": 198670 }, { "epoch": 2.2, "learning_rate": 1.3335437151209143e-05, "loss": 0.6438, "step": 198675 }, { "epoch": 2.2, "learning_rate": 1.3334514424070629e-05, "loss": 0.5858, "step": 198680 }, { "epoch": 2.2, "learning_rate": 1.3333591696932118e-05, "loss": 0.6369, "step": 198685 }, { "epoch": 2.2, "learning_rate": 1.3332668969793604e-05, "loss": 0.6037, "step": 198690 }, { "epoch": 2.2, "learning_rate": 1.3331746242655094e-05, "loss": 0.5738, "step": 198695 }, { "epoch": 2.2, "learning_rate": 1.333082351551658e-05, "loss": 0.6248, "step": 198700 }, { "epoch": 2.2, "learning_rate": 1.332990078837807e-05, "loss": 0.5558, "step": 198705 }, { "epoch": 2.2, "learning_rate": 1.3328978061239556e-05, "loss": 0.5946, "step": 198710 }, { "epoch": 2.2, "learning_rate": 1.3328055334101042e-05, "loss": 0.5456, "step": 198715 }, { "epoch": 2.2, "learning_rate": 1.3327132606962532e-05, "loss": 0.5778, "step": 198720 }, { "epoch": 2.2, "learning_rate": 1.3326209879824018e-05, "loss": 0.6292, "step": 198725 }, { "epoch": 2.2, "learning_rate": 1.3325287152685506e-05, "loss": 0.5563, "step": 198730 }, { "epoch": 2.2, "learning_rate": 1.3324364425546992e-05, "loss": 0.5691, "step": 198735 }, { "epoch": 2.2, "learning_rate": 1.3323441698408481e-05, "loss": 0.5943, "step": 198740 }, { "epoch": 2.2, "learning_rate": 1.3322518971269967e-05, "loss": 0.6455, "step": 198745 }, { "epoch": 2.2, "learning_rate": 1.3321596244131457e-05, "loss": 0.6025, "step": 198750 }, { "epoch": 2.2, "learning_rate": 1.3320673516992943e-05, "loss": 0.6484, "step": 198755 }, { "epoch": 2.2, "learning_rate": 1.3319750789854433e-05, "loss": 0.6106, "step": 198760 }, { "epoch": 2.2, "learning_rate": 1.3318828062715919e-05, "loss": 0.6155, "step": 198765 }, { "epoch": 2.2, "learning_rate": 1.3317905335577407e-05, "loss": 0.6152, "step": 198770 }, { "epoch": 2.2, "learning_rate": 1.3316982608438895e-05, "loss": 0.668, "step": 198775 }, { "epoch": 2.2, "learning_rate": 1.3316059881300382e-05, "loss": 0.6263, "step": 198780 }, { "epoch": 2.2, "learning_rate": 1.3315137154161869e-05, "loss": 0.6001, "step": 198785 }, { "epoch": 2.2, "learning_rate": 1.3314214427023355e-05, "loss": 0.5824, "step": 198790 }, { "epoch": 2.2, "learning_rate": 1.3313291699884844e-05, "loss": 0.5753, "step": 198795 }, { "epoch": 2.2, "learning_rate": 1.331236897274633e-05, "loss": 0.5246, "step": 198800 }, { "epoch": 2.2, "learning_rate": 1.331144624560782e-05, "loss": 0.618, "step": 198805 }, { "epoch": 2.2, "learning_rate": 1.3310523518469306e-05, "loss": 0.5949, "step": 198810 }, { "epoch": 2.2, "learning_rate": 1.3309600791330796e-05, "loss": 0.6084, "step": 198815 }, { "epoch": 2.2, "learning_rate": 1.3308678064192282e-05, "loss": 0.5993, "step": 198820 }, { "epoch": 2.2, "learning_rate": 1.3307755337053771e-05, "loss": 0.5931, "step": 198825 }, { "epoch": 2.2, "learning_rate": 1.3306832609915257e-05, "loss": 0.5796, "step": 198830 }, { "epoch": 2.2, "learning_rate": 1.3305909882776745e-05, "loss": 0.5631, "step": 198835 }, { "epoch": 2.2, "learning_rate": 1.3304987155638231e-05, "loss": 0.5576, "step": 198840 }, { "epoch": 2.2, "learning_rate": 1.3304064428499721e-05, "loss": 0.6569, "step": 198845 }, { "epoch": 2.2, "learning_rate": 1.3303141701361207e-05, "loss": 0.5255, "step": 198850 }, { "epoch": 2.2, "learning_rate": 1.3302218974222697e-05, "loss": 0.5976, "step": 198855 }, { "epoch": 2.2, "learning_rate": 1.3301296247084183e-05, "loss": 0.5985, "step": 198860 }, { "epoch": 2.2, "learning_rate": 1.3300373519945669e-05, "loss": 0.5944, "step": 198865 }, { "epoch": 2.2, "learning_rate": 1.3299450792807159e-05, "loss": 0.6014, "step": 198870 }, { "epoch": 2.2, "learning_rate": 1.3298528065668645e-05, "loss": 0.6449, "step": 198875 }, { "epoch": 2.2, "learning_rate": 1.3297605338530134e-05, "loss": 0.6489, "step": 198880 }, { "epoch": 2.2, "learning_rate": 1.329668261139162e-05, "loss": 0.5804, "step": 198885 }, { "epoch": 2.2, "learning_rate": 1.3295759884253108e-05, "loss": 0.5789, "step": 198890 }, { "epoch": 2.2, "learning_rate": 1.3294837157114594e-05, "loss": 0.5609, "step": 198895 }, { "epoch": 2.2, "learning_rate": 1.3293914429976084e-05, "loss": 0.6185, "step": 198900 }, { "epoch": 2.2, "learning_rate": 1.329299170283757e-05, "loss": 0.5637, "step": 198905 }, { "epoch": 2.2, "learning_rate": 1.329206897569906e-05, "loss": 0.6208, "step": 198910 }, { "epoch": 2.2, "learning_rate": 1.3291146248560546e-05, "loss": 0.6167, "step": 198915 }, { "epoch": 2.2, "learning_rate": 1.3290223521422035e-05, "loss": 0.5552, "step": 198920 }, { "epoch": 2.2, "learning_rate": 1.3289300794283522e-05, "loss": 0.5693, "step": 198925 }, { "epoch": 2.2, "learning_rate": 1.3288378067145011e-05, "loss": 0.6114, "step": 198930 }, { "epoch": 2.2, "learning_rate": 1.3287455340006497e-05, "loss": 0.6107, "step": 198935 }, { "epoch": 2.2, "learning_rate": 1.3286532612867983e-05, "loss": 0.601, "step": 198940 }, { "epoch": 2.2, "learning_rate": 1.3285609885729471e-05, "loss": 0.5839, "step": 198945 }, { "epoch": 2.2, "learning_rate": 1.3284687158590959e-05, "loss": 0.5631, "step": 198950 }, { "epoch": 2.2, "learning_rate": 1.3283764431452447e-05, "loss": 0.6448, "step": 198955 }, { "epoch": 2.2, "learning_rate": 1.3282841704313933e-05, "loss": 0.5826, "step": 198960 }, { "epoch": 2.2, "learning_rate": 1.3281918977175423e-05, "loss": 0.6033, "step": 198965 }, { "epoch": 2.2, "learning_rate": 1.3280996250036909e-05, "loss": 0.6519, "step": 198970 }, { "epoch": 2.2, "learning_rate": 1.3280073522898398e-05, "loss": 0.622, "step": 198975 }, { "epoch": 2.2, "learning_rate": 1.3279150795759884e-05, "loss": 0.6006, "step": 198980 }, { "epoch": 2.2, "learning_rate": 1.3278228068621374e-05, "loss": 0.5793, "step": 198985 }, { "epoch": 2.2, "learning_rate": 1.327730534148286e-05, "loss": 0.6256, "step": 198990 }, { "epoch": 2.2, "learning_rate": 1.3276382614344348e-05, "loss": 0.6341, "step": 198995 }, { "epoch": 2.2, "learning_rate": 1.3275459887205834e-05, "loss": 0.6067, "step": 199000 }, { "epoch": 2.2, "eval_loss": 0.5801089406013489, "eval_runtime": 69.3581, "eval_samples_per_second": 28.836, "eval_steps_per_second": 14.418, "step": 199000 }, { "epoch": 2.2, "learning_rate": 1.3274537160067324e-05, "loss": 0.5422, "step": 199005 }, { "epoch": 2.2, "learning_rate": 1.327361443292881e-05, "loss": 0.645, "step": 199010 }, { "epoch": 2.2, "learning_rate": 1.3272691705790296e-05, "loss": 0.5854, "step": 199015 }, { "epoch": 2.2, "learning_rate": 1.3271768978651786e-05, "loss": 0.5763, "step": 199020 }, { "epoch": 2.2, "learning_rate": 1.3270846251513272e-05, "loss": 0.6352, "step": 199025 }, { "epoch": 2.2, "learning_rate": 1.3269923524374761e-05, "loss": 0.6042, "step": 199030 }, { "epoch": 2.2, "learning_rate": 1.3269000797236247e-05, "loss": 0.5973, "step": 199035 }, { "epoch": 2.2, "learning_rate": 1.3268078070097737e-05, "loss": 0.5815, "step": 199040 }, { "epoch": 2.2, "learning_rate": 1.3267155342959223e-05, "loss": 0.5669, "step": 199045 }, { "epoch": 2.2, "learning_rate": 1.3266232615820711e-05, "loss": 0.5815, "step": 199050 }, { "epoch": 2.2, "learning_rate": 1.3265309888682199e-05, "loss": 0.5859, "step": 199055 }, { "epoch": 2.2, "learning_rate": 1.3264387161543687e-05, "loss": 0.5541, "step": 199060 }, { "epoch": 2.2, "learning_rate": 1.3263464434405173e-05, "loss": 0.6059, "step": 199065 }, { "epoch": 2.2, "learning_rate": 1.3262541707266662e-05, "loss": 0.6115, "step": 199070 }, { "epoch": 2.2, "learning_rate": 1.3261618980128148e-05, "loss": 0.5784, "step": 199075 }, { "epoch": 2.2, "learning_rate": 1.3260696252989638e-05, "loss": 0.5529, "step": 199080 }, { "epoch": 2.2, "learning_rate": 1.3259773525851124e-05, "loss": 0.6137, "step": 199085 }, { "epoch": 2.2, "learning_rate": 1.325885079871261e-05, "loss": 0.6241, "step": 199090 }, { "epoch": 2.2, "learning_rate": 1.32579280715741e-05, "loss": 0.618, "step": 199095 }, { "epoch": 2.2, "learning_rate": 1.3257005344435586e-05, "loss": 0.5958, "step": 199100 }, { "epoch": 2.2, "learning_rate": 1.3256082617297076e-05, "loss": 0.6167, "step": 199105 }, { "epoch": 2.2, "learning_rate": 1.3255159890158562e-05, "loss": 0.6024, "step": 199110 }, { "epoch": 2.2, "learning_rate": 1.325423716302005e-05, "loss": 0.6561, "step": 199115 }, { "epoch": 2.2, "learning_rate": 1.3253314435881536e-05, "loss": 0.6144, "step": 199120 }, { "epoch": 2.2, "learning_rate": 1.3252391708743025e-05, "loss": 0.6122, "step": 199125 }, { "epoch": 2.2, "learning_rate": 1.3251468981604511e-05, "loss": 0.6149, "step": 199130 }, { "epoch": 2.2, "learning_rate": 1.3250546254466001e-05, "loss": 0.6181, "step": 199135 }, { "epoch": 2.21, "learning_rate": 1.3249623527327487e-05, "loss": 0.6597, "step": 199140 }, { "epoch": 2.21, "learning_rate": 1.3248700800188977e-05, "loss": 0.5776, "step": 199145 }, { "epoch": 2.21, "learning_rate": 1.3247778073050463e-05, "loss": 0.653, "step": 199150 }, { "epoch": 2.21, "learning_rate": 1.3246855345911952e-05, "loss": 0.6086, "step": 199155 }, { "epoch": 2.21, "learning_rate": 1.3245932618773439e-05, "loss": 0.641, "step": 199160 }, { "epoch": 2.21, "learning_rate": 1.3245009891634925e-05, "loss": 0.585, "step": 199165 }, { "epoch": 2.21, "learning_rate": 1.3244087164496413e-05, "loss": 0.5691, "step": 199170 }, { "epoch": 2.21, "learning_rate": 1.3243164437357899e-05, "loss": 0.61, "step": 199175 }, { "epoch": 2.21, "learning_rate": 1.3242241710219388e-05, "loss": 0.5667, "step": 199180 }, { "epoch": 2.21, "learning_rate": 1.3241318983080874e-05, "loss": 0.6245, "step": 199185 }, { "epoch": 2.21, "learning_rate": 1.3240396255942364e-05, "loss": 0.6198, "step": 199190 }, { "epoch": 2.21, "learning_rate": 1.323947352880385e-05, "loss": 0.5532, "step": 199195 }, { "epoch": 2.21, "learning_rate": 1.323855080166534e-05, "loss": 0.5793, "step": 199200 }, { "epoch": 2.21, "learning_rate": 1.3237628074526826e-05, "loss": 0.5747, "step": 199205 }, { "epoch": 2.21, "learning_rate": 1.3236705347388315e-05, "loss": 0.6296, "step": 199210 }, { "epoch": 2.21, "learning_rate": 1.3235782620249801e-05, "loss": 0.5943, "step": 199215 }, { "epoch": 2.21, "learning_rate": 1.323485989311129e-05, "loss": 0.601, "step": 199220 }, { "epoch": 2.21, "learning_rate": 1.3233937165972775e-05, "loss": 0.5689, "step": 199225 }, { "epoch": 2.21, "learning_rate": 1.3233014438834265e-05, "loss": 0.6176, "step": 199230 }, { "epoch": 2.21, "learning_rate": 1.3232091711695751e-05, "loss": 0.6174, "step": 199235 }, { "epoch": 2.21, "learning_rate": 1.3231168984557237e-05, "loss": 0.567, "step": 199240 }, { "epoch": 2.21, "learning_rate": 1.3230246257418727e-05, "loss": 0.5868, "step": 199245 }, { "epoch": 2.21, "learning_rate": 1.3229323530280213e-05, "loss": 0.559, "step": 199250 }, { "epoch": 2.21, "learning_rate": 1.3228400803141703e-05, "loss": 0.5773, "step": 199255 }, { "epoch": 2.21, "learning_rate": 1.3227478076003189e-05, "loss": 0.6598, "step": 199260 }, { "epoch": 2.21, "learning_rate": 1.3226555348864678e-05, "loss": 0.6058, "step": 199265 }, { "epoch": 2.21, "learning_rate": 1.3225632621726164e-05, "loss": 0.635, "step": 199270 }, { "epoch": 2.21, "learning_rate": 1.3224709894587652e-05, "loss": 0.6322, "step": 199275 }, { "epoch": 2.21, "learning_rate": 1.3223787167449138e-05, "loss": 0.6002, "step": 199280 }, { "epoch": 2.21, "learning_rate": 1.3222864440310628e-05, "loss": 0.5703, "step": 199285 }, { "epoch": 2.21, "learning_rate": 1.3221941713172114e-05, "loss": 0.593, "step": 199290 }, { "epoch": 2.21, "learning_rate": 1.3221018986033604e-05, "loss": 0.6212, "step": 199295 }, { "epoch": 2.21, "learning_rate": 1.322009625889509e-05, "loss": 0.6408, "step": 199300 }, { "epoch": 2.21, "learning_rate": 1.321917353175658e-05, "loss": 0.5533, "step": 199305 }, { "epoch": 2.21, "learning_rate": 1.3218250804618066e-05, "loss": 0.5849, "step": 199310 }, { "epoch": 2.21, "learning_rate": 1.3217328077479555e-05, "loss": 0.6596, "step": 199315 }, { "epoch": 2.21, "learning_rate": 1.3216405350341041e-05, "loss": 0.5437, "step": 199320 }, { "epoch": 2.21, "learning_rate": 1.3215482623202527e-05, "loss": 0.6167, "step": 199325 }, { "epoch": 2.21, "learning_rate": 1.3214559896064015e-05, "loss": 0.5729, "step": 199330 }, { "epoch": 2.21, "learning_rate": 1.3213637168925503e-05, "loss": 0.5717, "step": 199335 }, { "epoch": 2.21, "learning_rate": 1.3212714441786991e-05, "loss": 0.5892, "step": 199340 }, { "epoch": 2.21, "learning_rate": 1.3211791714648477e-05, "loss": 0.5885, "step": 199345 }, { "epoch": 2.21, "learning_rate": 1.3210868987509967e-05, "loss": 0.6289, "step": 199350 }, { "epoch": 2.21, "learning_rate": 1.3209946260371453e-05, "loss": 0.5499, "step": 199355 }, { "epoch": 2.21, "learning_rate": 1.3209023533232942e-05, "loss": 0.6168, "step": 199360 }, { "epoch": 2.21, "learning_rate": 1.3208100806094428e-05, "loss": 0.5946, "step": 199365 }, { "epoch": 2.21, "learning_rate": 1.3207178078955918e-05, "loss": 0.5829, "step": 199370 }, { "epoch": 2.21, "learning_rate": 1.3206255351817404e-05, "loss": 0.6183, "step": 199375 }, { "epoch": 2.21, "learning_rate": 1.3205332624678892e-05, "loss": 0.5434, "step": 199380 }, { "epoch": 2.21, "learning_rate": 1.320440989754038e-05, "loss": 0.5825, "step": 199385 }, { "epoch": 2.21, "learning_rate": 1.3203487170401868e-05, "loss": 0.6151, "step": 199390 }, { "epoch": 2.21, "learning_rate": 1.3202564443263354e-05, "loss": 0.5407, "step": 199395 }, { "epoch": 2.21, "learning_rate": 1.320164171612484e-05, "loss": 0.5895, "step": 199400 }, { "epoch": 2.21, "learning_rate": 1.320071898898633e-05, "loss": 0.5711, "step": 199405 }, { "epoch": 2.21, "learning_rate": 1.3199796261847816e-05, "loss": 0.5928, "step": 199410 }, { "epoch": 2.21, "learning_rate": 1.3198873534709305e-05, "loss": 0.6037, "step": 199415 }, { "epoch": 2.21, "learning_rate": 1.3197950807570791e-05, "loss": 0.5958, "step": 199420 }, { "epoch": 2.21, "learning_rate": 1.3197028080432281e-05, "loss": 0.6966, "step": 199425 }, { "epoch": 2.21, "learning_rate": 1.3196105353293767e-05, "loss": 0.5691, "step": 199430 }, { "epoch": 2.21, "learning_rate": 1.3195182626155255e-05, "loss": 0.6741, "step": 199435 }, { "epoch": 2.21, "learning_rate": 1.3194259899016743e-05, "loss": 0.5874, "step": 199440 }, { "epoch": 2.21, "learning_rate": 1.319333717187823e-05, "loss": 0.5949, "step": 199445 }, { "epoch": 2.21, "learning_rate": 1.3192414444739717e-05, "loss": 0.6374, "step": 199450 }, { "epoch": 2.21, "learning_rate": 1.3191491717601206e-05, "loss": 0.5952, "step": 199455 }, { "epoch": 2.21, "learning_rate": 1.3190568990462693e-05, "loss": 0.6238, "step": 199460 }, { "epoch": 2.21, "learning_rate": 1.3189646263324182e-05, "loss": 0.6069, "step": 199465 }, { "epoch": 2.21, "learning_rate": 1.3188723536185668e-05, "loss": 0.5978, "step": 199470 }, { "epoch": 2.21, "learning_rate": 1.3187800809047154e-05, "loss": 0.6283, "step": 199475 }, { "epoch": 2.21, "learning_rate": 1.3186878081908644e-05, "loss": 0.5562, "step": 199480 }, { "epoch": 2.21, "learning_rate": 1.318595535477013e-05, "loss": 0.5668, "step": 199485 }, { "epoch": 2.21, "learning_rate": 1.318503262763162e-05, "loss": 0.5551, "step": 199490 }, { "epoch": 2.21, "learning_rate": 1.3184109900493106e-05, "loss": 0.5712, "step": 199495 }, { "epoch": 2.21, "learning_rate": 1.3183187173354594e-05, "loss": 0.5731, "step": 199500 }, { "epoch": 2.21, "learning_rate": 1.318226444621608e-05, "loss": 0.6465, "step": 199505 }, { "epoch": 2.21, "learning_rate": 1.318134171907757e-05, "loss": 0.5722, "step": 199510 }, { "epoch": 2.21, "learning_rate": 1.3180418991939055e-05, "loss": 0.5695, "step": 199515 }, { "epoch": 2.21, "learning_rate": 1.3179496264800545e-05, "loss": 0.5788, "step": 199520 }, { "epoch": 2.21, "learning_rate": 1.3178573537662031e-05, "loss": 0.5814, "step": 199525 }, { "epoch": 2.21, "learning_rate": 1.317765081052352e-05, "loss": 0.655, "step": 199530 }, { "epoch": 2.21, "learning_rate": 1.3176728083385007e-05, "loss": 0.5905, "step": 199535 }, { "epoch": 2.21, "learning_rate": 1.3175805356246496e-05, "loss": 0.6181, "step": 199540 }, { "epoch": 2.21, "learning_rate": 1.3174882629107983e-05, "loss": 0.6265, "step": 199545 }, { "epoch": 2.21, "learning_rate": 1.3173959901969469e-05, "loss": 0.5721, "step": 199550 }, { "epoch": 2.21, "learning_rate": 1.3173037174830957e-05, "loss": 0.609, "step": 199555 }, { "epoch": 2.21, "learning_rate": 1.3172114447692443e-05, "loss": 0.5538, "step": 199560 }, { "epoch": 2.21, "learning_rate": 1.3171191720553932e-05, "loss": 0.5824, "step": 199565 }, { "epoch": 2.21, "learning_rate": 1.3170268993415418e-05, "loss": 0.6218, "step": 199570 }, { "epoch": 2.21, "learning_rate": 1.3169346266276908e-05, "loss": 0.5604, "step": 199575 }, { "epoch": 2.21, "learning_rate": 1.3168423539138394e-05, "loss": 0.6463, "step": 199580 }, { "epoch": 2.21, "learning_rate": 1.3167500811999884e-05, "loss": 0.5887, "step": 199585 }, { "epoch": 2.21, "learning_rate": 1.316657808486137e-05, "loss": 0.6131, "step": 199590 }, { "epoch": 2.21, "learning_rate": 1.316565535772286e-05, "loss": 0.6093, "step": 199595 }, { "epoch": 2.21, "learning_rate": 1.3164732630584346e-05, "loss": 0.6517, "step": 199600 }, { "epoch": 2.21, "learning_rate": 1.3163809903445833e-05, "loss": 0.6338, "step": 199605 }, { "epoch": 2.21, "learning_rate": 1.316288717630732e-05, "loss": 0.6114, "step": 199610 }, { "epoch": 2.21, "learning_rate": 1.3161964449168809e-05, "loss": 0.5686, "step": 199615 }, { "epoch": 2.21, "learning_rate": 1.3161041722030295e-05, "loss": 0.5815, "step": 199620 }, { "epoch": 2.21, "learning_rate": 1.3160118994891781e-05, "loss": 0.5806, "step": 199625 }, { "epoch": 2.21, "learning_rate": 1.3159196267753271e-05, "loss": 0.6089, "step": 199630 }, { "epoch": 2.21, "learning_rate": 1.3158273540614757e-05, "loss": 0.6048, "step": 199635 }, { "epoch": 2.21, "learning_rate": 1.3157350813476247e-05, "loss": 0.5858, "step": 199640 }, { "epoch": 2.21, "learning_rate": 1.3156428086337733e-05, "loss": 0.5535, "step": 199645 }, { "epoch": 2.21, "learning_rate": 1.3155505359199222e-05, "loss": 0.6402, "step": 199650 }, { "epoch": 2.21, "learning_rate": 1.3154582632060708e-05, "loss": 0.5722, "step": 199655 }, { "epoch": 2.21, "learning_rate": 1.3153659904922196e-05, "loss": 0.6318, "step": 199660 }, { "epoch": 2.21, "learning_rate": 1.3152737177783684e-05, "loss": 0.5958, "step": 199665 }, { "epoch": 2.21, "learning_rate": 1.3151814450645172e-05, "loss": 0.6425, "step": 199670 }, { "epoch": 2.21, "learning_rate": 1.3150891723506658e-05, "loss": 0.5689, "step": 199675 }, { "epoch": 2.21, "learning_rate": 1.3149968996368148e-05, "loss": 0.5774, "step": 199680 }, { "epoch": 2.21, "learning_rate": 1.3149046269229634e-05, "loss": 0.6101, "step": 199685 }, { "epoch": 2.21, "learning_rate": 1.3148123542091123e-05, "loss": 0.6002, "step": 199690 }, { "epoch": 2.21, "learning_rate": 1.314720081495261e-05, "loss": 0.6083, "step": 199695 }, { "epoch": 2.21, "learning_rate": 1.3146278087814096e-05, "loss": 0.5968, "step": 199700 }, { "epoch": 2.21, "learning_rate": 1.3145355360675585e-05, "loss": 0.6021, "step": 199705 }, { "epoch": 2.21, "learning_rate": 1.3144432633537071e-05, "loss": 0.6033, "step": 199710 }, { "epoch": 2.21, "learning_rate": 1.314350990639856e-05, "loss": 0.584, "step": 199715 }, { "epoch": 2.21, "learning_rate": 1.3142587179260047e-05, "loss": 0.5618, "step": 199720 }, { "epoch": 2.21, "learning_rate": 1.3141664452121535e-05, "loss": 0.5722, "step": 199725 }, { "epoch": 2.21, "learning_rate": 1.3140741724983021e-05, "loss": 0.6041, "step": 199730 }, { "epoch": 2.21, "learning_rate": 1.313981899784451e-05, "loss": 0.6456, "step": 199735 }, { "epoch": 2.21, "learning_rate": 1.3138896270705997e-05, "loss": 0.6196, "step": 199740 }, { "epoch": 2.21, "learning_rate": 1.3137973543567486e-05, "loss": 0.5929, "step": 199745 }, { "epoch": 2.21, "learning_rate": 1.3137050816428972e-05, "loss": 0.5854, "step": 199750 }, { "epoch": 2.21, "learning_rate": 1.3136128089290462e-05, "loss": 0.5693, "step": 199755 }, { "epoch": 2.21, "learning_rate": 1.3135205362151948e-05, "loss": 0.5885, "step": 199760 }, { "epoch": 2.21, "learning_rate": 1.3134282635013436e-05, "loss": 0.5653, "step": 199765 }, { "epoch": 2.21, "learning_rate": 1.3133359907874924e-05, "loss": 0.5539, "step": 199770 }, { "epoch": 2.21, "learning_rate": 1.313243718073641e-05, "loss": 0.6053, "step": 199775 }, { "epoch": 2.21, "learning_rate": 1.3131514453597898e-05, "loss": 0.6023, "step": 199780 }, { "epoch": 2.21, "learning_rate": 1.3130591726459384e-05, "loss": 0.5907, "step": 199785 }, { "epoch": 2.21, "learning_rate": 1.3129668999320874e-05, "loss": 0.5793, "step": 199790 }, { "epoch": 2.21, "learning_rate": 1.312874627218236e-05, "loss": 0.5997, "step": 199795 }, { "epoch": 2.21, "learning_rate": 1.312782354504385e-05, "loss": 0.5458, "step": 199800 }, { "epoch": 2.21, "learning_rate": 1.3126900817905335e-05, "loss": 0.6813, "step": 199805 }, { "epoch": 2.21, "learning_rate": 1.3125978090766825e-05, "loss": 0.5764, "step": 199810 }, { "epoch": 2.21, "learning_rate": 1.3125055363628311e-05, "loss": 0.623, "step": 199815 }, { "epoch": 2.21, "learning_rate": 1.31241326364898e-05, "loss": 0.616, "step": 199820 }, { "epoch": 2.21, "learning_rate": 1.3123209909351287e-05, "loss": 0.5989, "step": 199825 }, { "epoch": 2.21, "learning_rate": 1.3122287182212775e-05, "loss": 0.6167, "step": 199830 }, { "epoch": 2.21, "learning_rate": 1.312136445507426e-05, "loss": 0.5806, "step": 199835 }, { "epoch": 2.21, "learning_rate": 1.312044172793575e-05, "loss": 0.5663, "step": 199840 }, { "epoch": 2.21, "learning_rate": 1.3119519000797237e-05, "loss": 0.5975, "step": 199845 }, { "epoch": 2.21, "learning_rate": 1.3118596273658723e-05, "loss": 0.592, "step": 199850 }, { "epoch": 2.21, "learning_rate": 1.3117673546520212e-05, "loss": 0.592, "step": 199855 }, { "epoch": 2.21, "learning_rate": 1.3116750819381698e-05, "loss": 0.5904, "step": 199860 }, { "epoch": 2.21, "learning_rate": 1.3115828092243188e-05, "loss": 0.5891, "step": 199865 }, { "epoch": 2.21, "learning_rate": 1.3114905365104674e-05, "loss": 0.5934, "step": 199870 }, { "epoch": 2.21, "learning_rate": 1.3113982637966164e-05, "loss": 0.5906, "step": 199875 }, { "epoch": 2.21, "learning_rate": 1.311305991082765e-05, "loss": 0.6219, "step": 199880 }, { "epoch": 2.21, "learning_rate": 1.3112137183689138e-05, "loss": 0.6666, "step": 199885 }, { "epoch": 2.21, "learning_rate": 1.3111214456550624e-05, "loss": 0.5618, "step": 199890 }, { "epoch": 2.21, "learning_rate": 1.3110291729412113e-05, "loss": 0.5741, "step": 199895 }, { "epoch": 2.21, "learning_rate": 1.31093690022736e-05, "loss": 0.6174, "step": 199900 }, { "epoch": 2.21, "learning_rate": 1.3108446275135089e-05, "loss": 0.573, "step": 199905 }, { "epoch": 2.21, "learning_rate": 1.3107523547996575e-05, "loss": 0.6105, "step": 199910 }, { "epoch": 2.21, "learning_rate": 1.3106600820858065e-05, "loss": 0.5305, "step": 199915 }, { "epoch": 2.21, "learning_rate": 1.3105678093719551e-05, "loss": 0.6056, "step": 199920 }, { "epoch": 2.21, "learning_rate": 1.3104755366581037e-05, "loss": 0.6527, "step": 199925 }, { "epoch": 2.21, "learning_rate": 1.3103832639442527e-05, "loss": 0.6081, "step": 199930 }, { "epoch": 2.21, "learning_rate": 1.3102909912304013e-05, "loss": 0.5905, "step": 199935 }, { "epoch": 2.21, "learning_rate": 1.31019871851655e-05, "loss": 0.6148, "step": 199940 }, { "epoch": 2.21, "learning_rate": 1.3101064458026987e-05, "loss": 0.6145, "step": 199945 }, { "epoch": 2.21, "learning_rate": 1.3100141730888476e-05, "loss": 0.6426, "step": 199950 }, { "epoch": 2.21, "learning_rate": 1.3099219003749962e-05, "loss": 0.5874, "step": 199955 }, { "epoch": 2.21, "learning_rate": 1.3098296276611452e-05, "loss": 0.5974, "step": 199960 }, { "epoch": 2.21, "learning_rate": 1.3097373549472938e-05, "loss": 0.6425, "step": 199965 }, { "epoch": 2.21, "learning_rate": 1.3096450822334428e-05, "loss": 0.5737, "step": 199970 }, { "epoch": 2.21, "learning_rate": 1.3095528095195914e-05, "loss": 0.601, "step": 199975 }, { "epoch": 2.21, "learning_rate": 1.3094605368057403e-05, "loss": 0.5958, "step": 199980 }, { "epoch": 2.21, "learning_rate": 1.309368264091889e-05, "loss": 0.6199, "step": 199985 }, { "epoch": 2.21, "learning_rate": 1.3092759913780377e-05, "loss": 0.5694, "step": 199990 }, { "epoch": 2.21, "learning_rate": 1.3091837186641864e-05, "loss": 0.6009, "step": 199995 }, { "epoch": 2.21, "learning_rate": 1.3090914459503351e-05, "loss": 0.6422, "step": 200000 }, { "epoch": 2.21, "eval_loss": 0.5589736700057983, "eval_runtime": 69.1996, "eval_samples_per_second": 28.902, "eval_steps_per_second": 14.451, "step": 200000 }, { "epoch": 2.21, "learning_rate": 1.308999173236484e-05, "loss": 0.5663, "step": 200005 }, { "epoch": 2.21, "learning_rate": 1.3089069005226325e-05, "loss": 0.517, "step": 200010 }, { "epoch": 2.21, "learning_rate": 1.3088146278087815e-05, "loss": 0.5952, "step": 200015 }, { "epoch": 2.21, "learning_rate": 1.3087223550949301e-05, "loss": 0.5724, "step": 200020 }, { "epoch": 2.21, "learning_rate": 1.308630082381079e-05, "loss": 0.6029, "step": 200025 }, { "epoch": 2.21, "learning_rate": 1.3085378096672277e-05, "loss": 0.5557, "step": 200030 }, { "epoch": 2.21, "learning_rate": 1.3084455369533766e-05, "loss": 0.6391, "step": 200035 }, { "epoch": 2.21, "learning_rate": 1.3083532642395252e-05, "loss": 0.5742, "step": 200040 }, { "epoch": 2.22, "learning_rate": 1.308260991525674e-05, "loss": 0.6049, "step": 200045 }, { "epoch": 2.22, "learning_rate": 1.3081687188118228e-05, "loss": 0.5951, "step": 200050 }, { "epoch": 2.22, "learning_rate": 1.3080764460979716e-05, "loss": 0.6091, "step": 200055 }, { "epoch": 2.22, "learning_rate": 1.3079841733841202e-05, "loss": 0.5623, "step": 200060 }, { "epoch": 2.22, "learning_rate": 1.3078919006702692e-05, "loss": 0.6163, "step": 200065 }, { "epoch": 2.22, "learning_rate": 1.3077996279564178e-05, "loss": 0.6062, "step": 200070 }, { "epoch": 2.22, "learning_rate": 1.3077073552425664e-05, "loss": 0.5809, "step": 200075 }, { "epoch": 2.22, "learning_rate": 1.3076150825287154e-05, "loss": 0.6024, "step": 200080 }, { "epoch": 2.22, "learning_rate": 1.307522809814864e-05, "loss": 0.5989, "step": 200085 }, { "epoch": 2.22, "learning_rate": 1.307430537101013e-05, "loss": 0.5843, "step": 200090 }, { "epoch": 2.22, "learning_rate": 1.3073382643871615e-05, "loss": 0.6043, "step": 200095 }, { "epoch": 2.22, "learning_rate": 1.3072459916733103e-05, "loss": 0.5782, "step": 200100 }, { "epoch": 2.22, "learning_rate": 1.3071537189594591e-05, "loss": 0.5709, "step": 200105 }, { "epoch": 2.22, "learning_rate": 1.3070614462456079e-05, "loss": 0.5789, "step": 200110 }, { "epoch": 2.22, "learning_rate": 1.3069691735317565e-05, "loss": 0.589, "step": 200115 }, { "epoch": 2.22, "learning_rate": 1.3068769008179055e-05, "loss": 0.6791, "step": 200120 }, { "epoch": 2.22, "learning_rate": 1.306784628104054e-05, "loss": 0.6017, "step": 200125 }, { "epoch": 2.22, "learning_rate": 1.306692355390203e-05, "loss": 0.589, "step": 200130 }, { "epoch": 2.22, "learning_rate": 1.3066000826763517e-05, "loss": 0.6628, "step": 200135 }, { "epoch": 2.22, "learning_rate": 1.3065078099625006e-05, "loss": 0.601, "step": 200140 }, { "epoch": 2.22, "learning_rate": 1.3064155372486492e-05, "loss": 0.6129, "step": 200145 }, { "epoch": 2.22, "learning_rate": 1.306323264534798e-05, "loss": 0.5561, "step": 200150 }, { "epoch": 2.22, "learning_rate": 1.3062309918209468e-05, "loss": 0.5905, "step": 200155 }, { "epoch": 2.22, "learning_rate": 1.3061387191070954e-05, "loss": 0.6064, "step": 200160 }, { "epoch": 2.22, "learning_rate": 1.3060464463932442e-05, "loss": 0.6167, "step": 200165 }, { "epoch": 2.22, "learning_rate": 1.3059541736793928e-05, "loss": 0.6088, "step": 200170 }, { "epoch": 2.22, "learning_rate": 1.3058619009655418e-05, "loss": 0.5592, "step": 200175 }, { "epoch": 2.22, "learning_rate": 1.3057696282516904e-05, "loss": 0.5713, "step": 200180 }, { "epoch": 2.22, "learning_rate": 1.3056773555378393e-05, "loss": 0.572, "step": 200185 }, { "epoch": 2.22, "learning_rate": 1.305585082823988e-05, "loss": 0.591, "step": 200190 }, { "epoch": 2.22, "learning_rate": 1.3054928101101369e-05, "loss": 0.6065, "step": 200195 }, { "epoch": 2.22, "learning_rate": 1.3054005373962855e-05, "loss": 0.5995, "step": 200200 }, { "epoch": 2.22, "learning_rate": 1.3053082646824345e-05, "loss": 0.5782, "step": 200205 }, { "epoch": 2.22, "learning_rate": 1.3052159919685831e-05, "loss": 0.5935, "step": 200210 }, { "epoch": 2.22, "learning_rate": 1.3051237192547319e-05, "loss": 0.5408, "step": 200215 }, { "epoch": 2.22, "learning_rate": 1.3050314465408805e-05, "loss": 0.6113, "step": 200220 }, { "epoch": 2.22, "learning_rate": 1.3049391738270294e-05, "loss": 0.6381, "step": 200225 }, { "epoch": 2.22, "learning_rate": 1.304846901113178e-05, "loss": 0.6254, "step": 200230 }, { "epoch": 2.22, "learning_rate": 1.3047546283993267e-05, "loss": 0.5956, "step": 200235 }, { "epoch": 2.22, "learning_rate": 1.3046623556854756e-05, "loss": 0.6362, "step": 200240 }, { "epoch": 2.22, "learning_rate": 1.3045700829716242e-05, "loss": 0.567, "step": 200245 }, { "epoch": 2.22, "learning_rate": 1.3044778102577732e-05, "loss": 0.5994, "step": 200250 }, { "epoch": 2.22, "learning_rate": 1.3043855375439218e-05, "loss": 0.665, "step": 200255 }, { "epoch": 2.22, "learning_rate": 1.3042932648300708e-05, "loss": 0.6024, "step": 200260 }, { "epoch": 2.22, "learning_rate": 1.3042009921162194e-05, "loss": 0.5455, "step": 200265 }, { "epoch": 2.22, "learning_rate": 1.3041087194023682e-05, "loss": 0.6549, "step": 200270 }, { "epoch": 2.22, "learning_rate": 1.3040164466885168e-05, "loss": 0.6175, "step": 200275 }, { "epoch": 2.22, "learning_rate": 1.3039241739746657e-05, "loss": 0.5849, "step": 200280 }, { "epoch": 2.22, "learning_rate": 1.3038319012608144e-05, "loss": 0.6338, "step": 200285 }, { "epoch": 2.22, "learning_rate": 1.3037396285469633e-05, "loss": 0.6125, "step": 200290 }, { "epoch": 2.22, "learning_rate": 1.303647355833112e-05, "loss": 0.613, "step": 200295 }, { "epoch": 2.22, "learning_rate": 1.3035550831192609e-05, "loss": 0.5893, "step": 200300 }, { "epoch": 2.22, "learning_rate": 1.3034628104054095e-05, "loss": 0.6144, "step": 200305 }, { "epoch": 2.22, "learning_rate": 1.3033705376915581e-05, "loss": 0.5775, "step": 200310 }, { "epoch": 2.22, "learning_rate": 1.303278264977707e-05, "loss": 0.6242, "step": 200315 }, { "epoch": 2.22, "learning_rate": 1.3031859922638557e-05, "loss": 0.5916, "step": 200320 }, { "epoch": 2.22, "learning_rate": 1.3030937195500045e-05, "loss": 0.6656, "step": 200325 }, { "epoch": 2.22, "learning_rate": 1.3030014468361532e-05, "loss": 0.6296, "step": 200330 }, { "epoch": 2.22, "learning_rate": 1.302909174122302e-05, "loss": 0.5657, "step": 200335 }, { "epoch": 2.22, "learning_rate": 1.3028169014084506e-05, "loss": 0.61, "step": 200340 }, { "epoch": 2.22, "learning_rate": 1.3027246286945996e-05, "loss": 0.5803, "step": 200345 }, { "epoch": 2.22, "learning_rate": 1.3026323559807482e-05, "loss": 0.6369, "step": 200350 }, { "epoch": 2.22, "learning_rate": 1.3025400832668972e-05, "loss": 0.5738, "step": 200355 }, { "epoch": 2.22, "learning_rate": 1.3024478105530458e-05, "loss": 0.5764, "step": 200360 }, { "epoch": 2.22, "learning_rate": 1.3023555378391947e-05, "loss": 0.5543, "step": 200365 }, { "epoch": 2.22, "learning_rate": 1.3022632651253434e-05, "loss": 0.5889, "step": 200370 }, { "epoch": 2.22, "learning_rate": 1.3021709924114921e-05, "loss": 0.622, "step": 200375 }, { "epoch": 2.22, "learning_rate": 1.3020787196976408e-05, "loss": 0.5592, "step": 200380 }, { "epoch": 2.22, "learning_rate": 1.3019864469837895e-05, "loss": 0.55, "step": 200385 }, { "epoch": 2.22, "learning_rate": 1.3018941742699383e-05, "loss": 0.5555, "step": 200390 }, { "epoch": 2.22, "learning_rate": 1.301801901556087e-05, "loss": 0.6371, "step": 200395 }, { "epoch": 2.22, "learning_rate": 1.3017096288422359e-05, "loss": 0.6446, "step": 200400 }, { "epoch": 2.22, "learning_rate": 1.3016173561283845e-05, "loss": 0.5659, "step": 200405 }, { "epoch": 2.22, "learning_rate": 1.3015250834145335e-05, "loss": 0.5936, "step": 200410 }, { "epoch": 2.22, "learning_rate": 1.301432810700682e-05, "loss": 0.6093, "step": 200415 }, { "epoch": 2.22, "learning_rate": 1.301340537986831e-05, "loss": 0.6367, "step": 200420 }, { "epoch": 2.22, "learning_rate": 1.3012482652729796e-05, "loss": 0.6116, "step": 200425 }, { "epoch": 2.22, "learning_rate": 1.3011559925591284e-05, "loss": 0.5764, "step": 200430 }, { "epoch": 2.22, "learning_rate": 1.3010637198452772e-05, "loss": 0.6286, "step": 200435 }, { "epoch": 2.22, "learning_rate": 1.300971447131426e-05, "loss": 0.5733, "step": 200440 }, { "epoch": 2.22, "learning_rate": 1.3008791744175746e-05, "loss": 0.61, "step": 200445 }, { "epoch": 2.22, "learning_rate": 1.3007869017037236e-05, "loss": 0.6308, "step": 200450 }, { "epoch": 2.22, "learning_rate": 1.3006946289898722e-05, "loss": 0.5375, "step": 200455 }, { "epoch": 2.22, "learning_rate": 1.3006023562760208e-05, "loss": 0.6009, "step": 200460 }, { "epoch": 2.22, "learning_rate": 1.3005100835621698e-05, "loss": 0.6474, "step": 200465 }, { "epoch": 2.22, "learning_rate": 1.3004178108483184e-05, "loss": 0.5795, "step": 200470 }, { "epoch": 2.22, "learning_rate": 1.3003255381344673e-05, "loss": 0.6438, "step": 200475 }, { "epoch": 2.22, "learning_rate": 1.300233265420616e-05, "loss": 0.6566, "step": 200480 }, { "epoch": 2.22, "learning_rate": 1.3001409927067649e-05, "loss": 0.5523, "step": 200485 }, { "epoch": 2.22, "learning_rate": 1.3000487199929135e-05, "loss": 0.6161, "step": 200490 }, { "epoch": 2.22, "learning_rate": 1.2999564472790623e-05, "loss": 0.5587, "step": 200495 }, { "epoch": 2.22, "learning_rate": 1.2998641745652109e-05, "loss": 0.5526, "step": 200500 }, { "epoch": 2.22, "learning_rate": 1.2997719018513599e-05, "loss": 0.6237, "step": 200505 }, { "epoch": 2.22, "learning_rate": 1.2996796291375085e-05, "loss": 0.5703, "step": 200510 }, { "epoch": 2.22, "learning_rate": 1.2995873564236574e-05, "loss": 0.6259, "step": 200515 }, { "epoch": 2.22, "learning_rate": 1.299495083709806e-05, "loss": 0.5998, "step": 200520 }, { "epoch": 2.22, "learning_rate": 1.299402810995955e-05, "loss": 0.6335, "step": 200525 }, { "epoch": 2.22, "learning_rate": 1.2993105382821036e-05, "loss": 0.6143, "step": 200530 }, { "epoch": 2.22, "learning_rate": 1.2992182655682522e-05, "loss": 0.5683, "step": 200535 }, { "epoch": 2.22, "learning_rate": 1.2991259928544012e-05, "loss": 0.5817, "step": 200540 }, { "epoch": 2.22, "learning_rate": 1.2990337201405498e-05, "loss": 0.5186, "step": 200545 }, { "epoch": 2.22, "learning_rate": 1.2989414474266986e-05, "loss": 0.617, "step": 200550 }, { "epoch": 2.22, "learning_rate": 1.2988491747128472e-05, "loss": 0.6032, "step": 200555 }, { "epoch": 2.22, "learning_rate": 1.2987569019989962e-05, "loss": 0.5592, "step": 200560 }, { "epoch": 2.22, "learning_rate": 1.2986646292851448e-05, "loss": 0.6463, "step": 200565 }, { "epoch": 2.22, "learning_rate": 1.2985723565712937e-05, "loss": 0.6, "step": 200570 }, { "epoch": 2.22, "learning_rate": 1.2984800838574423e-05, "loss": 0.5447, "step": 200575 }, { "epoch": 2.22, "learning_rate": 1.2983878111435913e-05, "loss": 0.613, "step": 200580 }, { "epoch": 2.22, "learning_rate": 1.29829553842974e-05, "loss": 0.5826, "step": 200585 }, { "epoch": 2.22, "learning_rate": 1.2982032657158889e-05, "loss": 0.6241, "step": 200590 }, { "epoch": 2.22, "learning_rate": 1.2981109930020375e-05, "loss": 0.6109, "step": 200595 }, { "epoch": 2.22, "learning_rate": 1.2980187202881863e-05, "loss": 0.5757, "step": 200600 }, { "epoch": 2.22, "learning_rate": 1.2979264475743349e-05, "loss": 0.5353, "step": 200605 }, { "epoch": 2.22, "learning_rate": 1.2978341748604835e-05, "loss": 0.5871, "step": 200610 }, { "epoch": 2.22, "learning_rate": 1.2977419021466325e-05, "loss": 0.6159, "step": 200615 }, { "epoch": 2.22, "learning_rate": 1.297649629432781e-05, "loss": 0.596, "step": 200620 }, { "epoch": 2.22, "learning_rate": 1.29755735671893e-05, "loss": 0.5619, "step": 200625 }, { "epoch": 2.22, "learning_rate": 1.2974650840050786e-05, "loss": 0.6022, "step": 200630 }, { "epoch": 2.22, "learning_rate": 1.2973728112912276e-05, "loss": 0.6241, "step": 200635 }, { "epoch": 2.22, "learning_rate": 1.2972805385773762e-05, "loss": 0.5844, "step": 200640 }, { "epoch": 2.22, "learning_rate": 1.2971882658635252e-05, "loss": 0.5924, "step": 200645 }, { "epoch": 2.22, "learning_rate": 1.2970959931496738e-05, "loss": 0.5388, "step": 200650 }, { "epoch": 2.22, "learning_rate": 1.2970037204358226e-05, "loss": 0.5647, "step": 200655 }, { "epoch": 2.22, "learning_rate": 1.2969114477219712e-05, "loss": 0.6817, "step": 200660 }, { "epoch": 2.22, "learning_rate": 1.2968191750081201e-05, "loss": 0.5699, "step": 200665 }, { "epoch": 2.22, "learning_rate": 1.2967269022942688e-05, "loss": 0.6122, "step": 200670 }, { "epoch": 2.22, "learning_rate": 1.2966346295804177e-05, "loss": 0.6342, "step": 200675 }, { "epoch": 2.22, "learning_rate": 1.2965423568665663e-05, "loss": 0.6221, "step": 200680 }, { "epoch": 2.22, "learning_rate": 1.296450084152715e-05, "loss": 0.6367, "step": 200685 }, { "epoch": 2.22, "learning_rate": 1.2963578114388639e-05, "loss": 0.5623, "step": 200690 }, { "epoch": 2.22, "learning_rate": 1.2962655387250125e-05, "loss": 0.5995, "step": 200695 }, { "epoch": 2.22, "learning_rate": 1.2961732660111615e-05, "loss": 0.578, "step": 200700 }, { "epoch": 2.22, "learning_rate": 1.29608099329731e-05, "loss": 0.6005, "step": 200705 }, { "epoch": 2.22, "learning_rate": 1.2959887205834589e-05, "loss": 0.5819, "step": 200710 }, { "epoch": 2.22, "learning_rate": 1.2958964478696076e-05, "loss": 0.5928, "step": 200715 }, { "epoch": 2.22, "learning_rate": 1.2958041751557564e-05, "loss": 0.5875, "step": 200720 }, { "epoch": 2.22, "learning_rate": 1.295711902441905e-05, "loss": 0.5976, "step": 200725 }, { "epoch": 2.22, "learning_rate": 1.295619629728054e-05, "loss": 0.5729, "step": 200730 }, { "epoch": 2.22, "learning_rate": 1.2955273570142026e-05, "loss": 0.6652, "step": 200735 }, { "epoch": 2.22, "learning_rate": 1.2954350843003516e-05, "loss": 0.5616, "step": 200740 }, { "epoch": 2.22, "learning_rate": 1.2953428115865002e-05, "loss": 0.6118, "step": 200745 }, { "epoch": 2.22, "learning_rate": 1.2952505388726491e-05, "loss": 0.5977, "step": 200750 }, { "epoch": 2.22, "learning_rate": 1.2951582661587978e-05, "loss": 0.5662, "step": 200755 }, { "epoch": 2.22, "learning_rate": 1.2950659934449464e-05, "loss": 0.5728, "step": 200760 }, { "epoch": 2.22, "learning_rate": 1.2949737207310953e-05, "loss": 0.6312, "step": 200765 }, { "epoch": 2.22, "learning_rate": 1.294881448017244e-05, "loss": 0.5561, "step": 200770 }, { "epoch": 2.22, "learning_rate": 1.2947891753033927e-05, "loss": 0.584, "step": 200775 }, { "epoch": 2.22, "learning_rate": 1.2946969025895413e-05, "loss": 0.5922, "step": 200780 }, { "epoch": 2.22, "learning_rate": 1.2946046298756903e-05, "loss": 0.6509, "step": 200785 }, { "epoch": 2.22, "learning_rate": 1.2945123571618389e-05, "loss": 0.6322, "step": 200790 }, { "epoch": 2.22, "learning_rate": 1.2944200844479879e-05, "loss": 0.5708, "step": 200795 }, { "epoch": 2.22, "learning_rate": 1.2943278117341365e-05, "loss": 0.5673, "step": 200800 }, { "epoch": 2.22, "learning_rate": 1.2942355390202854e-05, "loss": 0.5911, "step": 200805 }, { "epoch": 2.22, "learning_rate": 1.294143266306434e-05, "loss": 0.6104, "step": 200810 }, { "epoch": 2.22, "learning_rate": 1.2940509935925828e-05, "loss": 0.5889, "step": 200815 }, { "epoch": 2.22, "learning_rate": 1.2939587208787316e-05, "loss": 0.6232, "step": 200820 }, { "epoch": 2.22, "learning_rate": 1.2938664481648804e-05, "loss": 0.5564, "step": 200825 }, { "epoch": 2.22, "learning_rate": 1.293774175451029e-05, "loss": 0.5944, "step": 200830 }, { "epoch": 2.22, "learning_rate": 1.2936819027371776e-05, "loss": 0.591, "step": 200835 }, { "epoch": 2.22, "learning_rate": 1.2935896300233266e-05, "loss": 0.5891, "step": 200840 }, { "epoch": 2.22, "learning_rate": 1.2934973573094752e-05, "loss": 0.6414, "step": 200845 }, { "epoch": 2.22, "learning_rate": 1.2934050845956242e-05, "loss": 0.6139, "step": 200850 }, { "epoch": 2.22, "learning_rate": 1.2933128118817728e-05, "loss": 0.629, "step": 200855 }, { "epoch": 2.22, "learning_rate": 1.2932205391679217e-05, "loss": 0.5608, "step": 200860 }, { "epoch": 2.22, "learning_rate": 1.2931282664540703e-05, "loss": 0.613, "step": 200865 }, { "epoch": 2.22, "learning_rate": 1.2930359937402193e-05, "loss": 0.582, "step": 200870 }, { "epoch": 2.22, "learning_rate": 1.292943721026368e-05, "loss": 0.5778, "step": 200875 }, { "epoch": 2.22, "learning_rate": 1.2928514483125167e-05, "loss": 0.6045, "step": 200880 }, { "epoch": 2.22, "learning_rate": 1.2927591755986653e-05, "loss": 0.5609, "step": 200885 }, { "epoch": 2.22, "learning_rate": 1.2926669028848143e-05, "loss": 0.6238, "step": 200890 }, { "epoch": 2.22, "learning_rate": 1.2925746301709629e-05, "loss": 0.6132, "step": 200895 }, { "epoch": 2.22, "learning_rate": 1.2924823574571118e-05, "loss": 0.6387, "step": 200900 }, { "epoch": 2.22, "learning_rate": 1.2923900847432605e-05, "loss": 0.6235, "step": 200905 }, { "epoch": 2.22, "learning_rate": 1.292297812029409e-05, "loss": 0.594, "step": 200910 }, { "epoch": 2.22, "learning_rate": 1.292205539315558e-05, "loss": 0.6461, "step": 200915 }, { "epoch": 2.22, "learning_rate": 1.2921132666017066e-05, "loss": 0.6157, "step": 200920 }, { "epoch": 2.22, "learning_rate": 1.2920209938878556e-05, "loss": 0.5317, "step": 200925 }, { "epoch": 2.22, "learning_rate": 1.2919287211740042e-05, "loss": 0.5713, "step": 200930 }, { "epoch": 2.22, "learning_rate": 1.291836448460153e-05, "loss": 0.6289, "step": 200935 }, { "epoch": 2.22, "learning_rate": 1.2917441757463016e-05, "loss": 0.5744, "step": 200940 }, { "epoch": 2.23, "learning_rate": 1.2916519030324506e-05, "loss": 0.6145, "step": 200945 }, { "epoch": 2.23, "learning_rate": 1.2915596303185992e-05, "loss": 0.5509, "step": 200950 }, { "epoch": 2.23, "learning_rate": 1.2914673576047481e-05, "loss": 0.6173, "step": 200955 }, { "epoch": 2.23, "learning_rate": 1.2913750848908967e-05, "loss": 0.5452, "step": 200960 }, { "epoch": 2.23, "learning_rate": 1.2912828121770457e-05, "loss": 0.6037, "step": 200965 }, { "epoch": 2.23, "learning_rate": 1.2911905394631943e-05, "loss": 0.5753, "step": 200970 }, { "epoch": 2.23, "learning_rate": 1.2910982667493433e-05, "loss": 0.6357, "step": 200975 }, { "epoch": 2.23, "learning_rate": 1.2910059940354919e-05, "loss": 0.5625, "step": 200980 }, { "epoch": 2.23, "learning_rate": 1.2909137213216407e-05, "loss": 0.6348, "step": 200985 }, { "epoch": 2.23, "learning_rate": 1.2908214486077893e-05, "loss": 0.5711, "step": 200990 }, { "epoch": 2.23, "learning_rate": 1.290729175893938e-05, "loss": 0.5627, "step": 200995 }, { "epoch": 2.23, "learning_rate": 1.2906369031800869e-05, "loss": 0.624, "step": 201000 }, { "epoch": 2.23, "eval_loss": 0.5573316812515259, "eval_runtime": 69.3353, "eval_samples_per_second": 28.845, "eval_steps_per_second": 14.423, "step": 201000 }, { "epoch": 2.23, "learning_rate": 1.2905446304662355e-05, "loss": 0.6072, "step": 201005 }, { "epoch": 2.23, "learning_rate": 1.2904523577523844e-05, "loss": 0.5765, "step": 201010 }, { "epoch": 2.23, "learning_rate": 1.290360085038533e-05, "loss": 0.63, "step": 201015 }, { "epoch": 2.23, "learning_rate": 1.290267812324682e-05, "loss": 0.6177, "step": 201020 }, { "epoch": 2.23, "learning_rate": 1.2901755396108306e-05, "loss": 0.6007, "step": 201025 }, { "epoch": 2.23, "learning_rate": 1.2900832668969796e-05, "loss": 0.5672, "step": 201030 }, { "epoch": 2.23, "learning_rate": 1.2899909941831282e-05, "loss": 0.5844, "step": 201035 }, { "epoch": 2.23, "learning_rate": 1.289898721469277e-05, "loss": 0.6281, "step": 201040 }, { "epoch": 2.23, "learning_rate": 1.2898064487554256e-05, "loss": 0.5351, "step": 201045 }, { "epoch": 2.23, "learning_rate": 1.2897141760415745e-05, "loss": 0.5772, "step": 201050 }, { "epoch": 2.23, "learning_rate": 1.2896219033277232e-05, "loss": 0.6149, "step": 201055 }, { "epoch": 2.23, "learning_rate": 1.2895296306138721e-05, "loss": 0.5895, "step": 201060 }, { "epoch": 2.23, "learning_rate": 1.2894373579000207e-05, "loss": 0.6237, "step": 201065 }, { "epoch": 2.23, "learning_rate": 1.2893450851861693e-05, "loss": 0.5899, "step": 201070 }, { "epoch": 2.23, "learning_rate": 1.2892528124723183e-05, "loss": 0.5764, "step": 201075 }, { "epoch": 2.23, "learning_rate": 1.2891605397584669e-05, "loss": 0.6274, "step": 201080 }, { "epoch": 2.23, "learning_rate": 1.2890682670446159e-05, "loss": 0.5744, "step": 201085 }, { "epoch": 2.23, "learning_rate": 1.2889759943307645e-05, "loss": 0.6804, "step": 201090 }, { "epoch": 2.23, "learning_rate": 1.2888837216169133e-05, "loss": 0.5256, "step": 201095 }, { "epoch": 2.23, "learning_rate": 1.288791448903062e-05, "loss": 0.5637, "step": 201100 }, { "epoch": 2.23, "learning_rate": 1.2886991761892108e-05, "loss": 0.6197, "step": 201105 }, { "epoch": 2.23, "learning_rate": 1.2886069034753594e-05, "loss": 0.6295, "step": 201110 }, { "epoch": 2.23, "learning_rate": 1.2885146307615084e-05, "loss": 0.5934, "step": 201115 }, { "epoch": 2.23, "learning_rate": 1.288422358047657e-05, "loss": 0.5542, "step": 201120 }, { "epoch": 2.23, "learning_rate": 1.288330085333806e-05, "loss": 0.6221, "step": 201125 }, { "epoch": 2.23, "learning_rate": 1.2882378126199546e-05, "loss": 0.6358, "step": 201130 }, { "epoch": 2.23, "learning_rate": 1.2881455399061035e-05, "loss": 0.6164, "step": 201135 }, { "epoch": 2.23, "learning_rate": 1.2880532671922522e-05, "loss": 0.6052, "step": 201140 }, { "epoch": 2.23, "learning_rate": 1.2879609944784008e-05, "loss": 0.5844, "step": 201145 }, { "epoch": 2.23, "learning_rate": 1.2878687217645497e-05, "loss": 0.5987, "step": 201150 }, { "epoch": 2.23, "learning_rate": 1.2877764490506983e-05, "loss": 0.5802, "step": 201155 }, { "epoch": 2.23, "learning_rate": 1.2876841763368471e-05, "loss": 0.6185, "step": 201160 }, { "epoch": 2.23, "learning_rate": 1.2875919036229957e-05, "loss": 0.6167, "step": 201165 }, { "epoch": 2.23, "learning_rate": 1.2874996309091447e-05, "loss": 0.6174, "step": 201170 }, { "epoch": 2.23, "learning_rate": 1.2874073581952933e-05, "loss": 0.5913, "step": 201175 }, { "epoch": 2.23, "learning_rate": 1.2873150854814423e-05, "loss": 0.5671, "step": 201180 }, { "epoch": 2.23, "learning_rate": 1.2872228127675909e-05, "loss": 0.6938, "step": 201185 }, { "epoch": 2.23, "learning_rate": 1.2871305400537398e-05, "loss": 0.5464, "step": 201190 }, { "epoch": 2.23, "learning_rate": 1.2870382673398885e-05, "loss": 0.6019, "step": 201195 }, { "epoch": 2.23, "learning_rate": 1.2869459946260372e-05, "loss": 0.6199, "step": 201200 }, { "epoch": 2.23, "learning_rate": 1.286853721912186e-05, "loss": 0.5807, "step": 201205 }, { "epoch": 2.23, "learning_rate": 1.2867614491983348e-05, "loss": 0.616, "step": 201210 }, { "epoch": 2.23, "learning_rate": 1.2866691764844834e-05, "loss": 0.6159, "step": 201215 }, { "epoch": 2.23, "learning_rate": 1.286576903770632e-05, "loss": 0.6363, "step": 201220 }, { "epoch": 2.23, "learning_rate": 1.286484631056781e-05, "loss": 0.5648, "step": 201225 }, { "epoch": 2.23, "learning_rate": 1.2863923583429296e-05, "loss": 0.5801, "step": 201230 }, { "epoch": 2.23, "learning_rate": 1.2863000856290786e-05, "loss": 0.5261, "step": 201235 }, { "epoch": 2.23, "learning_rate": 1.2862078129152272e-05, "loss": 0.5559, "step": 201240 }, { "epoch": 2.23, "learning_rate": 1.2861155402013761e-05, "loss": 0.603, "step": 201245 }, { "epoch": 2.23, "learning_rate": 1.2860232674875247e-05, "loss": 0.6003, "step": 201250 }, { "epoch": 2.23, "learning_rate": 1.2859309947736737e-05, "loss": 0.6199, "step": 201255 }, { "epoch": 2.23, "learning_rate": 1.2858387220598223e-05, "loss": 0.56, "step": 201260 }, { "epoch": 2.23, "learning_rate": 1.2857464493459711e-05, "loss": 0.6111, "step": 201265 }, { "epoch": 2.23, "learning_rate": 1.2856541766321197e-05, "loss": 0.5942, "step": 201270 }, { "epoch": 2.23, "learning_rate": 1.2855619039182687e-05, "loss": 0.6977, "step": 201275 }, { "epoch": 2.23, "learning_rate": 1.2854696312044173e-05, "loss": 0.6116, "step": 201280 }, { "epoch": 2.23, "learning_rate": 1.2853773584905662e-05, "loss": 0.6332, "step": 201285 }, { "epoch": 2.23, "learning_rate": 1.2852850857767149e-05, "loss": 0.6492, "step": 201290 }, { "epoch": 2.23, "learning_rate": 1.2851928130628635e-05, "loss": 0.5515, "step": 201295 }, { "epoch": 2.23, "learning_rate": 1.2851005403490124e-05, "loss": 0.5719, "step": 201300 }, { "epoch": 2.23, "learning_rate": 1.285008267635161e-05, "loss": 0.5696, "step": 201305 }, { "epoch": 2.23, "learning_rate": 1.28491599492131e-05, "loss": 0.5734, "step": 201310 }, { "epoch": 2.23, "learning_rate": 1.2848237222074586e-05, "loss": 0.5893, "step": 201315 }, { "epoch": 2.23, "learning_rate": 1.2847314494936074e-05, "loss": 0.5499, "step": 201320 }, { "epoch": 2.23, "learning_rate": 1.284639176779756e-05, "loss": 0.5977, "step": 201325 }, { "epoch": 2.23, "learning_rate": 1.284546904065905e-05, "loss": 0.6195, "step": 201330 }, { "epoch": 2.23, "learning_rate": 1.2844546313520536e-05, "loss": 0.6186, "step": 201335 }, { "epoch": 2.23, "learning_rate": 1.2843623586382025e-05, "loss": 0.591, "step": 201340 }, { "epoch": 2.23, "learning_rate": 1.2842700859243512e-05, "loss": 0.6357, "step": 201345 }, { "epoch": 2.23, "learning_rate": 1.2841778132105001e-05, "loss": 0.595, "step": 201350 }, { "epoch": 2.23, "learning_rate": 1.2840855404966487e-05, "loss": 0.5931, "step": 201355 }, { "epoch": 2.23, "learning_rate": 1.2839932677827977e-05, "loss": 0.5422, "step": 201360 }, { "epoch": 2.23, "learning_rate": 1.2839009950689463e-05, "loss": 0.5839, "step": 201365 }, { "epoch": 2.23, "learning_rate": 1.2838087223550949e-05, "loss": 0.5904, "step": 201370 }, { "epoch": 2.23, "learning_rate": 1.2837164496412437e-05, "loss": 0.5443, "step": 201375 }, { "epoch": 2.23, "learning_rate": 1.2836241769273925e-05, "loss": 0.6039, "step": 201380 }, { "epoch": 2.23, "learning_rate": 1.2835319042135413e-05, "loss": 0.5834, "step": 201385 }, { "epoch": 2.23, "learning_rate": 1.2834396314996899e-05, "loss": 0.567, "step": 201390 }, { "epoch": 2.23, "learning_rate": 1.2833473587858388e-05, "loss": 0.5569, "step": 201395 }, { "epoch": 2.23, "learning_rate": 1.2832550860719874e-05, "loss": 0.5658, "step": 201400 }, { "epoch": 2.23, "learning_rate": 1.2831628133581364e-05, "loss": 0.6426, "step": 201405 }, { "epoch": 2.23, "learning_rate": 1.283070540644285e-05, "loss": 0.6355, "step": 201410 }, { "epoch": 2.23, "learning_rate": 1.282978267930434e-05, "loss": 0.652, "step": 201415 }, { "epoch": 2.23, "learning_rate": 1.2828859952165826e-05, "loss": 0.5945, "step": 201420 }, { "epoch": 2.23, "learning_rate": 1.2827937225027314e-05, "loss": 0.6233, "step": 201425 }, { "epoch": 2.23, "learning_rate": 1.2827014497888802e-05, "loss": 0.587, "step": 201430 }, { "epoch": 2.23, "learning_rate": 1.282609177075029e-05, "loss": 0.6177, "step": 201435 }, { "epoch": 2.23, "learning_rate": 1.2825169043611776e-05, "loss": 0.5922, "step": 201440 }, { "epoch": 2.23, "learning_rate": 1.2824246316473262e-05, "loss": 0.5972, "step": 201445 }, { "epoch": 2.23, "learning_rate": 1.2823323589334751e-05, "loss": 0.6256, "step": 201450 }, { "epoch": 2.23, "learning_rate": 1.2822400862196237e-05, "loss": 0.5851, "step": 201455 }, { "epoch": 2.23, "learning_rate": 1.2821478135057727e-05, "loss": 0.5871, "step": 201460 }, { "epoch": 2.23, "learning_rate": 1.2820555407919213e-05, "loss": 0.6164, "step": 201465 }, { "epoch": 2.23, "learning_rate": 1.2819632680780703e-05, "loss": 0.5872, "step": 201470 }, { "epoch": 2.23, "learning_rate": 1.2818709953642189e-05, "loss": 0.5708, "step": 201475 }, { "epoch": 2.23, "learning_rate": 1.2817787226503677e-05, "loss": 0.6435, "step": 201480 }, { "epoch": 2.23, "learning_rate": 1.2816864499365165e-05, "loss": 0.581, "step": 201485 }, { "epoch": 2.23, "learning_rate": 1.2815941772226652e-05, "loss": 0.5344, "step": 201490 }, { "epoch": 2.23, "learning_rate": 1.2815019045088139e-05, "loss": 0.5715, "step": 201495 }, { "epoch": 2.23, "learning_rate": 1.2814096317949628e-05, "loss": 0.6202, "step": 201500 }, { "epoch": 2.23, "learning_rate": 1.2813173590811114e-05, "loss": 0.6284, "step": 201505 }, { "epoch": 2.23, "learning_rate": 1.2812250863672604e-05, "loss": 0.5671, "step": 201510 }, { "epoch": 2.23, "learning_rate": 1.281132813653409e-05, "loss": 0.6132, "step": 201515 }, { "epoch": 2.23, "learning_rate": 1.2810405409395576e-05, "loss": 0.6213, "step": 201520 }, { "epoch": 2.23, "learning_rate": 1.2809482682257066e-05, "loss": 0.582, "step": 201525 }, { "epoch": 2.23, "learning_rate": 1.2808559955118552e-05, "loss": 0.6038, "step": 201530 }, { "epoch": 2.23, "learning_rate": 1.2807637227980041e-05, "loss": 0.6024, "step": 201535 }, { "epoch": 2.23, "learning_rate": 1.2806714500841527e-05, "loss": 0.5941, "step": 201540 }, { "epoch": 2.23, "learning_rate": 1.2805791773703015e-05, "loss": 0.6193, "step": 201545 }, { "epoch": 2.23, "learning_rate": 1.2804869046564501e-05, "loss": 0.6227, "step": 201550 }, { "epoch": 2.23, "learning_rate": 1.2803946319425991e-05, "loss": 0.5817, "step": 201555 }, { "epoch": 2.23, "learning_rate": 1.2803023592287477e-05, "loss": 0.5652, "step": 201560 }, { "epoch": 2.23, "learning_rate": 1.2802100865148967e-05, "loss": 0.5633, "step": 201565 }, { "epoch": 2.23, "learning_rate": 1.2801178138010453e-05, "loss": 0.613, "step": 201570 }, { "epoch": 2.23, "learning_rate": 1.2800255410871942e-05, "loss": 0.6092, "step": 201575 }, { "epoch": 2.23, "learning_rate": 1.2799332683733429e-05, "loss": 0.628, "step": 201580 }, { "epoch": 2.23, "learning_rate": 1.2798409956594918e-05, "loss": 0.5731, "step": 201585 }, { "epoch": 2.23, "learning_rate": 1.2797487229456404e-05, "loss": 0.6368, "step": 201590 }, { "epoch": 2.23, "learning_rate": 1.279656450231789e-05, "loss": 0.5581, "step": 201595 }, { "epoch": 2.23, "learning_rate": 1.2795641775179378e-05, "loss": 0.6416, "step": 201600 }, { "epoch": 2.23, "learning_rate": 1.2794719048040864e-05, "loss": 0.5711, "step": 201605 }, { "epoch": 2.23, "learning_rate": 1.2793796320902354e-05, "loss": 0.6039, "step": 201610 }, { "epoch": 2.23, "learning_rate": 1.279287359376384e-05, "loss": 0.5987, "step": 201615 }, { "epoch": 2.23, "learning_rate": 1.279195086662533e-05, "loss": 0.6123, "step": 201620 }, { "epoch": 2.23, "learning_rate": 1.2791028139486816e-05, "loss": 0.5986, "step": 201625 }, { "epoch": 2.23, "learning_rate": 1.2790105412348305e-05, "loss": 0.5814, "step": 201630 }, { "epoch": 2.23, "learning_rate": 1.2789182685209791e-05, "loss": 0.5952, "step": 201635 }, { "epoch": 2.23, "learning_rate": 1.2788259958071281e-05, "loss": 0.5691, "step": 201640 }, { "epoch": 2.23, "learning_rate": 1.2787337230932767e-05, "loss": 0.6003, "step": 201645 }, { "epoch": 2.23, "learning_rate": 1.2786414503794255e-05, "loss": 0.5769, "step": 201650 }, { "epoch": 2.23, "learning_rate": 1.2785491776655741e-05, "loss": 0.6089, "step": 201655 }, { "epoch": 2.23, "learning_rate": 1.278456904951723e-05, "loss": 0.5905, "step": 201660 }, { "epoch": 2.23, "learning_rate": 1.2783646322378717e-05, "loss": 0.5959, "step": 201665 }, { "epoch": 2.23, "learning_rate": 1.2782723595240203e-05, "loss": 0.5875, "step": 201670 }, { "epoch": 2.23, "learning_rate": 1.2781800868101693e-05, "loss": 0.583, "step": 201675 }, { "epoch": 2.23, "learning_rate": 1.2780878140963179e-05, "loss": 0.6133, "step": 201680 }, { "epoch": 2.23, "learning_rate": 1.2779955413824668e-05, "loss": 0.5732, "step": 201685 }, { "epoch": 2.23, "learning_rate": 1.2779032686686154e-05, "loss": 0.586, "step": 201690 }, { "epoch": 2.23, "learning_rate": 1.2778109959547644e-05, "loss": 0.5878, "step": 201695 }, { "epoch": 2.23, "learning_rate": 1.277718723240913e-05, "loss": 0.5832, "step": 201700 }, { "epoch": 2.23, "learning_rate": 1.2776264505270618e-05, "loss": 0.6417, "step": 201705 }, { "epoch": 2.23, "learning_rate": 1.2775341778132104e-05, "loss": 0.6111, "step": 201710 }, { "epoch": 2.23, "learning_rate": 1.2774419050993594e-05, "loss": 0.5239, "step": 201715 }, { "epoch": 2.23, "learning_rate": 1.277349632385508e-05, "loss": 0.5707, "step": 201720 }, { "epoch": 2.23, "learning_rate": 1.277257359671657e-05, "loss": 0.6492, "step": 201725 }, { "epoch": 2.23, "learning_rate": 1.2771650869578056e-05, "loss": 0.6263, "step": 201730 }, { "epoch": 2.23, "learning_rate": 1.2770728142439545e-05, "loss": 0.5169, "step": 201735 }, { "epoch": 2.23, "learning_rate": 1.2769805415301031e-05, "loss": 0.5568, "step": 201740 }, { "epoch": 2.23, "learning_rate": 1.2768882688162517e-05, "loss": 0.6784, "step": 201745 }, { "epoch": 2.23, "learning_rate": 1.2767959961024007e-05, "loss": 0.6396, "step": 201750 }, { "epoch": 2.23, "learning_rate": 1.2767037233885493e-05, "loss": 0.6001, "step": 201755 }, { "epoch": 2.23, "learning_rate": 1.2766114506746981e-05, "loss": 0.5778, "step": 201760 }, { "epoch": 2.23, "learning_rate": 1.2765191779608469e-05, "loss": 0.5538, "step": 201765 }, { "epoch": 2.23, "learning_rate": 1.2764269052469957e-05, "loss": 0.6052, "step": 201770 }, { "epoch": 2.23, "learning_rate": 1.2763346325331443e-05, "loss": 0.5978, "step": 201775 }, { "epoch": 2.23, "learning_rate": 1.2762423598192932e-05, "loss": 0.6392, "step": 201780 }, { "epoch": 2.23, "learning_rate": 1.2761500871054418e-05, "loss": 0.6477, "step": 201785 }, { "epoch": 2.23, "learning_rate": 1.2760578143915908e-05, "loss": 0.5749, "step": 201790 }, { "epoch": 2.23, "learning_rate": 1.2759655416777394e-05, "loss": 0.5986, "step": 201795 }, { "epoch": 2.23, "learning_rate": 1.2758732689638884e-05, "loss": 0.615, "step": 201800 }, { "epoch": 2.23, "learning_rate": 1.275780996250037e-05, "loss": 0.5883, "step": 201805 }, { "epoch": 2.23, "learning_rate": 1.2756887235361858e-05, "loss": 0.588, "step": 201810 }, { "epoch": 2.23, "learning_rate": 1.2755964508223346e-05, "loss": 0.5772, "step": 201815 }, { "epoch": 2.23, "learning_rate": 1.2755041781084833e-05, "loss": 0.5854, "step": 201820 }, { "epoch": 2.23, "learning_rate": 1.275411905394632e-05, "loss": 0.5853, "step": 201825 }, { "epoch": 2.23, "learning_rate": 1.2753196326807806e-05, "loss": 0.5821, "step": 201830 }, { "epoch": 2.23, "learning_rate": 1.2752273599669295e-05, "loss": 0.6105, "step": 201835 }, { "epoch": 2.23, "learning_rate": 1.2751350872530781e-05, "loss": 0.6088, "step": 201840 }, { "epoch": 2.23, "learning_rate": 1.2750428145392271e-05, "loss": 0.642, "step": 201845 }, { "epoch": 2.24, "learning_rate": 1.2749505418253757e-05, "loss": 0.6199, "step": 201850 }, { "epoch": 2.24, "learning_rate": 1.2748582691115247e-05, "loss": 0.5644, "step": 201855 }, { "epoch": 2.24, "learning_rate": 1.2747659963976733e-05, "loss": 0.6535, "step": 201860 }, { "epoch": 2.24, "learning_rate": 1.274673723683822e-05, "loss": 0.5798, "step": 201865 }, { "epoch": 2.24, "learning_rate": 1.2745814509699709e-05, "loss": 0.6105, "step": 201870 }, { "epoch": 2.24, "learning_rate": 1.2744891782561196e-05, "loss": 0.579, "step": 201875 }, { "epoch": 2.24, "learning_rate": 1.2743969055422683e-05, "loss": 0.5748, "step": 201880 }, { "epoch": 2.24, "learning_rate": 1.2743046328284172e-05, "loss": 0.6182, "step": 201885 }, { "epoch": 2.24, "learning_rate": 1.2742123601145658e-05, "loss": 0.5746, "step": 201890 }, { "epoch": 2.24, "learning_rate": 1.2741200874007148e-05, "loss": 0.5539, "step": 201895 }, { "epoch": 2.24, "learning_rate": 1.2740278146868634e-05, "loss": 0.6059, "step": 201900 }, { "epoch": 2.24, "learning_rate": 1.273935541973012e-05, "loss": 0.6527, "step": 201905 }, { "epoch": 2.24, "learning_rate": 1.273843269259161e-05, "loss": 0.6022, "step": 201910 }, { "epoch": 2.24, "learning_rate": 1.2737509965453096e-05, "loss": 0.6114, "step": 201915 }, { "epoch": 2.24, "learning_rate": 1.2736587238314585e-05, "loss": 0.5603, "step": 201920 }, { "epoch": 2.24, "learning_rate": 1.2735664511176071e-05, "loss": 0.5487, "step": 201925 }, { "epoch": 2.24, "learning_rate": 1.273474178403756e-05, "loss": 0.5509, "step": 201930 }, { "epoch": 2.24, "learning_rate": 1.2733819056899045e-05, "loss": 0.5699, "step": 201935 }, { "epoch": 2.24, "learning_rate": 1.2732896329760535e-05, "loss": 0.6014, "step": 201940 }, { "epoch": 2.24, "learning_rate": 1.2731973602622021e-05, "loss": 0.6249, "step": 201945 }, { "epoch": 2.24, "learning_rate": 1.273105087548351e-05, "loss": 0.6226, "step": 201950 }, { "epoch": 2.24, "learning_rate": 1.2730128148344997e-05, "loss": 0.5648, "step": 201955 }, { "epoch": 2.24, "learning_rate": 1.2729205421206486e-05, "loss": 0.6241, "step": 201960 }, { "epoch": 2.24, "learning_rate": 1.2728282694067973e-05, "loss": 0.5873, "step": 201965 }, { "epoch": 2.24, "learning_rate": 1.2727359966929462e-05, "loss": 0.5545, "step": 201970 }, { "epoch": 2.24, "learning_rate": 1.2726437239790948e-05, "loss": 0.5699, "step": 201975 }, { "epoch": 2.24, "learning_rate": 1.2725514512652434e-05, "loss": 0.6157, "step": 201980 }, { "epoch": 2.24, "learning_rate": 1.2724591785513922e-05, "loss": 0.6178, "step": 201985 }, { "epoch": 2.24, "learning_rate": 1.2723669058375408e-05, "loss": 0.5521, "step": 201990 }, { "epoch": 2.24, "learning_rate": 1.2722746331236898e-05, "loss": 0.582, "step": 201995 }, { "epoch": 2.24, "learning_rate": 1.2721823604098384e-05, "loss": 0.6222, "step": 202000 }, { "epoch": 2.24, "eval_loss": 0.5660519599914551, "eval_runtime": 69.3439, "eval_samples_per_second": 28.842, "eval_steps_per_second": 14.421, "step": 202000 }, { "epoch": 2.24, "learning_rate": 1.2720900876959874e-05, "loss": 0.5971, "step": 202005 }, { "epoch": 2.24, "learning_rate": 1.271997814982136e-05, "loss": 0.5902, "step": 202010 }, { "epoch": 2.24, "learning_rate": 1.271905542268285e-05, "loss": 0.5789, "step": 202015 }, { "epoch": 2.24, "learning_rate": 1.2718132695544336e-05, "loss": 0.5884, "step": 202020 }, { "epoch": 2.24, "learning_rate": 1.2717209968405825e-05, "loss": 0.6161, "step": 202025 }, { "epoch": 2.24, "learning_rate": 1.2716287241267311e-05, "loss": 0.5782, "step": 202030 }, { "epoch": 2.24, "learning_rate": 1.2715364514128799e-05, "loss": 0.6037, "step": 202035 }, { "epoch": 2.24, "learning_rate": 1.2714441786990285e-05, "loss": 0.6088, "step": 202040 }, { "epoch": 2.24, "learning_rate": 1.2713519059851775e-05, "loss": 0.6188, "step": 202045 }, { "epoch": 2.24, "learning_rate": 1.2712596332713261e-05, "loss": 0.6029, "step": 202050 }, { "epoch": 2.24, "learning_rate": 1.2711673605574747e-05, "loss": 0.608, "step": 202055 }, { "epoch": 2.24, "learning_rate": 1.2710750878436237e-05, "loss": 0.5697, "step": 202060 }, { "epoch": 2.24, "learning_rate": 1.2709828151297723e-05, "loss": 0.609, "step": 202065 }, { "epoch": 2.24, "learning_rate": 1.2708905424159212e-05, "loss": 0.622, "step": 202070 }, { "epoch": 2.24, "learning_rate": 1.2707982697020698e-05, "loss": 0.5659, "step": 202075 }, { "epoch": 2.24, "learning_rate": 1.2707059969882188e-05, "loss": 0.6046, "step": 202080 }, { "epoch": 2.24, "learning_rate": 1.2706137242743674e-05, "loss": 0.6074, "step": 202085 }, { "epoch": 2.24, "learning_rate": 1.2705214515605162e-05, "loss": 0.5785, "step": 202090 }, { "epoch": 2.24, "learning_rate": 1.270429178846665e-05, "loss": 0.6325, "step": 202095 }, { "epoch": 2.24, "learning_rate": 1.2703369061328138e-05, "loss": 0.6299, "step": 202100 }, { "epoch": 2.24, "learning_rate": 1.2702446334189624e-05, "loss": 0.5545, "step": 202105 }, { "epoch": 2.24, "learning_rate": 1.2701523607051113e-05, "loss": 0.6157, "step": 202110 }, { "epoch": 2.24, "learning_rate": 1.27006008799126e-05, "loss": 0.5728, "step": 202115 }, { "epoch": 2.24, "learning_rate": 1.2699678152774089e-05, "loss": 0.6303, "step": 202120 }, { "epoch": 2.24, "learning_rate": 1.2698755425635575e-05, "loss": 0.6366, "step": 202125 }, { "epoch": 2.24, "learning_rate": 1.2697832698497061e-05, "loss": 0.5709, "step": 202130 }, { "epoch": 2.24, "learning_rate": 1.2696909971358551e-05, "loss": 0.6083, "step": 202135 }, { "epoch": 2.24, "learning_rate": 1.2695987244220037e-05, "loss": 0.5526, "step": 202140 }, { "epoch": 2.24, "learning_rate": 1.2695064517081525e-05, "loss": 0.6289, "step": 202145 }, { "epoch": 2.24, "learning_rate": 1.2694141789943013e-05, "loss": 0.6561, "step": 202150 }, { "epoch": 2.24, "learning_rate": 1.26932190628045e-05, "loss": 0.6203, "step": 202155 }, { "epoch": 2.24, "learning_rate": 1.2692296335665987e-05, "loss": 0.5719, "step": 202160 }, { "epoch": 2.24, "learning_rate": 1.2691373608527476e-05, "loss": 0.6332, "step": 202165 }, { "epoch": 2.24, "learning_rate": 1.2690450881388963e-05, "loss": 0.5866, "step": 202170 }, { "epoch": 2.24, "learning_rate": 1.2689528154250452e-05, "loss": 0.6386, "step": 202175 }, { "epoch": 2.24, "learning_rate": 1.2688605427111938e-05, "loss": 0.6177, "step": 202180 }, { "epoch": 2.24, "learning_rate": 1.2687682699973428e-05, "loss": 0.6137, "step": 202185 }, { "epoch": 2.24, "learning_rate": 1.2686759972834914e-05, "loss": 0.5992, "step": 202190 }, { "epoch": 2.24, "learning_rate": 1.2685837245696402e-05, "loss": 0.5554, "step": 202195 }, { "epoch": 2.24, "learning_rate": 1.268491451855789e-05, "loss": 0.6528, "step": 202200 }, { "epoch": 2.24, "learning_rate": 1.2683991791419376e-05, "loss": 0.5598, "step": 202205 }, { "epoch": 2.24, "learning_rate": 1.2683069064280864e-05, "loss": 0.6107, "step": 202210 }, { "epoch": 2.24, "learning_rate": 1.268214633714235e-05, "loss": 0.5968, "step": 202215 }, { "epoch": 2.24, "learning_rate": 1.268122361000384e-05, "loss": 0.6162, "step": 202220 }, { "epoch": 2.24, "learning_rate": 1.2680300882865325e-05, "loss": 0.6159, "step": 202225 }, { "epoch": 2.24, "learning_rate": 1.2679378155726815e-05, "loss": 0.5582, "step": 202230 }, { "epoch": 2.24, "learning_rate": 1.2678455428588301e-05, "loss": 0.5701, "step": 202235 }, { "epoch": 2.24, "learning_rate": 1.267753270144979e-05, "loss": 0.5731, "step": 202240 }, { "epoch": 2.24, "learning_rate": 1.2676609974311277e-05, "loss": 0.6014, "step": 202245 }, { "epoch": 2.24, "learning_rate": 1.2675687247172766e-05, "loss": 0.5876, "step": 202250 }, { "epoch": 2.24, "learning_rate": 1.2674764520034253e-05, "loss": 0.6072, "step": 202255 }, { "epoch": 2.24, "learning_rate": 1.267384179289574e-05, "loss": 0.5494, "step": 202260 }, { "epoch": 2.24, "learning_rate": 1.2672919065757227e-05, "loss": 0.5744, "step": 202265 }, { "epoch": 2.24, "learning_rate": 1.2671996338618716e-05, "loss": 0.6105, "step": 202270 }, { "epoch": 2.24, "learning_rate": 1.2671073611480202e-05, "loss": 0.6037, "step": 202275 }, { "epoch": 2.24, "learning_rate": 1.2670150884341688e-05, "loss": 0.5783, "step": 202280 }, { "epoch": 2.24, "learning_rate": 1.2669228157203178e-05, "loss": 0.6046, "step": 202285 }, { "epoch": 2.24, "learning_rate": 1.2668305430064664e-05, "loss": 0.533, "step": 202290 }, { "epoch": 2.24, "learning_rate": 1.2667382702926154e-05, "loss": 0.5822, "step": 202295 }, { "epoch": 2.24, "learning_rate": 1.266645997578764e-05, "loss": 0.6128, "step": 202300 }, { "epoch": 2.24, "learning_rate": 1.266553724864913e-05, "loss": 0.5877, "step": 202305 }, { "epoch": 2.24, "learning_rate": 1.2664614521510615e-05, "loss": 0.6141, "step": 202310 }, { "epoch": 2.24, "learning_rate": 1.2663691794372103e-05, "loss": 0.5743, "step": 202315 }, { "epoch": 2.24, "learning_rate": 1.266276906723359e-05, "loss": 0.5873, "step": 202320 }, { "epoch": 2.24, "learning_rate": 1.2661846340095079e-05, "loss": 0.6412, "step": 202325 }, { "epoch": 2.24, "learning_rate": 1.2660923612956565e-05, "loss": 0.5433, "step": 202330 }, { "epoch": 2.24, "learning_rate": 1.2660000885818055e-05, "loss": 0.5878, "step": 202335 }, { "epoch": 2.24, "learning_rate": 1.2659078158679541e-05, "loss": 0.5701, "step": 202340 }, { "epoch": 2.24, "learning_rate": 1.265815543154103e-05, "loss": 0.5935, "step": 202345 }, { "epoch": 2.24, "learning_rate": 1.2657232704402517e-05, "loss": 0.5925, "step": 202350 }, { "epoch": 2.24, "learning_rate": 1.2656309977264003e-05, "loss": 0.5629, "step": 202355 }, { "epoch": 2.24, "learning_rate": 1.2655387250125492e-05, "loss": 0.6437, "step": 202360 }, { "epoch": 2.24, "learning_rate": 1.2654464522986978e-05, "loss": 0.5961, "step": 202365 }, { "epoch": 2.24, "learning_rate": 1.2653541795848466e-05, "loss": 0.6134, "step": 202370 }, { "epoch": 2.24, "learning_rate": 1.2652619068709952e-05, "loss": 0.5687, "step": 202375 }, { "epoch": 2.24, "learning_rate": 1.2651696341571442e-05, "loss": 0.6337, "step": 202380 }, { "epoch": 2.24, "learning_rate": 1.2650773614432928e-05, "loss": 0.5759, "step": 202385 }, { "epoch": 2.24, "learning_rate": 1.2649850887294418e-05, "loss": 0.557, "step": 202390 }, { "epoch": 2.24, "learning_rate": 1.2648928160155904e-05, "loss": 0.5822, "step": 202395 }, { "epoch": 2.24, "learning_rate": 1.2648005433017393e-05, "loss": 0.6591, "step": 202400 }, { "epoch": 2.24, "learning_rate": 1.264708270587888e-05, "loss": 0.625, "step": 202405 }, { "epoch": 2.24, "learning_rate": 1.2646159978740369e-05, "loss": 0.6109, "step": 202410 }, { "epoch": 2.24, "learning_rate": 1.2645237251601855e-05, "loss": 0.5836, "step": 202415 }, { "epoch": 2.24, "learning_rate": 1.2644314524463343e-05, "loss": 0.601, "step": 202420 }, { "epoch": 2.24, "learning_rate": 1.264339179732483e-05, "loss": 0.587, "step": 202425 }, { "epoch": 2.24, "learning_rate": 1.2642469070186317e-05, "loss": 0.5766, "step": 202430 }, { "epoch": 2.24, "learning_rate": 1.2641546343047805e-05, "loss": 0.6012, "step": 202435 }, { "epoch": 2.24, "learning_rate": 1.2640623615909291e-05, "loss": 0.6013, "step": 202440 }, { "epoch": 2.24, "learning_rate": 1.263970088877078e-05, "loss": 0.6162, "step": 202445 }, { "epoch": 2.24, "learning_rate": 1.2638778161632267e-05, "loss": 0.5925, "step": 202450 }, { "epoch": 2.24, "learning_rate": 1.2637855434493756e-05, "loss": 0.5687, "step": 202455 }, { "epoch": 2.24, "learning_rate": 1.2636932707355242e-05, "loss": 0.5678, "step": 202460 }, { "epoch": 2.24, "learning_rate": 1.2636009980216732e-05, "loss": 0.626, "step": 202465 }, { "epoch": 2.24, "learning_rate": 1.2635087253078218e-05, "loss": 0.6256, "step": 202470 }, { "epoch": 2.24, "learning_rate": 1.2634164525939706e-05, "loss": 0.5911, "step": 202475 }, { "epoch": 2.24, "learning_rate": 1.2633241798801194e-05, "loss": 0.6078, "step": 202480 }, { "epoch": 2.24, "learning_rate": 1.2632319071662682e-05, "loss": 0.6497, "step": 202485 }, { "epoch": 2.24, "learning_rate": 1.2631396344524168e-05, "loss": 0.6406, "step": 202490 }, { "epoch": 2.24, "learning_rate": 1.2630473617385657e-05, "loss": 0.553, "step": 202495 }, { "epoch": 2.24, "learning_rate": 1.2629550890247144e-05, "loss": 0.5237, "step": 202500 }, { "epoch": 2.24, "learning_rate": 1.262862816310863e-05, "loss": 0.5731, "step": 202505 }, { "epoch": 2.24, "learning_rate": 1.262770543597012e-05, "loss": 0.5802, "step": 202510 }, { "epoch": 2.24, "learning_rate": 1.2626782708831605e-05, "loss": 0.6107, "step": 202515 }, { "epoch": 2.24, "learning_rate": 1.2625859981693095e-05, "loss": 0.5847, "step": 202520 }, { "epoch": 2.24, "learning_rate": 1.2624937254554581e-05, "loss": 0.5996, "step": 202525 }, { "epoch": 2.24, "learning_rate": 1.262401452741607e-05, "loss": 0.5935, "step": 202530 }, { "epoch": 2.24, "learning_rate": 1.2623091800277557e-05, "loss": 0.5183, "step": 202535 }, { "epoch": 2.24, "learning_rate": 1.2622169073139045e-05, "loss": 0.5719, "step": 202540 }, { "epoch": 2.24, "learning_rate": 1.262124634600053e-05, "loss": 0.5912, "step": 202545 }, { "epoch": 2.24, "learning_rate": 1.262032361886202e-05, "loss": 0.5972, "step": 202550 }, { "epoch": 2.24, "learning_rate": 1.2619400891723507e-05, "loss": 0.607, "step": 202555 }, { "epoch": 2.24, "learning_rate": 1.2618478164584996e-05, "loss": 0.5957, "step": 202560 }, { "epoch": 2.24, "learning_rate": 1.2617555437446482e-05, "loss": 0.6468, "step": 202565 }, { "epoch": 2.24, "learning_rate": 1.2616632710307972e-05, "loss": 0.5573, "step": 202570 }, { "epoch": 2.24, "learning_rate": 1.2615709983169458e-05, "loss": 0.5926, "step": 202575 }, { "epoch": 2.24, "learning_rate": 1.2614787256030944e-05, "loss": 0.5791, "step": 202580 }, { "epoch": 2.24, "learning_rate": 1.2613864528892434e-05, "loss": 0.6433, "step": 202585 }, { "epoch": 2.24, "learning_rate": 1.261294180175392e-05, "loss": 0.6082, "step": 202590 }, { "epoch": 2.24, "learning_rate": 1.2612019074615408e-05, "loss": 0.5861, "step": 202595 }, { "epoch": 2.24, "learning_rate": 1.2611096347476894e-05, "loss": 0.6172, "step": 202600 }, { "epoch": 2.24, "learning_rate": 1.2610173620338383e-05, "loss": 0.5829, "step": 202605 }, { "epoch": 2.24, "learning_rate": 1.260925089319987e-05, "loss": 0.5717, "step": 202610 }, { "epoch": 2.24, "learning_rate": 1.2608328166061359e-05, "loss": 0.591, "step": 202615 }, { "epoch": 2.24, "learning_rate": 1.2607405438922845e-05, "loss": 0.59, "step": 202620 }, { "epoch": 2.24, "learning_rate": 1.2606482711784335e-05, "loss": 0.5835, "step": 202625 }, { "epoch": 2.24, "learning_rate": 1.2605559984645821e-05, "loss": 0.576, "step": 202630 }, { "epoch": 2.24, "learning_rate": 1.260463725750731e-05, "loss": 0.6172, "step": 202635 }, { "epoch": 2.24, "learning_rate": 1.2603714530368797e-05, "loss": 0.5848, "step": 202640 }, { "epoch": 2.24, "learning_rate": 1.2602791803230284e-05, "loss": 0.5752, "step": 202645 }, { "epoch": 2.24, "learning_rate": 1.260186907609177e-05, "loss": 0.5606, "step": 202650 }, { "epoch": 2.24, "learning_rate": 1.260094634895326e-05, "loss": 0.6181, "step": 202655 }, { "epoch": 2.24, "learning_rate": 1.2600023621814746e-05, "loss": 0.5759, "step": 202660 }, { "epoch": 2.24, "learning_rate": 1.2599100894676232e-05, "loss": 0.56, "step": 202665 }, { "epoch": 2.24, "learning_rate": 1.2598178167537722e-05, "loss": 0.6314, "step": 202670 }, { "epoch": 2.24, "learning_rate": 1.2597255440399208e-05, "loss": 0.5589, "step": 202675 }, { "epoch": 2.24, "learning_rate": 1.2596332713260698e-05, "loss": 0.5877, "step": 202680 }, { "epoch": 2.24, "learning_rate": 1.2595409986122184e-05, "loss": 0.6102, "step": 202685 }, { "epoch": 2.24, "learning_rate": 1.2594487258983673e-05, "loss": 0.5321, "step": 202690 }, { "epoch": 2.24, "learning_rate": 1.259356453184516e-05, "loss": 0.6023, "step": 202695 }, { "epoch": 2.24, "learning_rate": 1.2592641804706647e-05, "loss": 0.6395, "step": 202700 }, { "epoch": 2.24, "learning_rate": 1.2591719077568134e-05, "loss": 0.5519, "step": 202705 }, { "epoch": 2.24, "learning_rate": 1.2590796350429623e-05, "loss": 0.608, "step": 202710 }, { "epoch": 2.24, "learning_rate": 1.258987362329111e-05, "loss": 0.6161, "step": 202715 }, { "epoch": 2.24, "learning_rate": 1.2588950896152599e-05, "loss": 0.5738, "step": 202720 }, { "epoch": 2.24, "learning_rate": 1.2588028169014085e-05, "loss": 0.5242, "step": 202725 }, { "epoch": 2.24, "learning_rate": 1.2587105441875574e-05, "loss": 0.6258, "step": 202730 }, { "epoch": 2.24, "learning_rate": 1.258618271473706e-05, "loss": 0.6183, "step": 202735 }, { "epoch": 2.24, "learning_rate": 1.2585259987598547e-05, "loss": 0.6114, "step": 202740 }, { "epoch": 2.24, "learning_rate": 1.2584337260460036e-05, "loss": 0.6375, "step": 202745 }, { "epoch": 2.24, "learning_rate": 1.2583414533321522e-05, "loss": 0.646, "step": 202750 }, { "epoch": 2.25, "learning_rate": 1.258249180618301e-05, "loss": 0.5276, "step": 202755 }, { "epoch": 2.25, "learning_rate": 1.2581569079044498e-05, "loss": 0.6168, "step": 202760 }, { "epoch": 2.25, "learning_rate": 1.2580646351905986e-05, "loss": 0.6046, "step": 202765 }, { "epoch": 2.25, "learning_rate": 1.2579723624767472e-05, "loss": 0.5841, "step": 202770 }, { "epoch": 2.25, "learning_rate": 1.2578800897628962e-05, "loss": 0.5449, "step": 202775 }, { "epoch": 2.25, "learning_rate": 1.2577878170490448e-05, "loss": 0.5515, "step": 202780 }, { "epoch": 2.25, "learning_rate": 1.2576955443351937e-05, "loss": 0.573, "step": 202785 }, { "epoch": 2.25, "learning_rate": 1.2576032716213424e-05, "loss": 0.6077, "step": 202790 }, { "epoch": 2.25, "learning_rate": 1.2575109989074913e-05, "loss": 0.5968, "step": 202795 }, { "epoch": 2.25, "learning_rate": 1.25741872619364e-05, "loss": 0.646, "step": 202800 }, { "epoch": 2.25, "learning_rate": 1.2573264534797887e-05, "loss": 0.603, "step": 202805 }, { "epoch": 2.25, "learning_rate": 1.2572341807659373e-05, "loss": 0.5785, "step": 202810 }, { "epoch": 2.25, "learning_rate": 1.2571419080520861e-05, "loss": 0.6464, "step": 202815 }, { "epoch": 2.25, "learning_rate": 1.2570496353382349e-05, "loss": 0.591, "step": 202820 }, { "epoch": 2.25, "learning_rate": 1.2569573626243835e-05, "loss": 0.5942, "step": 202825 }, { "epoch": 2.25, "learning_rate": 1.2568650899105325e-05, "loss": 0.6264, "step": 202830 }, { "epoch": 2.25, "learning_rate": 1.256772817196681e-05, "loss": 0.5509, "step": 202835 }, { "epoch": 2.25, "learning_rate": 1.25668054448283e-05, "loss": 0.6179, "step": 202840 }, { "epoch": 2.25, "learning_rate": 1.2565882717689786e-05, "loss": 0.5787, "step": 202845 }, { "epoch": 2.25, "learning_rate": 1.2564959990551276e-05, "loss": 0.5701, "step": 202850 }, { "epoch": 2.25, "learning_rate": 1.2564037263412762e-05, "loss": 0.5748, "step": 202855 }, { "epoch": 2.25, "learning_rate": 1.256311453627425e-05, "loss": 0.6388, "step": 202860 }, { "epoch": 2.25, "learning_rate": 1.2562191809135738e-05, "loss": 0.6398, "step": 202865 }, { "epoch": 2.25, "learning_rate": 1.2561269081997226e-05, "loss": 0.5607, "step": 202870 }, { "epoch": 2.25, "learning_rate": 1.2560346354858712e-05, "loss": 0.5814, "step": 202875 }, { "epoch": 2.25, "learning_rate": 1.2559423627720201e-05, "loss": 0.6236, "step": 202880 }, { "epoch": 2.25, "learning_rate": 1.2558500900581688e-05, "loss": 0.6272, "step": 202885 }, { "epoch": 2.25, "learning_rate": 1.2557578173443174e-05, "loss": 0.6676, "step": 202890 }, { "epoch": 2.25, "learning_rate": 1.2556655446304663e-05, "loss": 0.6232, "step": 202895 }, { "epoch": 2.25, "learning_rate": 1.255573271916615e-05, "loss": 0.5573, "step": 202900 }, { "epoch": 2.25, "learning_rate": 1.2554809992027639e-05, "loss": 0.5672, "step": 202905 }, { "epoch": 2.25, "learning_rate": 1.2553887264889125e-05, "loss": 0.586, "step": 202910 }, { "epoch": 2.25, "learning_rate": 1.2552964537750615e-05, "loss": 0.6181, "step": 202915 }, { "epoch": 2.25, "learning_rate": 1.25520418106121e-05, "loss": 0.5839, "step": 202920 }, { "epoch": 2.25, "learning_rate": 1.2551119083473589e-05, "loss": 0.633, "step": 202925 }, { "epoch": 2.25, "learning_rate": 1.2550196356335075e-05, "loss": 0.5894, "step": 202930 }, { "epoch": 2.25, "learning_rate": 1.2549273629196564e-05, "loss": 0.5751, "step": 202935 }, { "epoch": 2.25, "learning_rate": 1.254835090205805e-05, "loss": 0.5985, "step": 202940 }, { "epoch": 2.25, "learning_rate": 1.254742817491954e-05, "loss": 0.5989, "step": 202945 }, { "epoch": 2.25, "learning_rate": 1.2546505447781026e-05, "loss": 0.6828, "step": 202950 }, { "epoch": 2.25, "learning_rate": 1.2545582720642516e-05, "loss": 0.6331, "step": 202955 }, { "epoch": 2.25, "learning_rate": 1.2544659993504002e-05, "loss": 0.6117, "step": 202960 }, { "epoch": 2.25, "learning_rate": 1.2543737266365488e-05, "loss": 0.6048, "step": 202965 }, { "epoch": 2.25, "learning_rate": 1.2542814539226978e-05, "loss": 0.5861, "step": 202970 }, { "epoch": 2.25, "learning_rate": 1.2541891812088464e-05, "loss": 0.5901, "step": 202975 }, { "epoch": 2.25, "learning_rate": 1.2540969084949952e-05, "loss": 0.5924, "step": 202980 }, { "epoch": 2.25, "learning_rate": 1.2540046357811438e-05, "loss": 0.6085, "step": 202985 }, { "epoch": 2.25, "learning_rate": 1.2539123630672927e-05, "loss": 0.561, "step": 202990 }, { "epoch": 2.25, "learning_rate": 1.2538200903534413e-05, "loss": 0.6184, "step": 202995 }, { "epoch": 2.25, "learning_rate": 1.2537278176395903e-05, "loss": 0.5597, "step": 203000 }, { "epoch": 2.25, "eval_loss": 0.5785530805587769, "eval_runtime": 69.1537, "eval_samples_per_second": 28.921, "eval_steps_per_second": 14.461, "step": 203000 }, { "epoch": 2.25, "learning_rate": 1.253635544925739e-05, "loss": 0.5915, "step": 203005 }, { "epoch": 2.25, "learning_rate": 1.2535432722118879e-05, "loss": 0.5886, "step": 203010 }, { "epoch": 2.25, "learning_rate": 1.2534509994980365e-05, "loss": 0.612, "step": 203015 }, { "epoch": 2.25, "learning_rate": 1.2533587267841854e-05, "loss": 0.6128, "step": 203020 }, { "epoch": 2.25, "learning_rate": 1.253266454070334e-05, "loss": 0.6048, "step": 203025 }, { "epoch": 2.25, "learning_rate": 1.2531741813564828e-05, "loss": 0.5603, "step": 203030 }, { "epoch": 2.25, "learning_rate": 1.2530819086426315e-05, "loss": 0.6385, "step": 203035 }, { "epoch": 2.25, "learning_rate": 1.2529896359287802e-05, "loss": 0.6386, "step": 203040 }, { "epoch": 2.25, "learning_rate": 1.252897363214929e-05, "loss": 0.6335, "step": 203045 }, { "epoch": 2.25, "learning_rate": 1.2528050905010776e-05, "loss": 0.5927, "step": 203050 }, { "epoch": 2.25, "learning_rate": 1.2527128177872266e-05, "loss": 0.4933, "step": 203055 }, { "epoch": 2.25, "learning_rate": 1.2526205450733752e-05, "loss": 0.5438, "step": 203060 }, { "epoch": 2.25, "learning_rate": 1.2525282723595242e-05, "loss": 0.6153, "step": 203065 }, { "epoch": 2.25, "learning_rate": 1.2524359996456728e-05, "loss": 0.5839, "step": 203070 }, { "epoch": 2.25, "learning_rate": 1.2523437269318217e-05, "loss": 0.5679, "step": 203075 }, { "epoch": 2.25, "learning_rate": 1.2522514542179704e-05, "loss": 0.5608, "step": 203080 }, { "epoch": 2.25, "learning_rate": 1.2521591815041191e-05, "loss": 0.6097, "step": 203085 }, { "epoch": 2.25, "learning_rate": 1.2520669087902678e-05, "loss": 0.5923, "step": 203090 }, { "epoch": 2.25, "learning_rate": 1.2519746360764167e-05, "loss": 0.6146, "step": 203095 }, { "epoch": 2.25, "learning_rate": 1.2518823633625653e-05, "loss": 0.5811, "step": 203100 }, { "epoch": 2.25, "learning_rate": 1.2517900906487143e-05, "loss": 0.5629, "step": 203105 }, { "epoch": 2.25, "learning_rate": 1.2516978179348629e-05, "loss": 0.6288, "step": 203110 }, { "epoch": 2.25, "learning_rate": 1.2516055452210115e-05, "loss": 0.5675, "step": 203115 }, { "epoch": 2.25, "learning_rate": 1.2515132725071605e-05, "loss": 0.6092, "step": 203120 }, { "epoch": 2.25, "learning_rate": 1.251420999793309e-05, "loss": 0.6104, "step": 203125 }, { "epoch": 2.25, "learning_rate": 1.251328727079458e-05, "loss": 0.6394, "step": 203130 }, { "epoch": 2.25, "learning_rate": 1.2512364543656066e-05, "loss": 0.5725, "step": 203135 }, { "epoch": 2.25, "learning_rate": 1.2511441816517554e-05, "loss": 0.565, "step": 203140 }, { "epoch": 2.25, "learning_rate": 1.2510519089379042e-05, "loss": 0.5995, "step": 203145 }, { "epoch": 2.25, "learning_rate": 1.250959636224053e-05, "loss": 0.6197, "step": 203150 }, { "epoch": 2.25, "learning_rate": 1.2508673635102016e-05, "loss": 0.5685, "step": 203155 }, { "epoch": 2.25, "learning_rate": 1.2507750907963506e-05, "loss": 0.6079, "step": 203160 }, { "epoch": 2.25, "learning_rate": 1.2506828180824992e-05, "loss": 0.5752, "step": 203165 }, { "epoch": 2.25, "learning_rate": 1.2505905453686481e-05, "loss": 0.6153, "step": 203170 }, { "epoch": 2.25, "learning_rate": 1.2504982726547968e-05, "loss": 0.6646, "step": 203175 }, { "epoch": 2.25, "learning_rate": 1.2504059999409457e-05, "loss": 0.5725, "step": 203180 }, { "epoch": 2.25, "learning_rate": 1.2503137272270943e-05, "loss": 0.605, "step": 203185 }, { "epoch": 2.25, "learning_rate": 1.250221454513243e-05, "loss": 0.5682, "step": 203190 }, { "epoch": 2.25, "learning_rate": 1.2501291817993919e-05, "loss": 0.5647, "step": 203195 }, { "epoch": 2.25, "learning_rate": 1.2500369090855405e-05, "loss": 0.6745, "step": 203200 }, { "epoch": 2.25, "learning_rate": 1.2499446363716893e-05, "loss": 0.5911, "step": 203205 }, { "epoch": 2.25, "learning_rate": 1.249852363657838e-05, "loss": 0.6693, "step": 203210 }, { "epoch": 2.25, "learning_rate": 1.2497600909439869e-05, "loss": 0.6224, "step": 203215 }, { "epoch": 2.25, "learning_rate": 1.2496678182301357e-05, "loss": 0.5748, "step": 203220 }, { "epoch": 2.25, "learning_rate": 1.2495755455162843e-05, "loss": 0.5807, "step": 203225 }, { "epoch": 2.25, "learning_rate": 1.249483272802433e-05, "loss": 0.571, "step": 203230 }, { "epoch": 2.25, "learning_rate": 1.2493910000885818e-05, "loss": 0.606, "step": 203235 }, { "epoch": 2.25, "learning_rate": 1.2492987273747306e-05, "loss": 0.5484, "step": 203240 }, { "epoch": 2.25, "learning_rate": 1.2492064546608794e-05, "loss": 0.6639, "step": 203245 }, { "epoch": 2.25, "learning_rate": 1.2491141819470282e-05, "loss": 0.5774, "step": 203250 }, { "epoch": 2.25, "learning_rate": 1.2490219092331768e-05, "loss": 0.6039, "step": 203255 }, { "epoch": 2.25, "learning_rate": 1.2489296365193256e-05, "loss": 0.588, "step": 203260 }, { "epoch": 2.25, "learning_rate": 1.2488373638054744e-05, "loss": 0.539, "step": 203265 }, { "epoch": 2.25, "learning_rate": 1.2487450910916232e-05, "loss": 0.5725, "step": 203270 }, { "epoch": 2.25, "learning_rate": 1.248652818377772e-05, "loss": 0.5415, "step": 203275 }, { "epoch": 2.25, "learning_rate": 1.2485605456639207e-05, "loss": 0.6012, "step": 203280 }, { "epoch": 2.25, "learning_rate": 1.2484682729500695e-05, "loss": 0.5867, "step": 203285 }, { "epoch": 2.25, "learning_rate": 1.2483760002362183e-05, "loss": 0.6033, "step": 203290 }, { "epoch": 2.25, "learning_rate": 1.2482837275223671e-05, "loss": 0.5704, "step": 203295 }, { "epoch": 2.25, "learning_rate": 1.2481914548085159e-05, "loss": 0.5911, "step": 203300 }, { "epoch": 2.25, "learning_rate": 1.2480991820946645e-05, "loss": 0.5791, "step": 203305 }, { "epoch": 2.25, "learning_rate": 1.2480069093808133e-05, "loss": 0.5444, "step": 203310 }, { "epoch": 2.25, "learning_rate": 1.2479146366669619e-05, "loss": 0.585, "step": 203315 }, { "epoch": 2.25, "learning_rate": 1.2478223639531107e-05, "loss": 0.6397, "step": 203320 }, { "epoch": 2.25, "learning_rate": 1.2477300912392595e-05, "loss": 0.6107, "step": 203325 }, { "epoch": 2.25, "learning_rate": 1.2476378185254082e-05, "loss": 0.6369, "step": 203330 }, { "epoch": 2.25, "learning_rate": 1.247545545811557e-05, "loss": 0.5832, "step": 203335 }, { "epoch": 2.25, "learning_rate": 1.2474532730977058e-05, "loss": 0.6273, "step": 203340 }, { "epoch": 2.25, "learning_rate": 1.2473610003838546e-05, "loss": 0.6077, "step": 203345 }, { "epoch": 2.25, "learning_rate": 1.2472687276700034e-05, "loss": 0.6082, "step": 203350 }, { "epoch": 2.25, "learning_rate": 1.2471764549561522e-05, "loss": 0.5764, "step": 203355 }, { "epoch": 2.25, "learning_rate": 1.2470841822423008e-05, "loss": 0.6193, "step": 203360 }, { "epoch": 2.25, "learning_rate": 1.2469919095284496e-05, "loss": 0.5776, "step": 203365 }, { "epoch": 2.25, "learning_rate": 1.2468996368145984e-05, "loss": 0.5789, "step": 203370 }, { "epoch": 2.25, "learning_rate": 1.2468073641007471e-05, "loss": 0.617, "step": 203375 }, { "epoch": 2.25, "learning_rate": 1.2467150913868958e-05, "loss": 0.586, "step": 203380 }, { "epoch": 2.25, "learning_rate": 1.2466228186730445e-05, "loss": 0.5557, "step": 203385 }, { "epoch": 2.25, "learning_rate": 1.2465305459591933e-05, "loss": 0.5977, "step": 203390 }, { "epoch": 2.25, "learning_rate": 1.2464382732453421e-05, "loss": 0.6018, "step": 203395 }, { "epoch": 2.25, "learning_rate": 1.2463460005314909e-05, "loss": 0.6279, "step": 203400 }, { "epoch": 2.25, "learning_rate": 1.2462537278176397e-05, "loss": 0.5627, "step": 203405 }, { "epoch": 2.25, "learning_rate": 1.2461614551037885e-05, "loss": 0.5422, "step": 203410 }, { "epoch": 2.25, "learning_rate": 1.2460691823899372e-05, "loss": 0.6122, "step": 203415 }, { "epoch": 2.25, "learning_rate": 1.2459769096760859e-05, "loss": 0.6191, "step": 203420 }, { "epoch": 2.25, "learning_rate": 1.2458846369622346e-05, "loss": 0.6027, "step": 203425 }, { "epoch": 2.25, "learning_rate": 1.2457923642483834e-05, "loss": 0.6467, "step": 203430 }, { "epoch": 2.25, "learning_rate": 1.2457000915345322e-05, "loss": 0.5948, "step": 203435 }, { "epoch": 2.25, "learning_rate": 1.245607818820681e-05, "loss": 0.6256, "step": 203440 }, { "epoch": 2.25, "learning_rate": 1.2455155461068298e-05, "loss": 0.611, "step": 203445 }, { "epoch": 2.25, "learning_rate": 1.2454232733929786e-05, "loss": 0.591, "step": 203450 }, { "epoch": 2.25, "learning_rate": 1.2453310006791272e-05, "loss": 0.6103, "step": 203455 }, { "epoch": 2.25, "learning_rate": 1.245238727965276e-05, "loss": 0.624, "step": 203460 }, { "epoch": 2.25, "learning_rate": 1.2451464552514248e-05, "loss": 0.5862, "step": 203465 }, { "epoch": 2.25, "learning_rate": 1.2450541825375735e-05, "loss": 0.5805, "step": 203470 }, { "epoch": 2.25, "learning_rate": 1.2449619098237222e-05, "loss": 0.562, "step": 203475 }, { "epoch": 2.25, "learning_rate": 1.244869637109871e-05, "loss": 0.6396, "step": 203480 }, { "epoch": 2.25, "learning_rate": 1.2447773643960197e-05, "loss": 0.571, "step": 203485 }, { "epoch": 2.25, "learning_rate": 1.2446850916821685e-05, "loss": 0.5923, "step": 203490 }, { "epoch": 2.25, "learning_rate": 1.2445928189683173e-05, "loss": 0.6593, "step": 203495 }, { "epoch": 2.25, "learning_rate": 1.244500546254466e-05, "loss": 0.5618, "step": 203500 }, { "epoch": 2.25, "learning_rate": 1.2444082735406149e-05, "loss": 0.5857, "step": 203505 }, { "epoch": 2.25, "learning_rate": 1.2443160008267636e-05, "loss": 0.5438, "step": 203510 }, { "epoch": 2.25, "learning_rate": 1.2442237281129124e-05, "loss": 0.6321, "step": 203515 }, { "epoch": 2.25, "learning_rate": 1.2441314553990612e-05, "loss": 0.593, "step": 203520 }, { "epoch": 2.25, "learning_rate": 1.2440391826852098e-05, "loss": 0.6078, "step": 203525 }, { "epoch": 2.25, "learning_rate": 1.2439469099713586e-05, "loss": 0.577, "step": 203530 }, { "epoch": 2.25, "learning_rate": 1.2438546372575072e-05, "loss": 0.5794, "step": 203535 }, { "epoch": 2.25, "learning_rate": 1.243762364543656e-05, "loss": 0.5956, "step": 203540 }, { "epoch": 2.25, "learning_rate": 1.2436700918298048e-05, "loss": 0.5805, "step": 203545 }, { "epoch": 2.25, "learning_rate": 1.2435778191159536e-05, "loss": 0.5555, "step": 203550 }, { "epoch": 2.25, "learning_rate": 1.2434855464021024e-05, "loss": 0.5632, "step": 203555 }, { "epoch": 2.25, "learning_rate": 1.2433932736882512e-05, "loss": 0.5816, "step": 203560 }, { "epoch": 2.25, "learning_rate": 1.2433010009744e-05, "loss": 0.5948, "step": 203565 }, { "epoch": 2.25, "learning_rate": 1.2432087282605487e-05, "loss": 0.5942, "step": 203570 }, { "epoch": 2.25, "learning_rate": 1.2431164555466975e-05, "loss": 0.6365, "step": 203575 }, { "epoch": 2.25, "learning_rate": 1.2430241828328463e-05, "loss": 0.5725, "step": 203580 }, { "epoch": 2.25, "learning_rate": 1.2429319101189949e-05, "loss": 0.6038, "step": 203585 }, { "epoch": 2.25, "learning_rate": 1.2428396374051437e-05, "loss": 0.586, "step": 203590 }, { "epoch": 2.25, "learning_rate": 1.2427473646912925e-05, "loss": 0.5994, "step": 203595 }, { "epoch": 2.25, "learning_rate": 1.2426550919774413e-05, "loss": 0.5966, "step": 203600 }, { "epoch": 2.25, "learning_rate": 1.2425628192635899e-05, "loss": 0.6372, "step": 203605 }, { "epoch": 2.25, "learning_rate": 1.2424705465497387e-05, "loss": 0.5816, "step": 203610 }, { "epoch": 2.25, "learning_rate": 1.2423782738358875e-05, "loss": 0.6089, "step": 203615 }, { "epoch": 2.25, "learning_rate": 1.2422860011220362e-05, "loss": 0.5275, "step": 203620 }, { "epoch": 2.25, "learning_rate": 1.242193728408185e-05, "loss": 0.5878, "step": 203625 }, { "epoch": 2.25, "learning_rate": 1.2421014556943338e-05, "loss": 0.5843, "step": 203630 }, { "epoch": 2.25, "learning_rate": 1.2420091829804826e-05, "loss": 0.6431, "step": 203635 }, { "epoch": 2.25, "learning_rate": 1.2419169102666312e-05, "loss": 0.5591, "step": 203640 }, { "epoch": 2.25, "learning_rate": 1.24182463755278e-05, "loss": 0.5888, "step": 203645 }, { "epoch": 2.25, "learning_rate": 1.2417323648389288e-05, "loss": 0.5763, "step": 203650 }, { "epoch": 2.26, "learning_rate": 1.2416400921250776e-05, "loss": 0.6034, "step": 203655 }, { "epoch": 2.26, "learning_rate": 1.2415478194112263e-05, "loss": 0.6556, "step": 203660 }, { "epoch": 2.26, "learning_rate": 1.2414555466973751e-05, "loss": 0.6245, "step": 203665 }, { "epoch": 2.26, "learning_rate": 1.241363273983524e-05, "loss": 0.5723, "step": 203670 }, { "epoch": 2.26, "learning_rate": 1.2412710012696727e-05, "loss": 0.5357, "step": 203675 }, { "epoch": 2.26, "learning_rate": 1.2411787285558215e-05, "loss": 0.5611, "step": 203680 }, { "epoch": 2.26, "learning_rate": 1.2410864558419701e-05, "loss": 0.6494, "step": 203685 }, { "epoch": 2.26, "learning_rate": 1.2409941831281189e-05, "loss": 0.556, "step": 203690 }, { "epoch": 2.26, "learning_rate": 1.2409019104142677e-05, "loss": 0.6447, "step": 203695 }, { "epoch": 2.26, "learning_rate": 1.2408096377004163e-05, "loss": 0.5632, "step": 203700 }, { "epoch": 2.26, "learning_rate": 1.240717364986565e-05, "loss": 0.612, "step": 203705 }, { "epoch": 2.26, "learning_rate": 1.2406250922727139e-05, "loss": 0.5629, "step": 203710 }, { "epoch": 2.26, "learning_rate": 1.2405328195588626e-05, "loss": 0.575, "step": 203715 }, { "epoch": 2.26, "learning_rate": 1.2404405468450114e-05, "loss": 0.5716, "step": 203720 }, { "epoch": 2.26, "learning_rate": 1.2403482741311602e-05, "loss": 0.675, "step": 203725 }, { "epoch": 2.26, "learning_rate": 1.240256001417309e-05, "loss": 0.5568, "step": 203730 }, { "epoch": 2.26, "learning_rate": 1.2401637287034578e-05, "loss": 0.6032, "step": 203735 }, { "epoch": 2.26, "learning_rate": 1.2400714559896066e-05, "loss": 0.6314, "step": 203740 }, { "epoch": 2.26, "learning_rate": 1.2399791832757554e-05, "loss": 0.5831, "step": 203745 }, { "epoch": 2.26, "learning_rate": 1.239886910561904e-05, "loss": 0.6269, "step": 203750 }, { "epoch": 2.26, "learning_rate": 1.2397946378480528e-05, "loss": 0.5737, "step": 203755 }, { "epoch": 2.26, "learning_rate": 1.2397023651342014e-05, "loss": 0.5878, "step": 203760 }, { "epoch": 2.26, "learning_rate": 1.2396100924203502e-05, "loss": 0.5666, "step": 203765 }, { "epoch": 2.26, "learning_rate": 1.239517819706499e-05, "loss": 0.5977, "step": 203770 }, { "epoch": 2.26, "learning_rate": 1.2394255469926477e-05, "loss": 0.5575, "step": 203775 }, { "epoch": 2.26, "learning_rate": 1.2393332742787965e-05, "loss": 0.572, "step": 203780 }, { "epoch": 2.26, "learning_rate": 1.2392410015649453e-05, "loss": 0.635, "step": 203785 }, { "epoch": 2.26, "learning_rate": 1.239148728851094e-05, "loss": 0.6034, "step": 203790 }, { "epoch": 2.26, "learning_rate": 1.2390564561372429e-05, "loss": 0.5768, "step": 203795 }, { "epoch": 2.26, "learning_rate": 1.2389641834233916e-05, "loss": 0.607, "step": 203800 }, { "epoch": 2.26, "learning_rate": 1.2388719107095403e-05, "loss": 0.5545, "step": 203805 }, { "epoch": 2.26, "learning_rate": 1.238779637995689e-05, "loss": 0.5705, "step": 203810 }, { "epoch": 2.26, "learning_rate": 1.2386873652818378e-05, "loss": 0.6447, "step": 203815 }, { "epoch": 2.26, "learning_rate": 1.2385950925679866e-05, "loss": 0.6158, "step": 203820 }, { "epoch": 2.26, "learning_rate": 1.2385028198541354e-05, "loss": 0.6283, "step": 203825 }, { "epoch": 2.26, "learning_rate": 1.2384105471402842e-05, "loss": 0.5656, "step": 203830 }, { "epoch": 2.26, "learning_rate": 1.2383182744264328e-05, "loss": 0.6279, "step": 203835 }, { "epoch": 2.26, "learning_rate": 1.2382260017125816e-05, "loss": 0.5762, "step": 203840 }, { "epoch": 2.26, "learning_rate": 1.2381337289987304e-05, "loss": 0.6068, "step": 203845 }, { "epoch": 2.26, "learning_rate": 1.2380414562848792e-05, "loss": 0.6641, "step": 203850 }, { "epoch": 2.26, "learning_rate": 1.237949183571028e-05, "loss": 0.5974, "step": 203855 }, { "epoch": 2.26, "learning_rate": 1.2378569108571767e-05, "loss": 0.5487, "step": 203860 }, { "epoch": 2.26, "learning_rate": 1.2377646381433253e-05, "loss": 0.6281, "step": 203865 }, { "epoch": 2.26, "learning_rate": 1.2376723654294741e-05, "loss": 0.6023, "step": 203870 }, { "epoch": 2.26, "learning_rate": 1.2375800927156229e-05, "loss": 0.5759, "step": 203875 }, { "epoch": 2.26, "learning_rate": 1.2374878200017717e-05, "loss": 0.6099, "step": 203880 }, { "epoch": 2.26, "learning_rate": 1.2373955472879205e-05, "loss": 0.5889, "step": 203885 }, { "epoch": 2.26, "learning_rate": 1.2373032745740693e-05, "loss": 0.5803, "step": 203890 }, { "epoch": 2.26, "learning_rate": 1.237211001860218e-05, "loss": 0.5579, "step": 203895 }, { "epoch": 2.26, "learning_rate": 1.2371187291463668e-05, "loss": 0.6321, "step": 203900 }, { "epoch": 2.26, "learning_rate": 1.2370264564325156e-05, "loss": 0.559, "step": 203905 }, { "epoch": 2.26, "learning_rate": 1.2369341837186642e-05, "loss": 0.5795, "step": 203910 }, { "epoch": 2.26, "learning_rate": 1.236841911004813e-05, "loss": 0.6182, "step": 203915 }, { "epoch": 2.26, "learning_rate": 1.2367496382909616e-05, "loss": 0.5538, "step": 203920 }, { "epoch": 2.26, "learning_rate": 1.2366573655771104e-05, "loss": 0.6376, "step": 203925 }, { "epoch": 2.26, "learning_rate": 1.2365650928632592e-05, "loss": 0.5626, "step": 203930 }, { "epoch": 2.26, "learning_rate": 1.236472820149408e-05, "loss": 0.6333, "step": 203935 }, { "epoch": 2.26, "learning_rate": 1.2363805474355568e-05, "loss": 0.6248, "step": 203940 }, { "epoch": 2.26, "learning_rate": 1.2362882747217056e-05, "loss": 0.5829, "step": 203945 }, { "epoch": 2.26, "learning_rate": 1.2361960020078543e-05, "loss": 0.6341, "step": 203950 }, { "epoch": 2.26, "learning_rate": 1.2361037292940031e-05, "loss": 0.5833, "step": 203955 }, { "epoch": 2.26, "learning_rate": 1.236011456580152e-05, "loss": 0.5886, "step": 203960 }, { "epoch": 2.26, "learning_rate": 1.2359191838663007e-05, "loss": 0.5802, "step": 203965 }, { "epoch": 2.26, "learning_rate": 1.2358269111524493e-05, "loss": 0.5398, "step": 203970 }, { "epoch": 2.26, "learning_rate": 1.2357346384385981e-05, "loss": 0.5561, "step": 203975 }, { "epoch": 2.26, "learning_rate": 1.2356423657247469e-05, "loss": 0.6286, "step": 203980 }, { "epoch": 2.26, "learning_rate": 1.2355500930108955e-05, "loss": 0.5937, "step": 203985 }, { "epoch": 2.26, "learning_rate": 1.2354578202970443e-05, "loss": 0.6106, "step": 203990 }, { "epoch": 2.26, "learning_rate": 1.235365547583193e-05, "loss": 0.6247, "step": 203995 }, { "epoch": 2.26, "learning_rate": 1.2352732748693419e-05, "loss": 0.5746, "step": 204000 }, { "epoch": 2.26, "eval_loss": 0.5622279644012451, "eval_runtime": 69.3106, "eval_samples_per_second": 28.856, "eval_steps_per_second": 14.428, "step": 204000 }, { "epoch": 2.26, "learning_rate": 1.2351810021554906e-05, "loss": 0.6031, "step": 204005 }, { "epoch": 2.26, "learning_rate": 1.2350887294416394e-05, "loss": 0.6161, "step": 204010 }, { "epoch": 2.26, "learning_rate": 1.2349964567277882e-05, "loss": 0.6079, "step": 204015 }, { "epoch": 2.26, "learning_rate": 1.234904184013937e-05, "loss": 0.6162, "step": 204020 }, { "epoch": 2.26, "learning_rate": 1.2348119113000856e-05, "loss": 0.5336, "step": 204025 }, { "epoch": 2.26, "learning_rate": 1.2347196385862344e-05, "loss": 0.5569, "step": 204030 }, { "epoch": 2.26, "learning_rate": 1.2346273658723832e-05, "loss": 0.5308, "step": 204035 }, { "epoch": 2.26, "learning_rate": 1.234535093158532e-05, "loss": 0.5775, "step": 204040 }, { "epoch": 2.26, "learning_rate": 1.2344428204446808e-05, "loss": 0.6187, "step": 204045 }, { "epoch": 2.26, "learning_rate": 1.2343505477308295e-05, "loss": 0.6119, "step": 204050 }, { "epoch": 2.26, "learning_rate": 1.2342582750169783e-05, "loss": 0.6136, "step": 204055 }, { "epoch": 2.26, "learning_rate": 1.234166002303127e-05, "loss": 0.6255, "step": 204060 }, { "epoch": 2.26, "learning_rate": 1.2340737295892757e-05, "loss": 0.6267, "step": 204065 }, { "epoch": 2.26, "learning_rate": 1.2339814568754245e-05, "loss": 0.637, "step": 204070 }, { "epoch": 2.26, "learning_rate": 1.2338891841615733e-05, "loss": 0.6375, "step": 204075 }, { "epoch": 2.26, "learning_rate": 1.233796911447722e-05, "loss": 0.5326, "step": 204080 }, { "epoch": 2.26, "learning_rate": 1.2337046387338707e-05, "loss": 0.5572, "step": 204085 }, { "epoch": 2.26, "learning_rate": 1.2336123660200195e-05, "loss": 0.6283, "step": 204090 }, { "epoch": 2.26, "learning_rate": 1.2335200933061683e-05, "loss": 0.6312, "step": 204095 }, { "epoch": 2.26, "learning_rate": 1.233427820592317e-05, "loss": 0.6364, "step": 204100 }, { "epoch": 2.26, "learning_rate": 1.2333355478784658e-05, "loss": 0.6138, "step": 204105 }, { "epoch": 2.26, "learning_rate": 1.2332432751646146e-05, "loss": 0.6004, "step": 204110 }, { "epoch": 2.26, "learning_rate": 1.2331510024507634e-05, "loss": 0.5658, "step": 204115 }, { "epoch": 2.26, "learning_rate": 1.2330587297369122e-05, "loss": 0.6149, "step": 204120 }, { "epoch": 2.26, "learning_rate": 1.232966457023061e-05, "loss": 0.6241, "step": 204125 }, { "epoch": 2.26, "learning_rate": 1.2328741843092098e-05, "loss": 0.6362, "step": 204130 }, { "epoch": 2.26, "learning_rate": 1.2327819115953584e-05, "loss": 0.5653, "step": 204135 }, { "epoch": 2.26, "learning_rate": 1.232689638881507e-05, "loss": 0.5985, "step": 204140 }, { "epoch": 2.26, "learning_rate": 1.2325973661676558e-05, "loss": 0.582, "step": 204145 }, { "epoch": 2.26, "learning_rate": 1.2325050934538046e-05, "loss": 0.5966, "step": 204150 }, { "epoch": 2.26, "learning_rate": 1.2324128207399533e-05, "loss": 0.6321, "step": 204155 }, { "epoch": 2.26, "learning_rate": 1.2323205480261021e-05, "loss": 0.597, "step": 204160 }, { "epoch": 2.26, "learning_rate": 1.2322282753122509e-05, "loss": 0.6309, "step": 204165 }, { "epoch": 2.26, "learning_rate": 1.2321360025983997e-05, "loss": 0.617, "step": 204170 }, { "epoch": 2.26, "learning_rate": 1.2320437298845485e-05, "loss": 0.548, "step": 204175 }, { "epoch": 2.26, "learning_rate": 1.2319514571706973e-05, "loss": 0.5692, "step": 204180 }, { "epoch": 2.26, "learning_rate": 1.231859184456846e-05, "loss": 0.6233, "step": 204185 }, { "epoch": 2.26, "learning_rate": 1.2317669117429947e-05, "loss": 0.6172, "step": 204190 }, { "epoch": 2.26, "learning_rate": 1.2316746390291434e-05, "loss": 0.5881, "step": 204195 }, { "epoch": 2.26, "learning_rate": 1.2315823663152922e-05, "loss": 0.5833, "step": 204200 }, { "epoch": 2.26, "learning_rate": 1.231490093601441e-05, "loss": 0.546, "step": 204205 }, { "epoch": 2.26, "learning_rate": 1.2313978208875898e-05, "loss": 0.6322, "step": 204210 }, { "epoch": 2.26, "learning_rate": 1.2313055481737384e-05, "loss": 0.6012, "step": 204215 }, { "epoch": 2.26, "learning_rate": 1.2312132754598872e-05, "loss": 0.6008, "step": 204220 }, { "epoch": 2.26, "learning_rate": 1.231121002746036e-05, "loss": 0.6217, "step": 204225 }, { "epoch": 2.26, "learning_rate": 1.2310287300321848e-05, "loss": 0.5588, "step": 204230 }, { "epoch": 2.26, "learning_rate": 1.2309364573183336e-05, "loss": 0.5902, "step": 204235 }, { "epoch": 2.26, "learning_rate": 1.2308441846044823e-05, "loss": 0.5795, "step": 204240 }, { "epoch": 2.26, "learning_rate": 1.2307519118906311e-05, "loss": 0.5668, "step": 204245 }, { "epoch": 2.26, "learning_rate": 1.2306596391767797e-05, "loss": 0.6176, "step": 204250 }, { "epoch": 2.26, "learning_rate": 1.2305673664629285e-05, "loss": 0.5623, "step": 204255 }, { "epoch": 2.26, "learning_rate": 1.2304750937490773e-05, "loss": 0.6084, "step": 204260 }, { "epoch": 2.26, "learning_rate": 1.2303828210352261e-05, "loss": 0.5533, "step": 204265 }, { "epoch": 2.26, "learning_rate": 1.2302905483213749e-05, "loss": 0.6691, "step": 204270 }, { "epoch": 2.26, "learning_rate": 1.2301982756075237e-05, "loss": 0.614, "step": 204275 }, { "epoch": 2.26, "learning_rate": 1.2301060028936725e-05, "loss": 0.6534, "step": 204280 }, { "epoch": 2.26, "learning_rate": 1.2300137301798212e-05, "loss": 0.5577, "step": 204285 }, { "epoch": 2.26, "learning_rate": 1.2299214574659699e-05, "loss": 0.6217, "step": 204290 }, { "epoch": 2.26, "learning_rate": 1.2298291847521186e-05, "loss": 0.5753, "step": 204295 }, { "epoch": 2.26, "learning_rate": 1.2297369120382674e-05, "loss": 0.601, "step": 204300 }, { "epoch": 2.26, "learning_rate": 1.229644639324416e-05, "loss": 0.5725, "step": 204305 }, { "epoch": 2.26, "learning_rate": 1.2295523666105648e-05, "loss": 0.5879, "step": 204310 }, { "epoch": 2.26, "learning_rate": 1.2294600938967136e-05, "loss": 0.5974, "step": 204315 }, { "epoch": 2.26, "learning_rate": 1.2293678211828624e-05, "loss": 0.5482, "step": 204320 }, { "epoch": 2.26, "learning_rate": 1.2292755484690112e-05, "loss": 0.6396, "step": 204325 }, { "epoch": 2.26, "learning_rate": 1.22918327575516e-05, "loss": 0.5712, "step": 204330 }, { "epoch": 2.26, "learning_rate": 1.2290910030413087e-05, "loss": 0.5048, "step": 204335 }, { "epoch": 2.26, "learning_rate": 1.2289987303274575e-05, "loss": 0.571, "step": 204340 }, { "epoch": 2.26, "learning_rate": 1.2289064576136063e-05, "loss": 0.5736, "step": 204345 }, { "epoch": 2.26, "learning_rate": 1.2288141848997551e-05, "loss": 0.5917, "step": 204350 }, { "epoch": 2.26, "learning_rate": 1.2287219121859037e-05, "loss": 0.5628, "step": 204355 }, { "epoch": 2.26, "learning_rate": 1.2286296394720525e-05, "loss": 0.5733, "step": 204360 }, { "epoch": 2.26, "learning_rate": 1.2285373667582011e-05, "loss": 0.5637, "step": 204365 }, { "epoch": 2.26, "learning_rate": 1.2284450940443499e-05, "loss": 0.5743, "step": 204370 }, { "epoch": 2.26, "learning_rate": 1.2283528213304987e-05, "loss": 0.5691, "step": 204375 }, { "epoch": 2.26, "learning_rate": 1.2282605486166475e-05, "loss": 0.5836, "step": 204380 }, { "epoch": 2.26, "learning_rate": 1.2281682759027963e-05, "loss": 0.5396, "step": 204385 }, { "epoch": 2.26, "learning_rate": 1.228076003188945e-05, "loss": 0.6177, "step": 204390 }, { "epoch": 2.26, "learning_rate": 1.2279837304750938e-05, "loss": 0.606, "step": 204395 }, { "epoch": 2.26, "learning_rate": 1.2278914577612426e-05, "loss": 0.6529, "step": 204400 }, { "epoch": 2.26, "learning_rate": 1.2277991850473914e-05, "loss": 0.6186, "step": 204405 }, { "epoch": 2.26, "learning_rate": 1.2277069123335402e-05, "loss": 0.651, "step": 204410 }, { "epoch": 2.26, "learning_rate": 1.2276146396196888e-05, "loss": 0.6015, "step": 204415 }, { "epoch": 2.26, "learning_rate": 1.2275223669058376e-05, "loss": 0.5279, "step": 204420 }, { "epoch": 2.26, "learning_rate": 1.2274300941919864e-05, "loss": 0.6214, "step": 204425 }, { "epoch": 2.26, "learning_rate": 1.2273378214781352e-05, "loss": 0.5715, "step": 204430 }, { "epoch": 2.26, "learning_rate": 1.227245548764284e-05, "loss": 0.5737, "step": 204435 }, { "epoch": 2.26, "learning_rate": 1.2271532760504326e-05, "loss": 0.5953, "step": 204440 }, { "epoch": 2.26, "learning_rate": 1.2270610033365813e-05, "loss": 0.608, "step": 204445 }, { "epoch": 2.26, "learning_rate": 1.2269687306227301e-05, "loss": 0.6649, "step": 204450 }, { "epoch": 2.26, "learning_rate": 1.2268764579088789e-05, "loss": 0.6591, "step": 204455 }, { "epoch": 2.26, "learning_rate": 1.2267841851950277e-05, "loss": 0.6459, "step": 204460 }, { "epoch": 2.26, "learning_rate": 1.2266919124811765e-05, "loss": 0.6619, "step": 204465 }, { "epoch": 2.26, "learning_rate": 1.2265996397673251e-05, "loss": 0.5864, "step": 204470 }, { "epoch": 2.26, "learning_rate": 1.2265073670534739e-05, "loss": 0.5934, "step": 204475 }, { "epoch": 2.26, "learning_rate": 1.2264150943396227e-05, "loss": 0.6436, "step": 204480 }, { "epoch": 2.26, "learning_rate": 1.2263228216257714e-05, "loss": 0.594, "step": 204485 }, { "epoch": 2.26, "learning_rate": 1.2262305489119202e-05, "loss": 0.6166, "step": 204490 }, { "epoch": 2.26, "learning_rate": 1.226138276198069e-05, "loss": 0.598, "step": 204495 }, { "epoch": 2.26, "learning_rate": 1.2260460034842178e-05, "loss": 0.6344, "step": 204500 }, { "epoch": 2.26, "learning_rate": 1.2259537307703666e-05, "loss": 0.6194, "step": 204505 }, { "epoch": 2.26, "learning_rate": 1.2258614580565154e-05, "loss": 0.6393, "step": 204510 }, { "epoch": 2.26, "learning_rate": 1.2257691853426642e-05, "loss": 0.5494, "step": 204515 }, { "epoch": 2.26, "learning_rate": 1.2256769126288128e-05, "loss": 0.6084, "step": 204520 }, { "epoch": 2.26, "learning_rate": 1.2255846399149616e-05, "loss": 0.5347, "step": 204525 }, { "epoch": 2.26, "learning_rate": 1.2254923672011102e-05, "loss": 0.5795, "step": 204530 }, { "epoch": 2.26, "learning_rate": 1.225400094487259e-05, "loss": 0.5713, "step": 204535 }, { "epoch": 2.26, "learning_rate": 1.2253078217734077e-05, "loss": 0.6079, "step": 204540 }, { "epoch": 2.26, "learning_rate": 1.2252155490595565e-05, "loss": 0.5784, "step": 204545 }, { "epoch": 2.26, "learning_rate": 1.2251232763457053e-05, "loss": 0.5673, "step": 204550 }, { "epoch": 2.26, "learning_rate": 1.2250310036318541e-05, "loss": 0.5939, "step": 204555 }, { "epoch": 2.27, "learning_rate": 1.2249387309180029e-05, "loss": 0.6055, "step": 204560 }, { "epoch": 2.27, "learning_rate": 1.2248464582041517e-05, "loss": 0.6128, "step": 204565 }, { "epoch": 2.27, "learning_rate": 1.2247541854903005e-05, "loss": 0.6073, "step": 204570 }, { "epoch": 2.27, "learning_rate": 1.224661912776449e-05, "loss": 0.5711, "step": 204575 }, { "epoch": 2.27, "learning_rate": 1.2245696400625979e-05, "loss": 0.5587, "step": 204580 }, { "epoch": 2.27, "learning_rate": 1.2244773673487466e-05, "loss": 0.5923, "step": 204585 }, { "epoch": 2.27, "learning_rate": 1.2243850946348954e-05, "loss": 0.6187, "step": 204590 }, { "epoch": 2.27, "learning_rate": 1.224292821921044e-05, "loss": 0.5805, "step": 204595 }, { "epoch": 2.27, "learning_rate": 1.2242005492071928e-05, "loss": 0.5984, "step": 204600 }, { "epoch": 2.27, "learning_rate": 1.2241082764933416e-05, "loss": 0.6316, "step": 204605 }, { "epoch": 2.27, "learning_rate": 1.2240160037794904e-05, "loss": 0.5552, "step": 204610 }, { "epoch": 2.27, "learning_rate": 1.2239237310656392e-05, "loss": 0.5992, "step": 204615 }, { "epoch": 2.27, "learning_rate": 1.223831458351788e-05, "loss": 0.5894, "step": 204620 }, { "epoch": 2.27, "learning_rate": 1.2237391856379367e-05, "loss": 0.576, "step": 204625 }, { "epoch": 2.27, "learning_rate": 1.2236469129240855e-05, "loss": 0.5519, "step": 204630 }, { "epoch": 2.27, "learning_rate": 1.2235546402102341e-05, "loss": 0.6319, "step": 204635 }, { "epoch": 2.27, "learning_rate": 1.223462367496383e-05, "loss": 0.6184, "step": 204640 }, { "epoch": 2.27, "learning_rate": 1.2233700947825317e-05, "loss": 0.6144, "step": 204645 }, { "epoch": 2.27, "learning_rate": 1.2232778220686805e-05, "loss": 0.6692, "step": 204650 }, { "epoch": 2.27, "learning_rate": 1.2231855493548293e-05, "loss": 0.6069, "step": 204655 }, { "epoch": 2.27, "learning_rate": 1.223093276640978e-05, "loss": 0.5702, "step": 204660 }, { "epoch": 2.27, "learning_rate": 1.2230010039271269e-05, "loss": 0.5667, "step": 204665 }, { "epoch": 2.27, "learning_rate": 1.2229087312132755e-05, "loss": 0.5892, "step": 204670 }, { "epoch": 2.27, "learning_rate": 1.2228164584994243e-05, "loss": 0.5811, "step": 204675 }, { "epoch": 2.27, "learning_rate": 1.222724185785573e-05, "loss": 0.623, "step": 204680 }, { "epoch": 2.27, "learning_rate": 1.2226319130717218e-05, "loss": 0.6087, "step": 204685 }, { "epoch": 2.27, "learning_rate": 1.2225396403578704e-05, "loss": 0.5741, "step": 204690 }, { "epoch": 2.27, "learning_rate": 1.2224473676440192e-05, "loss": 0.6231, "step": 204695 }, { "epoch": 2.27, "learning_rate": 1.222355094930168e-05, "loss": 0.5778, "step": 204700 }, { "epoch": 2.27, "learning_rate": 1.2222628222163168e-05, "loss": 0.5773, "step": 204705 }, { "epoch": 2.27, "learning_rate": 1.2221705495024656e-05, "loss": 0.6517, "step": 204710 }, { "epoch": 2.27, "learning_rate": 1.2220782767886144e-05, "loss": 0.5874, "step": 204715 }, { "epoch": 2.27, "learning_rate": 1.2219860040747631e-05, "loss": 0.564, "step": 204720 }, { "epoch": 2.27, "learning_rate": 1.221893731360912e-05, "loss": 0.5513, "step": 204725 }, { "epoch": 2.27, "learning_rate": 1.2218014586470607e-05, "loss": 0.5669, "step": 204730 }, { "epoch": 2.27, "learning_rate": 1.2217091859332095e-05, "loss": 0.5879, "step": 204735 }, { "epoch": 2.27, "learning_rate": 1.2216169132193581e-05, "loss": 0.5949, "step": 204740 }, { "epoch": 2.27, "learning_rate": 1.2215246405055069e-05, "loss": 0.5532, "step": 204745 }, { "epoch": 2.27, "learning_rate": 1.2214323677916555e-05, "loss": 0.5744, "step": 204750 }, { "epoch": 2.27, "learning_rate": 1.2213400950778043e-05, "loss": 0.6719, "step": 204755 }, { "epoch": 2.27, "learning_rate": 1.2212478223639531e-05, "loss": 0.5915, "step": 204760 }, { "epoch": 2.27, "learning_rate": 1.2211555496501019e-05, "loss": 0.6364, "step": 204765 }, { "epoch": 2.27, "learning_rate": 1.2210632769362507e-05, "loss": 0.647, "step": 204770 }, { "epoch": 2.27, "learning_rate": 1.2209710042223994e-05, "loss": 0.6338, "step": 204775 }, { "epoch": 2.27, "learning_rate": 1.2208787315085482e-05, "loss": 0.5989, "step": 204780 }, { "epoch": 2.27, "learning_rate": 1.220786458794697e-05, "loss": 0.6319, "step": 204785 }, { "epoch": 2.27, "learning_rate": 1.2206941860808458e-05, "loss": 0.6, "step": 204790 }, { "epoch": 2.27, "learning_rate": 1.2206019133669946e-05, "loss": 0.6116, "step": 204795 }, { "epoch": 2.27, "learning_rate": 1.2205096406531432e-05, "loss": 0.5689, "step": 204800 }, { "epoch": 2.27, "learning_rate": 1.220417367939292e-05, "loss": 0.5683, "step": 204805 }, { "epoch": 2.27, "learning_rate": 1.2203250952254408e-05, "loss": 0.6348, "step": 204810 }, { "epoch": 2.27, "learning_rate": 1.2202328225115896e-05, "loss": 0.5652, "step": 204815 }, { "epoch": 2.27, "learning_rate": 1.2201405497977382e-05, "loss": 0.5926, "step": 204820 }, { "epoch": 2.27, "learning_rate": 1.220048277083887e-05, "loss": 0.6371, "step": 204825 }, { "epoch": 2.27, "learning_rate": 1.2199560043700357e-05, "loss": 0.6012, "step": 204830 }, { "epoch": 2.27, "learning_rate": 1.2198637316561845e-05, "loss": 0.5617, "step": 204835 }, { "epoch": 2.27, "learning_rate": 1.2197714589423333e-05, "loss": 0.6469, "step": 204840 }, { "epoch": 2.27, "learning_rate": 1.2196791862284821e-05, "loss": 0.5731, "step": 204845 }, { "epoch": 2.27, "learning_rate": 1.2195869135146309e-05, "loss": 0.5111, "step": 204850 }, { "epoch": 2.27, "learning_rate": 1.2194946408007795e-05, "loss": 0.5996, "step": 204855 }, { "epoch": 2.27, "learning_rate": 1.2194023680869283e-05, "loss": 0.6158, "step": 204860 }, { "epoch": 2.27, "learning_rate": 1.219310095373077e-05, "loss": 0.6233, "step": 204865 }, { "epoch": 2.27, "learning_rate": 1.2192178226592258e-05, "loss": 0.5735, "step": 204870 }, { "epoch": 2.27, "learning_rate": 1.2191255499453746e-05, "loss": 0.5945, "step": 204875 }, { "epoch": 2.27, "learning_rate": 1.2190332772315234e-05, "loss": 0.588, "step": 204880 }, { "epoch": 2.27, "learning_rate": 1.2189410045176722e-05, "loss": 0.6108, "step": 204885 }, { "epoch": 2.27, "learning_rate": 1.218848731803821e-05, "loss": 0.5587, "step": 204890 }, { "epoch": 2.27, "learning_rate": 1.2187564590899696e-05, "loss": 0.6274, "step": 204895 }, { "epoch": 2.27, "learning_rate": 1.2186641863761184e-05, "loss": 0.6031, "step": 204900 }, { "epoch": 2.27, "learning_rate": 1.2185719136622672e-05, "loss": 0.617, "step": 204905 }, { "epoch": 2.27, "learning_rate": 1.218479640948416e-05, "loss": 0.5578, "step": 204910 }, { "epoch": 2.27, "learning_rate": 1.2183873682345646e-05, "loss": 0.6023, "step": 204915 }, { "epoch": 2.27, "learning_rate": 1.2182950955207134e-05, "loss": 0.6026, "step": 204920 }, { "epoch": 2.27, "learning_rate": 1.2182028228068621e-05, "loss": 0.6071, "step": 204925 }, { "epoch": 2.27, "learning_rate": 1.218110550093011e-05, "loss": 0.5794, "step": 204930 }, { "epoch": 2.27, "learning_rate": 1.2180182773791597e-05, "loss": 0.5969, "step": 204935 }, { "epoch": 2.27, "learning_rate": 1.2179260046653085e-05, "loss": 0.6314, "step": 204940 }, { "epoch": 2.27, "learning_rate": 1.2178337319514573e-05, "loss": 0.6264, "step": 204945 }, { "epoch": 2.27, "learning_rate": 1.217741459237606e-05, "loss": 0.5709, "step": 204950 }, { "epoch": 2.27, "learning_rate": 1.2176491865237549e-05, "loss": 0.5588, "step": 204955 }, { "epoch": 2.27, "learning_rate": 1.2175569138099036e-05, "loss": 0.56, "step": 204960 }, { "epoch": 2.27, "learning_rate": 1.2174646410960523e-05, "loss": 0.609, "step": 204965 }, { "epoch": 2.27, "learning_rate": 1.217372368382201e-05, "loss": 0.6063, "step": 204970 }, { "epoch": 2.27, "learning_rate": 1.2172800956683497e-05, "loss": 0.5969, "step": 204975 }, { "epoch": 2.27, "learning_rate": 1.2171878229544984e-05, "loss": 0.6186, "step": 204980 }, { "epoch": 2.27, "learning_rate": 1.2170955502406472e-05, "loss": 0.5536, "step": 204985 }, { "epoch": 2.27, "learning_rate": 1.217003277526796e-05, "loss": 0.5935, "step": 204990 }, { "epoch": 2.27, "learning_rate": 1.2169110048129448e-05, "loss": 0.6024, "step": 204995 }, { "epoch": 2.27, "learning_rate": 1.2168187320990936e-05, "loss": 0.6269, "step": 205000 }, { "epoch": 2.27, "eval_loss": 0.5804030895233154, "eval_runtime": 69.2621, "eval_samples_per_second": 28.876, "eval_steps_per_second": 14.438, "step": 205000 }, { "epoch": 2.27, "learning_rate": 1.2167264593852424e-05, "loss": 0.6044, "step": 205005 }, { "epoch": 2.27, "learning_rate": 1.2166341866713911e-05, "loss": 0.6273, "step": 205010 }, { "epoch": 2.27, "learning_rate": 1.21654191395754e-05, "loss": 0.6073, "step": 205015 }, { "epoch": 2.27, "learning_rate": 1.2164496412436885e-05, "loss": 0.6124, "step": 205020 }, { "epoch": 2.27, "learning_rate": 1.2163573685298373e-05, "loss": 0.5687, "step": 205025 }, { "epoch": 2.27, "learning_rate": 1.2162650958159861e-05, "loss": 0.5926, "step": 205030 }, { "epoch": 2.27, "learning_rate": 1.2161728231021349e-05, "loss": 0.6456, "step": 205035 }, { "epoch": 2.27, "learning_rate": 1.2160805503882837e-05, "loss": 0.5965, "step": 205040 }, { "epoch": 2.27, "learning_rate": 1.2159882776744325e-05, "loss": 0.5752, "step": 205045 }, { "epoch": 2.27, "learning_rate": 1.2158960049605811e-05, "loss": 0.5853, "step": 205050 }, { "epoch": 2.27, "learning_rate": 1.2158037322467299e-05, "loss": 0.593, "step": 205055 }, { "epoch": 2.27, "learning_rate": 1.2157114595328787e-05, "loss": 0.6018, "step": 205060 }, { "epoch": 2.27, "learning_rate": 1.2156191868190274e-05, "loss": 0.6065, "step": 205065 }, { "epoch": 2.27, "learning_rate": 1.2155269141051762e-05, "loss": 0.5927, "step": 205070 }, { "epoch": 2.27, "learning_rate": 1.215434641391325e-05, "loss": 0.5943, "step": 205075 }, { "epoch": 2.27, "learning_rate": 1.2153423686774736e-05, "loss": 0.6112, "step": 205080 }, { "epoch": 2.27, "learning_rate": 1.2152500959636224e-05, "loss": 0.641, "step": 205085 }, { "epoch": 2.27, "learning_rate": 1.2151578232497712e-05, "loss": 0.5648, "step": 205090 }, { "epoch": 2.27, "learning_rate": 1.21506555053592e-05, "loss": 0.618, "step": 205095 }, { "epoch": 2.27, "learning_rate": 1.2149732778220688e-05, "loss": 0.6342, "step": 205100 }, { "epoch": 2.27, "learning_rate": 1.2148810051082176e-05, "loss": 0.6133, "step": 205105 }, { "epoch": 2.27, "learning_rate": 1.2147887323943663e-05, "loss": 0.5574, "step": 205110 }, { "epoch": 2.27, "learning_rate": 1.2146964596805151e-05, "loss": 0.5371, "step": 205115 }, { "epoch": 2.27, "learning_rate": 1.2146041869666639e-05, "loss": 0.5655, "step": 205120 }, { "epoch": 2.27, "learning_rate": 1.2145119142528125e-05, "loss": 0.6307, "step": 205125 }, { "epoch": 2.27, "learning_rate": 1.2144196415389613e-05, "loss": 0.6585, "step": 205130 }, { "epoch": 2.27, "learning_rate": 1.21432736882511e-05, "loss": 0.5752, "step": 205135 }, { "epoch": 2.27, "learning_rate": 1.2142350961112587e-05, "loss": 0.602, "step": 205140 }, { "epoch": 2.27, "learning_rate": 1.2141428233974075e-05, "loss": 0.5694, "step": 205145 }, { "epoch": 2.27, "learning_rate": 1.2140505506835563e-05, "loss": 0.5972, "step": 205150 }, { "epoch": 2.27, "learning_rate": 1.213958277969705e-05, "loss": 0.5932, "step": 205155 }, { "epoch": 2.27, "learning_rate": 1.2138660052558538e-05, "loss": 0.5878, "step": 205160 }, { "epoch": 2.27, "learning_rate": 1.2137737325420026e-05, "loss": 0.5722, "step": 205165 }, { "epoch": 2.27, "learning_rate": 1.2136814598281514e-05, "loss": 0.5736, "step": 205170 }, { "epoch": 2.27, "learning_rate": 1.2135891871143002e-05, "loss": 0.58, "step": 205175 }, { "epoch": 2.27, "learning_rate": 1.213496914400449e-05, "loss": 0.605, "step": 205180 }, { "epoch": 2.27, "learning_rate": 1.2134046416865976e-05, "loss": 0.5707, "step": 205185 }, { "epoch": 2.27, "learning_rate": 1.2133123689727464e-05, "loss": 0.5514, "step": 205190 }, { "epoch": 2.27, "learning_rate": 1.2132200962588952e-05, "loss": 0.6761, "step": 205195 }, { "epoch": 2.27, "learning_rate": 1.2131278235450438e-05, "loss": 0.5857, "step": 205200 }, { "epoch": 2.27, "learning_rate": 1.2130355508311926e-05, "loss": 0.5762, "step": 205205 }, { "epoch": 2.27, "learning_rate": 1.2129432781173414e-05, "loss": 0.5455, "step": 205210 }, { "epoch": 2.27, "learning_rate": 1.2128510054034901e-05, "loss": 0.6176, "step": 205215 }, { "epoch": 2.27, "learning_rate": 1.212758732689639e-05, "loss": 0.6011, "step": 205220 }, { "epoch": 2.27, "learning_rate": 1.2126664599757877e-05, "loss": 0.5864, "step": 205225 }, { "epoch": 2.27, "learning_rate": 1.2125741872619365e-05, "loss": 0.6233, "step": 205230 }, { "epoch": 2.27, "learning_rate": 1.2124819145480853e-05, "loss": 0.5883, "step": 205235 }, { "epoch": 2.27, "learning_rate": 1.2123896418342339e-05, "loss": 0.5847, "step": 205240 }, { "epoch": 2.27, "learning_rate": 1.2122973691203827e-05, "loss": 0.6203, "step": 205245 }, { "epoch": 2.27, "learning_rate": 1.2122050964065315e-05, "loss": 0.6167, "step": 205250 }, { "epoch": 2.27, "learning_rate": 1.2121128236926803e-05, "loss": 0.6041, "step": 205255 }, { "epoch": 2.27, "learning_rate": 1.212020550978829e-05, "loss": 0.5319, "step": 205260 }, { "epoch": 2.27, "learning_rate": 1.2119282782649778e-05, "loss": 0.6116, "step": 205265 }, { "epoch": 2.27, "learning_rate": 1.2118360055511266e-05, "loss": 0.6649, "step": 205270 }, { "epoch": 2.27, "learning_rate": 1.2117437328372752e-05, "loss": 0.5632, "step": 205275 }, { "epoch": 2.27, "learning_rate": 1.211651460123424e-05, "loss": 0.5911, "step": 205280 }, { "epoch": 2.27, "learning_rate": 1.2115591874095728e-05, "loss": 0.5977, "step": 205285 }, { "epoch": 2.27, "learning_rate": 1.2114669146957216e-05, "loss": 0.5706, "step": 205290 }, { "epoch": 2.27, "learning_rate": 1.2113746419818704e-05, "loss": 0.6045, "step": 205295 }, { "epoch": 2.27, "learning_rate": 1.211282369268019e-05, "loss": 0.6035, "step": 205300 }, { "epoch": 2.27, "learning_rate": 1.2111900965541678e-05, "loss": 0.6228, "step": 205305 }, { "epoch": 2.27, "learning_rate": 1.2110978238403165e-05, "loss": 0.6143, "step": 205310 }, { "epoch": 2.27, "learning_rate": 1.2110055511264653e-05, "loss": 0.5748, "step": 205315 }, { "epoch": 2.27, "learning_rate": 1.2109132784126141e-05, "loss": 0.6308, "step": 205320 }, { "epoch": 2.27, "learning_rate": 1.2108210056987629e-05, "loss": 0.6172, "step": 205325 }, { "epoch": 2.27, "learning_rate": 1.2107287329849117e-05, "loss": 0.6168, "step": 205330 }, { "epoch": 2.27, "learning_rate": 1.2106364602710605e-05, "loss": 0.623, "step": 205335 }, { "epoch": 2.27, "learning_rate": 1.2105441875572093e-05, "loss": 0.6315, "step": 205340 }, { "epoch": 2.27, "learning_rate": 1.210451914843358e-05, "loss": 0.6306, "step": 205345 }, { "epoch": 2.27, "learning_rate": 1.2103596421295067e-05, "loss": 0.597, "step": 205350 }, { "epoch": 2.27, "learning_rate": 1.2102673694156554e-05, "loss": 0.5758, "step": 205355 }, { "epoch": 2.27, "learning_rate": 1.210175096701804e-05, "loss": 0.6092, "step": 205360 }, { "epoch": 2.27, "learning_rate": 1.2100828239879528e-05, "loss": 0.5707, "step": 205365 }, { "epoch": 2.27, "learning_rate": 1.2099905512741016e-05, "loss": 0.587, "step": 205370 }, { "epoch": 2.27, "learning_rate": 1.2098982785602504e-05, "loss": 0.5519, "step": 205375 }, { "epoch": 2.27, "learning_rate": 1.2098060058463992e-05, "loss": 0.5984, "step": 205380 }, { "epoch": 2.27, "learning_rate": 1.209713733132548e-05, "loss": 0.6093, "step": 205385 }, { "epoch": 2.27, "learning_rate": 1.2096214604186968e-05, "loss": 0.631, "step": 205390 }, { "epoch": 2.27, "learning_rate": 1.2095291877048455e-05, "loss": 0.617, "step": 205395 }, { "epoch": 2.27, "learning_rate": 1.2094369149909943e-05, "loss": 0.5808, "step": 205400 }, { "epoch": 2.27, "learning_rate": 1.209344642277143e-05, "loss": 0.5789, "step": 205405 }, { "epoch": 2.27, "learning_rate": 1.2092523695632917e-05, "loss": 0.5241, "step": 205410 }, { "epoch": 2.27, "learning_rate": 1.2091600968494405e-05, "loss": 0.5909, "step": 205415 }, { "epoch": 2.27, "learning_rate": 1.2090678241355893e-05, "loss": 0.5967, "step": 205420 }, { "epoch": 2.27, "learning_rate": 1.2089755514217381e-05, "loss": 0.628, "step": 205425 }, { "epoch": 2.27, "learning_rate": 1.2088832787078867e-05, "loss": 0.6359, "step": 205430 }, { "epoch": 2.27, "learning_rate": 1.2087910059940355e-05, "loss": 0.5797, "step": 205435 }, { "epoch": 2.27, "learning_rate": 1.2086987332801843e-05, "loss": 0.5746, "step": 205440 }, { "epoch": 2.27, "learning_rate": 1.208606460566333e-05, "loss": 0.6331, "step": 205445 }, { "epoch": 2.27, "learning_rate": 1.2085141878524818e-05, "loss": 0.5726, "step": 205450 }, { "epoch": 2.27, "learning_rate": 1.2084219151386306e-05, "loss": 0.6241, "step": 205455 }, { "epoch": 2.28, "learning_rate": 1.2083296424247794e-05, "loss": 0.5807, "step": 205460 }, { "epoch": 2.28, "learning_rate": 1.208237369710928e-05, "loss": 0.6065, "step": 205465 }, { "epoch": 2.28, "learning_rate": 1.2081450969970768e-05, "loss": 0.6205, "step": 205470 }, { "epoch": 2.28, "learning_rate": 1.2080528242832256e-05, "loss": 0.5982, "step": 205475 }, { "epoch": 2.28, "learning_rate": 1.2079605515693744e-05, "loss": 0.5622, "step": 205480 }, { "epoch": 2.28, "learning_rate": 1.2078682788555232e-05, "loss": 0.6017, "step": 205485 }, { "epoch": 2.28, "learning_rate": 1.207776006141672e-05, "loss": 0.5839, "step": 205490 }, { "epoch": 2.28, "learning_rate": 1.2076837334278207e-05, "loss": 0.639, "step": 205495 }, { "epoch": 2.28, "learning_rate": 1.2075914607139695e-05, "loss": 0.6538, "step": 205500 }, { "epoch": 2.28, "learning_rate": 1.2074991880001181e-05, "loss": 0.5704, "step": 205505 }, { "epoch": 2.28, "learning_rate": 1.207406915286267e-05, "loss": 0.5712, "step": 205510 }, { "epoch": 2.28, "learning_rate": 1.2073146425724157e-05, "loss": 0.5891, "step": 205515 }, { "epoch": 2.28, "learning_rate": 1.2072223698585643e-05, "loss": 0.5807, "step": 205520 }, { "epoch": 2.28, "learning_rate": 1.2071300971447131e-05, "loss": 0.5867, "step": 205525 }, { "epoch": 2.28, "learning_rate": 1.2070378244308619e-05, "loss": 0.5986, "step": 205530 }, { "epoch": 2.28, "learning_rate": 1.2069455517170107e-05, "loss": 0.5878, "step": 205535 }, { "epoch": 2.28, "learning_rate": 1.2068532790031595e-05, "loss": 0.6191, "step": 205540 }, { "epoch": 2.28, "learning_rate": 1.2067610062893082e-05, "loss": 0.5453, "step": 205545 }, { "epoch": 2.28, "learning_rate": 1.206668733575457e-05, "loss": 0.5775, "step": 205550 }, { "epoch": 2.28, "learning_rate": 1.2065764608616058e-05, "loss": 0.6095, "step": 205555 }, { "epoch": 2.28, "learning_rate": 1.2064841881477546e-05, "loss": 0.5749, "step": 205560 }, { "epoch": 2.28, "learning_rate": 1.2063919154339034e-05, "loss": 0.6137, "step": 205565 }, { "epoch": 2.28, "learning_rate": 1.206299642720052e-05, "loss": 0.5954, "step": 205570 }, { "epoch": 2.28, "learning_rate": 1.2062073700062008e-05, "loss": 0.5728, "step": 205575 }, { "epoch": 2.28, "learning_rate": 1.2061150972923494e-05, "loss": 0.592, "step": 205580 }, { "epoch": 2.28, "learning_rate": 1.2060228245784982e-05, "loss": 0.5872, "step": 205585 }, { "epoch": 2.28, "learning_rate": 1.205930551864647e-05, "loss": 0.5923, "step": 205590 }, { "epoch": 2.28, "learning_rate": 1.2058382791507958e-05, "loss": 0.6038, "step": 205595 }, { "epoch": 2.28, "learning_rate": 1.2057460064369445e-05, "loss": 0.5814, "step": 205600 }, { "epoch": 2.28, "learning_rate": 1.2056537337230933e-05, "loss": 0.6093, "step": 205605 }, { "epoch": 2.28, "learning_rate": 1.2055614610092421e-05, "loss": 0.635, "step": 205610 }, { "epoch": 2.28, "learning_rate": 1.2054691882953909e-05, "loss": 0.6196, "step": 205615 }, { "epoch": 2.28, "learning_rate": 1.2053769155815397e-05, "loss": 0.5894, "step": 205620 }, { "epoch": 2.28, "learning_rate": 1.2052846428676885e-05, "loss": 0.599, "step": 205625 }, { "epoch": 2.28, "learning_rate": 1.205192370153837e-05, "loss": 0.5478, "step": 205630 }, { "epoch": 2.28, "learning_rate": 1.2051000974399859e-05, "loss": 0.5571, "step": 205635 }, { "epoch": 2.28, "learning_rate": 1.2050078247261347e-05, "loss": 0.6322, "step": 205640 }, { "epoch": 2.28, "learning_rate": 1.2049155520122834e-05, "loss": 0.585, "step": 205645 }, { "epoch": 2.28, "learning_rate": 1.2048232792984322e-05, "loss": 0.5744, "step": 205650 }, { "epoch": 2.28, "learning_rate": 1.2047310065845808e-05, "loss": 0.5455, "step": 205655 }, { "epoch": 2.28, "learning_rate": 1.2046387338707296e-05, "loss": 0.5777, "step": 205660 }, { "epoch": 2.28, "learning_rate": 1.2045464611568784e-05, "loss": 0.612, "step": 205665 }, { "epoch": 2.28, "learning_rate": 1.2044541884430272e-05, "loss": 0.5805, "step": 205670 }, { "epoch": 2.28, "learning_rate": 1.204361915729176e-05, "loss": 0.5736, "step": 205675 }, { "epoch": 2.28, "learning_rate": 1.2042696430153248e-05, "loss": 0.639, "step": 205680 }, { "epoch": 2.28, "learning_rate": 1.2041773703014734e-05, "loss": 0.5484, "step": 205685 }, { "epoch": 2.28, "learning_rate": 1.2040850975876222e-05, "loss": 0.6376, "step": 205690 }, { "epoch": 2.28, "learning_rate": 1.203992824873771e-05, "loss": 0.585, "step": 205695 }, { "epoch": 2.28, "learning_rate": 1.2039005521599197e-05, "loss": 0.6027, "step": 205700 }, { "epoch": 2.28, "learning_rate": 1.2038082794460685e-05, "loss": 0.6256, "step": 205705 }, { "epoch": 2.28, "learning_rate": 1.2037160067322173e-05, "loss": 0.5361, "step": 205710 }, { "epoch": 2.28, "learning_rate": 1.2036237340183661e-05, "loss": 0.6052, "step": 205715 }, { "epoch": 2.28, "learning_rate": 1.2035314613045149e-05, "loss": 0.597, "step": 205720 }, { "epoch": 2.28, "learning_rate": 1.2034391885906637e-05, "loss": 0.5935, "step": 205725 }, { "epoch": 2.28, "learning_rate": 1.2033469158768123e-05, "loss": 0.619, "step": 205730 }, { "epoch": 2.28, "learning_rate": 1.203254643162961e-05, "loss": 0.5631, "step": 205735 }, { "epoch": 2.28, "learning_rate": 1.2031623704491098e-05, "loss": 0.6335, "step": 205740 }, { "epoch": 2.28, "learning_rate": 1.2030700977352585e-05, "loss": 0.613, "step": 205745 }, { "epoch": 2.28, "learning_rate": 1.2029778250214072e-05, "loss": 0.602, "step": 205750 }, { "epoch": 2.28, "learning_rate": 1.202885552307556e-05, "loss": 0.5439, "step": 205755 }, { "epoch": 2.28, "learning_rate": 1.2027932795937048e-05, "loss": 0.6027, "step": 205760 }, { "epoch": 2.28, "learning_rate": 1.2027010068798536e-05, "loss": 0.6026, "step": 205765 }, { "epoch": 2.28, "learning_rate": 1.2026087341660024e-05, "loss": 0.6128, "step": 205770 }, { "epoch": 2.28, "learning_rate": 1.2025164614521512e-05, "loss": 0.5951, "step": 205775 }, { "epoch": 2.28, "learning_rate": 1.2024241887383e-05, "loss": 0.6037, "step": 205780 }, { "epoch": 2.28, "learning_rate": 1.2023319160244487e-05, "loss": 0.5666, "step": 205785 }, { "epoch": 2.28, "learning_rate": 1.2022396433105974e-05, "loss": 0.5718, "step": 205790 }, { "epoch": 2.28, "learning_rate": 1.2021473705967461e-05, "loss": 0.5899, "step": 205795 }, { "epoch": 2.28, "learning_rate": 1.202055097882895e-05, "loss": 0.6204, "step": 205800 }, { "epoch": 2.28, "learning_rate": 1.2019628251690437e-05, "loss": 0.5938, "step": 205805 }, { "epoch": 2.28, "learning_rate": 1.2018705524551923e-05, "loss": 0.5365, "step": 205810 }, { "epoch": 2.28, "learning_rate": 1.2017782797413411e-05, "loss": 0.5806, "step": 205815 }, { "epoch": 2.28, "learning_rate": 1.2016860070274899e-05, "loss": 0.6221, "step": 205820 }, { "epoch": 2.28, "learning_rate": 1.2015937343136387e-05, "loss": 0.6187, "step": 205825 }, { "epoch": 2.28, "learning_rate": 1.2015014615997875e-05, "loss": 0.629, "step": 205830 }, { "epoch": 2.28, "learning_rate": 1.2014091888859362e-05, "loss": 0.6081, "step": 205835 }, { "epoch": 2.28, "learning_rate": 1.201316916172085e-05, "loss": 0.5908, "step": 205840 }, { "epoch": 2.28, "learning_rate": 1.2012246434582338e-05, "loss": 0.594, "step": 205845 }, { "epoch": 2.28, "learning_rate": 1.2011323707443824e-05, "loss": 0.5976, "step": 205850 }, { "epoch": 2.28, "learning_rate": 1.2010400980305312e-05, "loss": 0.5689, "step": 205855 }, { "epoch": 2.28, "learning_rate": 1.20094782531668e-05, "loss": 0.5852, "step": 205860 }, { "epoch": 2.28, "learning_rate": 1.2008555526028288e-05, "loss": 0.576, "step": 205865 }, { "epoch": 2.28, "learning_rate": 1.2007632798889776e-05, "loss": 0.6217, "step": 205870 }, { "epoch": 2.28, "learning_rate": 1.2006710071751264e-05, "loss": 0.5773, "step": 205875 }, { "epoch": 2.28, "learning_rate": 1.2005787344612751e-05, "loss": 0.5608, "step": 205880 }, { "epoch": 2.28, "learning_rate": 1.2004864617474238e-05, "loss": 0.5933, "step": 205885 }, { "epoch": 2.28, "learning_rate": 1.2003941890335725e-05, "loss": 0.5685, "step": 205890 }, { "epoch": 2.28, "learning_rate": 1.2003019163197213e-05, "loss": 0.5904, "step": 205895 }, { "epoch": 2.28, "learning_rate": 1.2002096436058701e-05, "loss": 0.5586, "step": 205900 }, { "epoch": 2.28, "learning_rate": 1.2001173708920189e-05, "loss": 0.5897, "step": 205905 }, { "epoch": 2.28, "learning_rate": 1.2000250981781675e-05, "loss": 0.5766, "step": 205910 }, { "epoch": 2.28, "learning_rate": 1.1999328254643163e-05, "loss": 0.6403, "step": 205915 }, { "epoch": 2.28, "learning_rate": 1.199840552750465e-05, "loss": 0.5622, "step": 205920 }, { "epoch": 2.28, "learning_rate": 1.1997482800366139e-05, "loss": 0.5762, "step": 205925 }, { "epoch": 2.28, "learning_rate": 1.1996560073227627e-05, "loss": 0.5468, "step": 205930 }, { "epoch": 2.28, "learning_rate": 1.1995637346089114e-05, "loss": 0.5802, "step": 205935 }, { "epoch": 2.28, "learning_rate": 1.1994714618950602e-05, "loss": 0.6329, "step": 205940 }, { "epoch": 2.28, "learning_rate": 1.199379189181209e-05, "loss": 0.5936, "step": 205945 }, { "epoch": 2.28, "learning_rate": 1.1992869164673578e-05, "loss": 0.5824, "step": 205950 }, { "epoch": 2.28, "learning_rate": 1.1991946437535064e-05, "loss": 0.6338, "step": 205955 }, { "epoch": 2.28, "learning_rate": 1.1991023710396552e-05, "loss": 0.5603, "step": 205960 }, { "epoch": 2.28, "learning_rate": 1.1990100983258038e-05, "loss": 0.6222, "step": 205965 }, { "epoch": 2.28, "learning_rate": 1.1989178256119526e-05, "loss": 0.5881, "step": 205970 }, { "epoch": 2.28, "learning_rate": 1.1988255528981014e-05, "loss": 0.5393, "step": 205975 }, { "epoch": 2.28, "learning_rate": 1.1987332801842502e-05, "loss": 0.587, "step": 205980 }, { "epoch": 2.28, "learning_rate": 1.198641007470399e-05, "loss": 0.5947, "step": 205985 }, { "epoch": 2.28, "learning_rate": 1.1985487347565477e-05, "loss": 0.5688, "step": 205990 }, { "epoch": 2.28, "learning_rate": 1.1984564620426965e-05, "loss": 0.5772, "step": 205995 }, { "epoch": 2.28, "learning_rate": 1.1983641893288453e-05, "loss": 0.6241, "step": 206000 }, { "epoch": 2.28, "eval_loss": 0.5696117281913757, "eval_runtime": 69.2715, "eval_samples_per_second": 28.872, "eval_steps_per_second": 14.436, "step": 206000 }, { "epoch": 2.28, "learning_rate": 1.198271916614994e-05, "loss": 0.5812, "step": 206005 }, { "epoch": 2.28, "learning_rate": 1.1981796439011429e-05, "loss": 0.616, "step": 206010 }, { "epoch": 2.28, "learning_rate": 1.1980873711872915e-05, "loss": 0.6119, "step": 206015 }, { "epoch": 2.28, "learning_rate": 1.1979950984734403e-05, "loss": 0.6054, "step": 206020 }, { "epoch": 2.28, "learning_rate": 1.197902825759589e-05, "loss": 0.5988, "step": 206025 }, { "epoch": 2.28, "learning_rate": 1.1978105530457378e-05, "loss": 0.6323, "step": 206030 }, { "epoch": 2.28, "learning_rate": 1.1977182803318865e-05, "loss": 0.5767, "step": 206035 }, { "epoch": 2.28, "learning_rate": 1.1976260076180352e-05, "loss": 0.6016, "step": 206040 }, { "epoch": 2.28, "learning_rate": 1.197533734904184e-05, "loss": 0.5998, "step": 206045 }, { "epoch": 2.28, "learning_rate": 1.1974414621903328e-05, "loss": 0.578, "step": 206050 }, { "epoch": 2.28, "learning_rate": 1.1973491894764816e-05, "loss": 0.5917, "step": 206055 }, { "epoch": 2.28, "learning_rate": 1.1972569167626304e-05, "loss": 0.6152, "step": 206060 }, { "epoch": 2.28, "learning_rate": 1.1971646440487792e-05, "loss": 0.5872, "step": 206065 }, { "epoch": 2.28, "learning_rate": 1.1970723713349278e-05, "loss": 0.5993, "step": 206070 }, { "epoch": 2.28, "learning_rate": 1.1969800986210766e-05, "loss": 0.5856, "step": 206075 }, { "epoch": 2.28, "learning_rate": 1.1968878259072253e-05, "loss": 0.5616, "step": 206080 }, { "epoch": 2.28, "learning_rate": 1.1967955531933741e-05, "loss": 0.6435, "step": 206085 }, { "epoch": 2.28, "learning_rate": 1.196703280479523e-05, "loss": 0.6071, "step": 206090 }, { "epoch": 2.28, "learning_rate": 1.1966110077656717e-05, "loss": 0.5847, "step": 206095 }, { "epoch": 2.28, "learning_rate": 1.1965187350518205e-05, "loss": 0.5708, "step": 206100 }, { "epoch": 2.28, "learning_rate": 1.1964264623379693e-05, "loss": 0.5925, "step": 206105 }, { "epoch": 2.28, "learning_rate": 1.1963341896241179e-05, "loss": 0.5639, "step": 206110 }, { "epoch": 2.28, "learning_rate": 1.1962419169102667e-05, "loss": 0.6021, "step": 206115 }, { "epoch": 2.28, "learning_rate": 1.1961496441964155e-05, "loss": 0.6031, "step": 206120 }, { "epoch": 2.28, "learning_rate": 1.1960573714825642e-05, "loss": 0.551, "step": 206125 }, { "epoch": 2.28, "learning_rate": 1.1959650987687129e-05, "loss": 0.6012, "step": 206130 }, { "epoch": 2.28, "learning_rate": 1.1958728260548616e-05, "loss": 0.5488, "step": 206135 }, { "epoch": 2.28, "learning_rate": 1.1957805533410104e-05, "loss": 0.5782, "step": 206140 }, { "epoch": 2.28, "learning_rate": 1.1956882806271592e-05, "loss": 0.567, "step": 206145 }, { "epoch": 2.28, "learning_rate": 1.195596007913308e-05, "loss": 0.6365, "step": 206150 }, { "epoch": 2.28, "learning_rate": 1.1955037351994568e-05, "loss": 0.5541, "step": 206155 }, { "epoch": 2.28, "learning_rate": 1.1954114624856056e-05, "loss": 0.571, "step": 206160 }, { "epoch": 2.28, "learning_rate": 1.1953191897717544e-05, "loss": 0.6811, "step": 206165 }, { "epoch": 2.28, "learning_rate": 1.1952269170579031e-05, "loss": 0.5865, "step": 206170 }, { "epoch": 2.28, "learning_rate": 1.195134644344052e-05, "loss": 0.5885, "step": 206175 }, { "epoch": 2.28, "learning_rate": 1.1950423716302005e-05, "loss": 0.596, "step": 206180 }, { "epoch": 2.28, "learning_rate": 1.1949500989163492e-05, "loss": 0.5836, "step": 206185 }, { "epoch": 2.28, "learning_rate": 1.194857826202498e-05, "loss": 0.566, "step": 206190 }, { "epoch": 2.28, "learning_rate": 1.1947655534886467e-05, "loss": 0.5692, "step": 206195 }, { "epoch": 2.28, "learning_rate": 1.1946732807747955e-05, "loss": 0.5977, "step": 206200 }, { "epoch": 2.28, "learning_rate": 1.1945810080609443e-05, "loss": 0.6055, "step": 206205 }, { "epoch": 2.28, "learning_rate": 1.194488735347093e-05, "loss": 0.6651, "step": 206210 }, { "epoch": 2.28, "learning_rate": 1.1943964626332419e-05, "loss": 0.6021, "step": 206215 }, { "epoch": 2.28, "learning_rate": 1.1943041899193906e-05, "loss": 0.59, "step": 206220 }, { "epoch": 2.28, "learning_rate": 1.1942119172055394e-05, "loss": 0.6355, "step": 206225 }, { "epoch": 2.28, "learning_rate": 1.1941196444916882e-05, "loss": 0.6781, "step": 206230 }, { "epoch": 2.28, "learning_rate": 1.1940273717778368e-05, "loss": 0.6456, "step": 206235 }, { "epoch": 2.28, "learning_rate": 1.1939350990639856e-05, "loss": 0.646, "step": 206240 }, { "epoch": 2.28, "learning_rate": 1.1938428263501344e-05, "loss": 0.5911, "step": 206245 }, { "epoch": 2.28, "learning_rate": 1.1937505536362832e-05, "loss": 0.5968, "step": 206250 }, { "epoch": 2.28, "learning_rate": 1.193658280922432e-05, "loss": 0.6389, "step": 206255 }, { "epoch": 2.28, "learning_rate": 1.1935660082085808e-05, "loss": 0.6153, "step": 206260 }, { "epoch": 2.28, "learning_rate": 1.1934737354947294e-05, "loss": 0.6482, "step": 206265 }, { "epoch": 2.28, "learning_rate": 1.1933814627808782e-05, "loss": 0.6496, "step": 206270 }, { "epoch": 2.28, "learning_rate": 1.193289190067027e-05, "loss": 0.5294, "step": 206275 }, { "epoch": 2.28, "learning_rate": 1.1931969173531757e-05, "loss": 0.5567, "step": 206280 }, { "epoch": 2.28, "learning_rate": 1.1931046446393245e-05, "loss": 0.6285, "step": 206285 }, { "epoch": 2.28, "learning_rate": 1.1930123719254733e-05, "loss": 0.6103, "step": 206290 }, { "epoch": 2.28, "learning_rate": 1.1929200992116219e-05, "loss": 0.5651, "step": 206295 }, { "epoch": 2.28, "learning_rate": 1.1928278264977707e-05, "loss": 0.6178, "step": 206300 }, { "epoch": 2.28, "learning_rate": 1.1927355537839195e-05, "loss": 0.5219, "step": 206305 }, { "epoch": 2.28, "learning_rate": 1.1926432810700683e-05, "loss": 0.6489, "step": 206310 }, { "epoch": 2.28, "learning_rate": 1.192551008356217e-05, "loss": 0.5677, "step": 206315 }, { "epoch": 2.28, "learning_rate": 1.1924587356423658e-05, "loss": 0.5965, "step": 206320 }, { "epoch": 2.28, "learning_rate": 1.1923664629285146e-05, "loss": 0.5542, "step": 206325 }, { "epoch": 2.28, "learning_rate": 1.1922741902146634e-05, "loss": 0.6351, "step": 206330 }, { "epoch": 2.28, "learning_rate": 1.1921819175008122e-05, "loss": 0.5401, "step": 206335 }, { "epoch": 2.28, "learning_rate": 1.1920896447869608e-05, "loss": 0.5899, "step": 206340 }, { "epoch": 2.28, "learning_rate": 1.1919973720731096e-05, "loss": 0.5285, "step": 206345 }, { "epoch": 2.28, "learning_rate": 1.1919050993592582e-05, "loss": 0.6476, "step": 206350 }, { "epoch": 2.28, "learning_rate": 1.191812826645407e-05, "loss": 0.5206, "step": 206355 }, { "epoch": 2.28, "learning_rate": 1.1917205539315558e-05, "loss": 0.6124, "step": 206360 }, { "epoch": 2.29, "learning_rate": 1.1916282812177046e-05, "loss": 0.6427, "step": 206365 }, { "epoch": 2.29, "learning_rate": 1.1915360085038533e-05, "loss": 0.6147, "step": 206370 }, { "epoch": 2.29, "learning_rate": 1.1914437357900021e-05, "loss": 0.582, "step": 206375 }, { "epoch": 2.29, "learning_rate": 1.191351463076151e-05, "loss": 0.54, "step": 206380 }, { "epoch": 2.29, "learning_rate": 1.1912591903622997e-05, "loss": 0.6061, "step": 206385 }, { "epoch": 2.29, "learning_rate": 1.1911669176484485e-05, "loss": 0.5639, "step": 206390 }, { "epoch": 2.29, "learning_rate": 1.1910746449345973e-05, "loss": 0.6471, "step": 206395 }, { "epoch": 2.29, "learning_rate": 1.1909823722207459e-05, "loss": 0.6422, "step": 206400 }, { "epoch": 2.29, "learning_rate": 1.1908900995068947e-05, "loss": 0.6565, "step": 206405 }, { "epoch": 2.29, "learning_rate": 1.1907978267930435e-05, "loss": 0.6094, "step": 206410 }, { "epoch": 2.29, "learning_rate": 1.190705554079192e-05, "loss": 0.5545, "step": 206415 }, { "epoch": 2.29, "learning_rate": 1.1906132813653409e-05, "loss": 0.6219, "step": 206420 }, { "epoch": 2.29, "learning_rate": 1.1905210086514896e-05, "loss": 0.6014, "step": 206425 }, { "epoch": 2.29, "learning_rate": 1.1904287359376384e-05, "loss": 0.6177, "step": 206430 }, { "epoch": 2.29, "learning_rate": 1.1903364632237872e-05, "loss": 0.6253, "step": 206435 }, { "epoch": 2.29, "learning_rate": 1.190244190509936e-05, "loss": 0.5959, "step": 206440 }, { "epoch": 2.29, "learning_rate": 1.1901519177960848e-05, "loss": 0.6409, "step": 206445 }, { "epoch": 2.29, "learning_rate": 1.1900596450822336e-05, "loss": 0.6144, "step": 206450 }, { "epoch": 2.29, "learning_rate": 1.1899673723683822e-05, "loss": 0.6197, "step": 206455 }, { "epoch": 2.29, "learning_rate": 1.189875099654531e-05, "loss": 0.5893, "step": 206460 }, { "epoch": 2.29, "learning_rate": 1.1897828269406798e-05, "loss": 0.6075, "step": 206465 }, { "epoch": 2.29, "learning_rate": 1.1896905542268285e-05, "loss": 0.6, "step": 206470 }, { "epoch": 2.29, "learning_rate": 1.1895982815129773e-05, "loss": 0.59, "step": 206475 }, { "epoch": 2.29, "learning_rate": 1.1895060087991261e-05, "loss": 0.6101, "step": 206480 }, { "epoch": 2.29, "learning_rate": 1.1894137360852749e-05, "loss": 0.6268, "step": 206485 }, { "epoch": 2.29, "learning_rate": 1.1893214633714235e-05, "loss": 0.6145, "step": 206490 }, { "epoch": 2.29, "learning_rate": 1.1892291906575723e-05, "loss": 0.6398, "step": 206495 }, { "epoch": 2.29, "learning_rate": 1.189136917943721e-05, "loss": 0.6002, "step": 206500 }, { "epoch": 2.29, "learning_rate": 1.1890446452298699e-05, "loss": 0.6212, "step": 206505 }, { "epoch": 2.29, "learning_rate": 1.1889523725160186e-05, "loss": 0.5981, "step": 206510 }, { "epoch": 2.29, "learning_rate": 1.1888600998021673e-05, "loss": 0.6078, "step": 206515 }, { "epoch": 2.29, "learning_rate": 1.188767827088316e-05, "loss": 0.6032, "step": 206520 }, { "epoch": 2.29, "learning_rate": 1.1886755543744648e-05, "loss": 0.5866, "step": 206525 }, { "epoch": 2.29, "learning_rate": 1.1885832816606136e-05, "loss": 0.6167, "step": 206530 }, { "epoch": 2.29, "learning_rate": 1.1884910089467624e-05, "loss": 0.5605, "step": 206535 }, { "epoch": 2.29, "learning_rate": 1.1883987362329112e-05, "loss": 0.5999, "step": 206540 }, { "epoch": 2.29, "learning_rate": 1.18830646351906e-05, "loss": 0.6216, "step": 206545 }, { "epoch": 2.29, "learning_rate": 1.1882141908052088e-05, "loss": 0.6422, "step": 206550 }, { "epoch": 2.29, "learning_rate": 1.1881219180913575e-05, "loss": 0.591, "step": 206555 }, { "epoch": 2.29, "learning_rate": 1.1880296453775063e-05, "loss": 0.5839, "step": 206560 }, { "epoch": 2.29, "learning_rate": 1.187937372663655e-05, "loss": 0.5605, "step": 206565 }, { "epoch": 2.29, "learning_rate": 1.1878450999498037e-05, "loss": 0.5533, "step": 206570 }, { "epoch": 2.29, "learning_rate": 1.1877528272359523e-05, "loss": 0.5503, "step": 206575 }, { "epoch": 2.29, "learning_rate": 1.1876605545221011e-05, "loss": 0.6029, "step": 206580 }, { "epoch": 2.29, "learning_rate": 1.1875682818082499e-05, "loss": 0.6731, "step": 206585 }, { "epoch": 2.29, "learning_rate": 1.1874760090943987e-05, "loss": 0.5938, "step": 206590 }, { "epoch": 2.29, "learning_rate": 1.1873837363805475e-05, "loss": 0.615, "step": 206595 }, { "epoch": 2.29, "learning_rate": 1.1872914636666963e-05, "loss": 0.6277, "step": 206600 }, { "epoch": 2.29, "learning_rate": 1.187199190952845e-05, "loss": 0.6187, "step": 206605 }, { "epoch": 2.29, "learning_rate": 1.1871069182389938e-05, "loss": 0.5808, "step": 206610 }, { "epoch": 2.29, "learning_rate": 1.1870146455251426e-05, "loss": 0.6399, "step": 206615 }, { "epoch": 2.29, "learning_rate": 1.1869223728112912e-05, "loss": 0.622, "step": 206620 }, { "epoch": 2.29, "learning_rate": 1.18683010009744e-05, "loss": 0.6152, "step": 206625 }, { "epoch": 2.29, "learning_rate": 1.1867378273835888e-05, "loss": 0.5955, "step": 206630 }, { "epoch": 2.29, "learning_rate": 1.1866455546697376e-05, "loss": 0.5514, "step": 206635 }, { "epoch": 2.29, "learning_rate": 1.1865532819558864e-05, "loss": 0.5988, "step": 206640 }, { "epoch": 2.29, "learning_rate": 1.186461009242035e-05, "loss": 0.5763, "step": 206645 }, { "epoch": 2.29, "learning_rate": 1.1863687365281838e-05, "loss": 0.6282, "step": 206650 }, { "epoch": 2.29, "learning_rate": 1.1862764638143326e-05, "loss": 0.5405, "step": 206655 }, { "epoch": 2.29, "learning_rate": 1.1861841911004813e-05, "loss": 0.5531, "step": 206660 }, { "epoch": 2.29, "learning_rate": 1.1860919183866301e-05, "loss": 0.6125, "step": 206665 }, { "epoch": 2.29, "learning_rate": 1.1859996456727789e-05, "loss": 0.5924, "step": 206670 }, { "epoch": 2.29, "learning_rate": 1.1859073729589277e-05, "loss": 0.5979, "step": 206675 }, { "epoch": 2.29, "learning_rate": 1.1858151002450763e-05, "loss": 0.6229, "step": 206680 }, { "epoch": 2.29, "learning_rate": 1.1857228275312251e-05, "loss": 0.5764, "step": 206685 }, { "epoch": 2.29, "learning_rate": 1.1856305548173739e-05, "loss": 0.5816, "step": 206690 }, { "epoch": 2.29, "learning_rate": 1.1855382821035227e-05, "loss": 0.6038, "step": 206695 }, { "epoch": 2.29, "learning_rate": 1.1854460093896715e-05, "loss": 0.5833, "step": 206700 }, { "epoch": 2.29, "learning_rate": 1.1853537366758202e-05, "loss": 0.571, "step": 206705 }, { "epoch": 2.29, "learning_rate": 1.185261463961969e-05, "loss": 0.5776, "step": 206710 }, { "epoch": 2.29, "learning_rate": 1.1851691912481178e-05, "loss": 0.6037, "step": 206715 }, { "epoch": 2.29, "learning_rate": 1.1850769185342664e-05, "loss": 0.5601, "step": 206720 }, { "epoch": 2.29, "learning_rate": 1.1849846458204152e-05, "loss": 0.625, "step": 206725 }, { "epoch": 2.29, "learning_rate": 1.184892373106564e-05, "loss": 0.5941, "step": 206730 }, { "epoch": 2.29, "learning_rate": 1.1848001003927126e-05, "loss": 0.6091, "step": 206735 }, { "epoch": 2.29, "learning_rate": 1.1847078276788614e-05, "loss": 0.5591, "step": 206740 }, { "epoch": 2.29, "learning_rate": 1.1846155549650102e-05, "loss": 0.5537, "step": 206745 }, { "epoch": 2.29, "learning_rate": 1.184523282251159e-05, "loss": 0.5936, "step": 206750 }, { "epoch": 2.29, "learning_rate": 1.1844310095373077e-05, "loss": 0.5613, "step": 206755 }, { "epoch": 2.29, "learning_rate": 1.1843387368234565e-05, "loss": 0.6739, "step": 206760 }, { "epoch": 2.29, "learning_rate": 1.1842464641096053e-05, "loss": 0.6395, "step": 206765 }, { "epoch": 2.29, "learning_rate": 1.1841541913957541e-05, "loss": 0.6126, "step": 206770 }, { "epoch": 2.29, "learning_rate": 1.1840619186819029e-05, "loss": 0.5865, "step": 206775 }, { "epoch": 2.29, "learning_rate": 1.1839696459680517e-05, "loss": 0.6104, "step": 206780 }, { "epoch": 2.29, "learning_rate": 1.1838773732542003e-05, "loss": 0.6118, "step": 206785 }, { "epoch": 2.29, "learning_rate": 1.183785100540349e-05, "loss": 0.5546, "step": 206790 }, { "epoch": 2.29, "learning_rate": 1.1836928278264977e-05, "loss": 0.5592, "step": 206795 }, { "epoch": 2.29, "learning_rate": 1.1836005551126465e-05, "loss": 0.5927, "step": 206800 }, { "epoch": 2.29, "learning_rate": 1.1835082823987953e-05, "loss": 0.553, "step": 206805 }, { "epoch": 2.29, "learning_rate": 1.183416009684944e-05, "loss": 0.5813, "step": 206810 }, { "epoch": 2.29, "learning_rate": 1.1833237369710928e-05, "loss": 0.5851, "step": 206815 }, { "epoch": 2.29, "learning_rate": 1.1832314642572416e-05, "loss": 0.5779, "step": 206820 }, { "epoch": 2.29, "learning_rate": 1.1831391915433904e-05, "loss": 0.5696, "step": 206825 }, { "epoch": 2.29, "learning_rate": 1.1830469188295392e-05, "loss": 0.5829, "step": 206830 }, { "epoch": 2.29, "learning_rate": 1.182954646115688e-05, "loss": 0.5852, "step": 206835 }, { "epoch": 2.29, "learning_rate": 1.1828623734018368e-05, "loss": 0.5714, "step": 206840 }, { "epoch": 2.29, "learning_rate": 1.1827701006879854e-05, "loss": 0.5912, "step": 206845 }, { "epoch": 2.29, "learning_rate": 1.1826778279741342e-05, "loss": 0.5983, "step": 206850 }, { "epoch": 2.29, "learning_rate": 1.182585555260283e-05, "loss": 0.5848, "step": 206855 }, { "epoch": 2.29, "learning_rate": 1.1824932825464317e-05, "loss": 0.5808, "step": 206860 }, { "epoch": 2.29, "learning_rate": 1.1824010098325805e-05, "loss": 0.5851, "step": 206865 }, { "epoch": 2.29, "learning_rate": 1.1823087371187291e-05, "loss": 0.6254, "step": 206870 }, { "epoch": 2.29, "learning_rate": 1.1822164644048779e-05, "loss": 0.5924, "step": 206875 }, { "epoch": 2.29, "learning_rate": 1.1821241916910267e-05, "loss": 0.6179, "step": 206880 }, { "epoch": 2.29, "learning_rate": 1.1820319189771755e-05, "loss": 0.5585, "step": 206885 }, { "epoch": 2.29, "learning_rate": 1.1819396462633243e-05, "loss": 0.5256, "step": 206890 }, { "epoch": 2.29, "learning_rate": 1.181847373549473e-05, "loss": 0.6288, "step": 206895 }, { "epoch": 2.29, "learning_rate": 1.1817551008356217e-05, "loss": 0.5952, "step": 206900 }, { "epoch": 2.29, "learning_rate": 1.1816628281217704e-05, "loss": 0.6128, "step": 206905 }, { "epoch": 2.29, "learning_rate": 1.1815705554079192e-05, "loss": 0.6026, "step": 206910 }, { "epoch": 2.29, "learning_rate": 1.181478282694068e-05, "loss": 0.5892, "step": 206915 }, { "epoch": 2.29, "learning_rate": 1.1813860099802168e-05, "loss": 0.5994, "step": 206920 }, { "epoch": 2.29, "learning_rate": 1.1812937372663656e-05, "loss": 0.5884, "step": 206925 }, { "epoch": 2.29, "learning_rate": 1.1812014645525144e-05, "loss": 0.6097, "step": 206930 }, { "epoch": 2.29, "learning_rate": 1.1811091918386632e-05, "loss": 0.5665, "step": 206935 }, { "epoch": 2.29, "learning_rate": 1.181016919124812e-05, "loss": 0.5931, "step": 206940 }, { "epoch": 2.29, "learning_rate": 1.1809246464109606e-05, "loss": 0.6253, "step": 206945 }, { "epoch": 2.29, "learning_rate": 1.1808323736971093e-05, "loss": 0.6239, "step": 206950 }, { "epoch": 2.29, "learning_rate": 1.1807401009832581e-05, "loss": 0.6268, "step": 206955 }, { "epoch": 2.29, "learning_rate": 1.1806478282694067e-05, "loss": 0.6244, "step": 206960 }, { "epoch": 2.29, "learning_rate": 1.1805555555555555e-05, "loss": 0.6027, "step": 206965 }, { "epoch": 2.29, "learning_rate": 1.1804632828417043e-05, "loss": 0.5929, "step": 206970 }, { "epoch": 2.29, "learning_rate": 1.1803710101278531e-05, "loss": 0.5451, "step": 206975 }, { "epoch": 2.29, "learning_rate": 1.1802787374140019e-05, "loss": 0.5963, "step": 206980 }, { "epoch": 2.29, "learning_rate": 1.1801864647001507e-05, "loss": 0.6028, "step": 206985 }, { "epoch": 2.29, "learning_rate": 1.1800941919862995e-05, "loss": 0.6009, "step": 206990 }, { "epoch": 2.29, "learning_rate": 1.1800019192724482e-05, "loss": 0.6048, "step": 206995 }, { "epoch": 2.29, "learning_rate": 1.179909646558597e-05, "loss": 0.6519, "step": 207000 }, { "epoch": 2.29, "eval_loss": 0.536715567111969, "eval_runtime": 70.0501, "eval_samples_per_second": 28.551, "eval_steps_per_second": 14.275, "step": 207000 }, { "epoch": 2.29, "learning_rate": 1.1798173738447456e-05, "loss": 0.5381, "step": 207005 }, { "epoch": 2.29, "learning_rate": 1.1797251011308944e-05, "loss": 0.6075, "step": 207010 }, { "epoch": 2.29, "learning_rate": 1.1796328284170432e-05, "loss": 0.6125, "step": 207015 }, { "epoch": 2.29, "learning_rate": 1.1795405557031918e-05, "loss": 0.5786, "step": 207020 }, { "epoch": 2.29, "learning_rate": 1.1794482829893406e-05, "loss": 0.653, "step": 207025 }, { "epoch": 2.29, "learning_rate": 1.1793560102754894e-05, "loss": 0.5939, "step": 207030 }, { "epoch": 2.29, "learning_rate": 1.1792637375616382e-05, "loss": 0.5359, "step": 207035 }, { "epoch": 2.29, "learning_rate": 1.179171464847787e-05, "loss": 0.6049, "step": 207040 }, { "epoch": 2.29, "learning_rate": 1.1790791921339357e-05, "loss": 0.563, "step": 207045 }, { "epoch": 2.29, "learning_rate": 1.1789869194200845e-05, "loss": 0.5315, "step": 207050 }, { "epoch": 2.29, "learning_rate": 1.1788946467062333e-05, "loss": 0.5852, "step": 207055 }, { "epoch": 2.29, "learning_rate": 1.1788023739923821e-05, "loss": 0.5884, "step": 207060 }, { "epoch": 2.29, "learning_rate": 1.1787101012785307e-05, "loss": 0.6382, "step": 207065 }, { "epoch": 2.29, "learning_rate": 1.1786178285646795e-05, "loss": 0.6232, "step": 207070 }, { "epoch": 2.29, "learning_rate": 1.1785255558508283e-05, "loss": 0.5705, "step": 207075 }, { "epoch": 2.29, "learning_rate": 1.178433283136977e-05, "loss": 0.5621, "step": 207080 }, { "epoch": 2.29, "learning_rate": 1.1783410104231259e-05, "loss": 0.6095, "step": 207085 }, { "epoch": 2.29, "learning_rate": 1.1782487377092746e-05, "loss": 0.6321, "step": 207090 }, { "epoch": 2.29, "learning_rate": 1.1781564649954234e-05, "loss": 0.6042, "step": 207095 }, { "epoch": 2.29, "learning_rate": 1.178064192281572e-05, "loss": 0.6093, "step": 207100 }, { "epoch": 2.29, "learning_rate": 1.1779719195677208e-05, "loss": 0.6025, "step": 207105 }, { "epoch": 2.29, "learning_rate": 1.1778796468538696e-05, "loss": 0.5634, "step": 207110 }, { "epoch": 2.29, "learning_rate": 1.1777873741400184e-05, "loss": 0.534, "step": 207115 }, { "epoch": 2.29, "learning_rate": 1.1776951014261672e-05, "loss": 0.6452, "step": 207120 }, { "epoch": 2.29, "learning_rate": 1.1776028287123158e-05, "loss": 0.5984, "step": 207125 }, { "epoch": 2.29, "learning_rate": 1.1775105559984646e-05, "loss": 0.5675, "step": 207130 }, { "epoch": 2.29, "learning_rate": 1.1774182832846134e-05, "loss": 0.5639, "step": 207135 }, { "epoch": 2.29, "learning_rate": 1.1773260105707622e-05, "loss": 0.616, "step": 207140 }, { "epoch": 2.29, "learning_rate": 1.177233737856911e-05, "loss": 0.5844, "step": 207145 }, { "epoch": 2.29, "learning_rate": 1.1771414651430597e-05, "loss": 0.5814, "step": 207150 }, { "epoch": 2.29, "learning_rate": 1.1770491924292085e-05, "loss": 0.6203, "step": 207155 }, { "epoch": 2.29, "learning_rate": 1.1769569197153573e-05, "loss": 0.5819, "step": 207160 }, { "epoch": 2.29, "learning_rate": 1.176864647001506e-05, "loss": 0.5807, "step": 207165 }, { "epoch": 2.29, "learning_rate": 1.1767723742876547e-05, "loss": 0.6068, "step": 207170 }, { "epoch": 2.29, "learning_rate": 1.1766801015738035e-05, "loss": 0.6522, "step": 207175 }, { "epoch": 2.29, "learning_rate": 1.1765878288599521e-05, "loss": 0.5834, "step": 207180 }, { "epoch": 2.29, "learning_rate": 1.1764955561461009e-05, "loss": 0.5472, "step": 207185 }, { "epoch": 2.29, "learning_rate": 1.1764032834322497e-05, "loss": 0.5868, "step": 207190 }, { "epoch": 2.29, "learning_rate": 1.1763110107183984e-05, "loss": 0.6021, "step": 207195 }, { "epoch": 2.29, "learning_rate": 1.1762187380045472e-05, "loss": 0.5212, "step": 207200 }, { "epoch": 2.29, "learning_rate": 1.176126465290696e-05, "loss": 0.6241, "step": 207205 }, { "epoch": 2.29, "learning_rate": 1.1760341925768448e-05, "loss": 0.6106, "step": 207210 }, { "epoch": 2.29, "learning_rate": 1.1759419198629936e-05, "loss": 0.6293, "step": 207215 }, { "epoch": 2.29, "learning_rate": 1.1758496471491424e-05, "loss": 0.6302, "step": 207220 }, { "epoch": 2.29, "learning_rate": 1.1757573744352912e-05, "loss": 0.5566, "step": 207225 }, { "epoch": 2.29, "learning_rate": 1.1756651017214398e-05, "loss": 0.6591, "step": 207230 }, { "epoch": 2.29, "learning_rate": 1.1755728290075886e-05, "loss": 0.5739, "step": 207235 }, { "epoch": 2.29, "learning_rate": 1.1754805562937373e-05, "loss": 0.5733, "step": 207240 }, { "epoch": 2.29, "learning_rate": 1.1753882835798861e-05, "loss": 0.5973, "step": 207245 }, { "epoch": 2.29, "learning_rate": 1.1752960108660347e-05, "loss": 0.5439, "step": 207250 }, { "epoch": 2.29, "learning_rate": 1.1752037381521835e-05, "loss": 0.6236, "step": 207255 }, { "epoch": 2.29, "learning_rate": 1.1751114654383323e-05, "loss": 0.6038, "step": 207260 }, { "epoch": 2.29, "learning_rate": 1.1750191927244811e-05, "loss": 0.5947, "step": 207265 }, { "epoch": 2.3, "learning_rate": 1.1749269200106299e-05, "loss": 0.5928, "step": 207270 }, { "epoch": 2.3, "learning_rate": 1.1748346472967787e-05, "loss": 0.5527, "step": 207275 }, { "epoch": 2.3, "learning_rate": 1.1747423745829274e-05, "loss": 0.5844, "step": 207280 }, { "epoch": 2.3, "learning_rate": 1.174650101869076e-05, "loss": 0.56, "step": 207285 }, { "epoch": 2.3, "learning_rate": 1.1745578291552248e-05, "loss": 0.6199, "step": 207290 }, { "epoch": 2.3, "learning_rate": 1.1744655564413736e-05, "loss": 0.5891, "step": 207295 }, { "epoch": 2.3, "learning_rate": 1.1743732837275224e-05, "loss": 0.6116, "step": 207300 }, { "epoch": 2.3, "learning_rate": 1.1742810110136712e-05, "loss": 0.5642, "step": 207305 }, { "epoch": 2.3, "learning_rate": 1.17418873829982e-05, "loss": 0.594, "step": 207310 }, { "epoch": 2.3, "learning_rate": 1.1740964655859688e-05, "loss": 0.6352, "step": 207315 }, { "epoch": 2.3, "learning_rate": 1.1740041928721176e-05, "loss": 0.596, "step": 207320 }, { "epoch": 2.3, "learning_rate": 1.1739119201582662e-05, "loss": 0.6126, "step": 207325 }, { "epoch": 2.3, "learning_rate": 1.173819647444415e-05, "loss": 0.6216, "step": 207330 }, { "epoch": 2.3, "learning_rate": 1.1737273747305637e-05, "loss": 0.6289, "step": 207335 }, { "epoch": 2.3, "learning_rate": 1.1736351020167125e-05, "loss": 0.6068, "step": 207340 }, { "epoch": 2.3, "learning_rate": 1.1735428293028611e-05, "loss": 0.6307, "step": 207345 }, { "epoch": 2.3, "learning_rate": 1.17345055658901e-05, "loss": 0.6184, "step": 207350 }, { "epoch": 2.3, "learning_rate": 1.1733582838751587e-05, "loss": 0.6111, "step": 207355 }, { "epoch": 2.3, "learning_rate": 1.1732660111613075e-05, "loss": 0.5721, "step": 207360 }, { "epoch": 2.3, "learning_rate": 1.1731737384474563e-05, "loss": 0.6071, "step": 207365 }, { "epoch": 2.3, "learning_rate": 1.173081465733605e-05, "loss": 0.5825, "step": 207370 }, { "epoch": 2.3, "learning_rate": 1.1729891930197539e-05, "loss": 0.6604, "step": 207375 }, { "epoch": 2.3, "learning_rate": 1.1728969203059026e-05, "loss": 0.6192, "step": 207380 }, { "epoch": 2.3, "learning_rate": 1.1728046475920514e-05, "loss": 0.6512, "step": 207385 }, { "epoch": 2.3, "learning_rate": 1.1727123748782002e-05, "loss": 0.6382, "step": 207390 }, { "epoch": 2.3, "learning_rate": 1.1726201021643488e-05, "loss": 0.6452, "step": 207395 }, { "epoch": 2.3, "learning_rate": 1.1725278294504974e-05, "loss": 0.5687, "step": 207400 }, { "epoch": 2.3, "learning_rate": 1.1724355567366462e-05, "loss": 0.6139, "step": 207405 }, { "epoch": 2.3, "learning_rate": 1.172343284022795e-05, "loss": 0.548, "step": 207410 }, { "epoch": 2.3, "learning_rate": 1.1722510113089438e-05, "loss": 0.5632, "step": 207415 }, { "epoch": 2.3, "learning_rate": 1.1721587385950926e-05, "loss": 0.5603, "step": 207420 }, { "epoch": 2.3, "learning_rate": 1.1720664658812414e-05, "loss": 0.5335, "step": 207425 }, { "epoch": 2.3, "learning_rate": 1.1719741931673901e-05, "loss": 0.551, "step": 207430 }, { "epoch": 2.3, "learning_rate": 1.171881920453539e-05, "loss": 0.6184, "step": 207435 }, { "epoch": 2.3, "learning_rate": 1.1717896477396877e-05, "loss": 0.617, "step": 207440 }, { "epoch": 2.3, "learning_rate": 1.1716973750258365e-05, "loss": 0.5639, "step": 207445 }, { "epoch": 2.3, "learning_rate": 1.1716051023119851e-05, "loss": 0.5982, "step": 207450 }, { "epoch": 2.3, "learning_rate": 1.1715128295981339e-05, "loss": 0.619, "step": 207455 }, { "epoch": 2.3, "learning_rate": 1.1714205568842827e-05, "loss": 0.6002, "step": 207460 }, { "epoch": 2.3, "learning_rate": 1.1713282841704315e-05, "loss": 0.5608, "step": 207465 }, { "epoch": 2.3, "learning_rate": 1.1712360114565803e-05, "loss": 0.5535, "step": 207470 }, { "epoch": 2.3, "learning_rate": 1.171143738742729e-05, "loss": 0.5773, "step": 207475 }, { "epoch": 2.3, "learning_rate": 1.1710514660288777e-05, "loss": 0.5949, "step": 207480 }, { "epoch": 2.3, "learning_rate": 1.1709591933150264e-05, "loss": 0.6286, "step": 207485 }, { "epoch": 2.3, "learning_rate": 1.1708669206011752e-05, "loss": 0.6112, "step": 207490 }, { "epoch": 2.3, "learning_rate": 1.170774647887324e-05, "loss": 0.6057, "step": 207495 }, { "epoch": 2.3, "learning_rate": 1.1706823751734728e-05, "loss": 0.57, "step": 207500 }, { "epoch": 2.3, "learning_rate": 1.1705901024596216e-05, "loss": 0.5845, "step": 207505 }, { "epoch": 2.3, "learning_rate": 1.1704978297457702e-05, "loss": 0.5926, "step": 207510 }, { "epoch": 2.3, "learning_rate": 1.170405557031919e-05, "loss": 0.5782, "step": 207515 }, { "epoch": 2.3, "learning_rate": 1.1703132843180678e-05, "loss": 0.5621, "step": 207520 }, { "epoch": 2.3, "learning_rate": 1.1702210116042166e-05, "loss": 0.649, "step": 207525 }, { "epoch": 2.3, "learning_rate": 1.1701287388903653e-05, "loss": 0.5722, "step": 207530 }, { "epoch": 2.3, "learning_rate": 1.1700364661765141e-05, "loss": 0.5825, "step": 207535 }, { "epoch": 2.3, "learning_rate": 1.1699441934626629e-05, "loss": 0.5917, "step": 207540 }, { "epoch": 2.3, "learning_rate": 1.1698519207488117e-05, "loss": 0.6317, "step": 207545 }, { "epoch": 2.3, "learning_rate": 1.1697596480349605e-05, "loss": 0.6408, "step": 207550 }, { "epoch": 2.3, "learning_rate": 1.1696673753211091e-05, "loss": 0.5992, "step": 207555 }, { "epoch": 2.3, "learning_rate": 1.1695751026072579e-05, "loss": 0.6178, "step": 207560 }, { "epoch": 2.3, "learning_rate": 1.1694828298934065e-05, "loss": 0.5568, "step": 207565 }, { "epoch": 2.3, "learning_rate": 1.1693905571795553e-05, "loss": 0.5642, "step": 207570 }, { "epoch": 2.3, "learning_rate": 1.169298284465704e-05, "loss": 0.5612, "step": 207575 }, { "epoch": 2.3, "learning_rate": 1.1692060117518528e-05, "loss": 0.5681, "step": 207580 }, { "epoch": 2.3, "learning_rate": 1.1691137390380016e-05, "loss": 0.6206, "step": 207585 }, { "epoch": 2.3, "learning_rate": 1.1690214663241504e-05, "loss": 0.5546, "step": 207590 }, { "epoch": 2.3, "learning_rate": 1.1689291936102992e-05, "loss": 0.6406, "step": 207595 }, { "epoch": 2.3, "learning_rate": 1.168836920896448e-05, "loss": 0.5498, "step": 207600 }, { "epoch": 2.3, "learning_rate": 1.1687446481825968e-05, "loss": 0.7172, "step": 207605 }, { "epoch": 2.3, "learning_rate": 1.1686523754687456e-05, "loss": 0.579, "step": 207610 }, { "epoch": 2.3, "learning_rate": 1.1685601027548942e-05, "loss": 0.6027, "step": 207615 }, { "epoch": 2.3, "learning_rate": 1.168467830041043e-05, "loss": 0.6011, "step": 207620 }, { "epoch": 2.3, "learning_rate": 1.1683755573271917e-05, "loss": 0.5807, "step": 207625 }, { "epoch": 2.3, "learning_rate": 1.1682832846133404e-05, "loss": 0.5612, "step": 207630 }, { "epoch": 2.3, "learning_rate": 1.1681910118994891e-05, "loss": 0.5717, "step": 207635 }, { "epoch": 2.3, "learning_rate": 1.168098739185638e-05, "loss": 0.5536, "step": 207640 }, { "epoch": 2.3, "learning_rate": 1.1680064664717867e-05, "loss": 0.6047, "step": 207645 }, { "epoch": 2.3, "learning_rate": 1.1679141937579355e-05, "loss": 0.5386, "step": 207650 }, { "epoch": 2.3, "learning_rate": 1.1678219210440843e-05, "loss": 0.5975, "step": 207655 }, { "epoch": 2.3, "learning_rate": 1.167729648330233e-05, "loss": 0.6175, "step": 207660 }, { "epoch": 2.3, "learning_rate": 1.1676373756163819e-05, "loss": 0.5688, "step": 207665 }, { "epoch": 2.3, "learning_rate": 1.1675451029025306e-05, "loss": 0.6393, "step": 207670 }, { "epoch": 2.3, "learning_rate": 1.1674528301886793e-05, "loss": 0.6217, "step": 207675 }, { "epoch": 2.3, "learning_rate": 1.167360557474828e-05, "loss": 0.5922, "step": 207680 }, { "epoch": 2.3, "learning_rate": 1.1672682847609768e-05, "loss": 0.631, "step": 207685 }, { "epoch": 2.3, "learning_rate": 1.1671760120471256e-05, "loss": 0.6584, "step": 207690 }, { "epoch": 2.3, "learning_rate": 1.1670837393332744e-05, "loss": 0.5992, "step": 207695 }, { "epoch": 2.3, "learning_rate": 1.1669914666194232e-05, "loss": 0.5787, "step": 207700 }, { "epoch": 2.3, "learning_rate": 1.1668991939055718e-05, "loss": 0.6245, "step": 207705 }, { "epoch": 2.3, "learning_rate": 1.1668069211917206e-05, "loss": 0.6232, "step": 207710 }, { "epoch": 2.3, "learning_rate": 1.1667146484778694e-05, "loss": 0.5399, "step": 207715 }, { "epoch": 2.3, "learning_rate": 1.1666223757640181e-05, "loss": 0.5996, "step": 207720 }, { "epoch": 2.3, "learning_rate": 1.166530103050167e-05, "loss": 0.5394, "step": 207725 }, { "epoch": 2.3, "learning_rate": 1.1664378303363155e-05, "loss": 0.6148, "step": 207730 }, { "epoch": 2.3, "learning_rate": 1.1663455576224643e-05, "loss": 0.5771, "step": 207735 }, { "epoch": 2.3, "learning_rate": 1.1662532849086131e-05, "loss": 0.5507, "step": 207740 }, { "epoch": 2.3, "learning_rate": 1.1661610121947619e-05, "loss": 0.5811, "step": 207745 }, { "epoch": 2.3, "learning_rate": 1.1660687394809107e-05, "loss": 0.5887, "step": 207750 }, { "epoch": 2.3, "learning_rate": 1.1659764667670595e-05, "loss": 0.6541, "step": 207755 }, { "epoch": 2.3, "learning_rate": 1.1658841940532083e-05, "loss": 0.5783, "step": 207760 }, { "epoch": 2.3, "learning_rate": 1.165791921339357e-05, "loss": 0.578, "step": 207765 }, { "epoch": 2.3, "learning_rate": 1.1656996486255058e-05, "loss": 0.605, "step": 207770 }, { "epoch": 2.3, "learning_rate": 1.1656073759116546e-05, "loss": 0.5716, "step": 207775 }, { "epoch": 2.3, "learning_rate": 1.1655151031978032e-05, "loss": 0.5504, "step": 207780 }, { "epoch": 2.3, "learning_rate": 1.165422830483952e-05, "loss": 0.6586, "step": 207785 }, { "epoch": 2.3, "learning_rate": 1.1653305577701006e-05, "loss": 0.6145, "step": 207790 }, { "epoch": 2.3, "learning_rate": 1.1652382850562494e-05, "loss": 0.5784, "step": 207795 }, { "epoch": 2.3, "learning_rate": 1.1651460123423982e-05, "loss": 0.5693, "step": 207800 }, { "epoch": 2.3, "learning_rate": 1.165053739628547e-05, "loss": 0.5422, "step": 207805 }, { "epoch": 2.3, "learning_rate": 1.1649614669146958e-05, "loss": 0.5579, "step": 207810 }, { "epoch": 2.3, "learning_rate": 1.1648691942008446e-05, "loss": 0.6476, "step": 207815 }, { "epoch": 2.3, "learning_rate": 1.1647769214869933e-05, "loss": 0.6344, "step": 207820 }, { "epoch": 2.3, "learning_rate": 1.1646846487731421e-05, "loss": 0.567, "step": 207825 }, { "epoch": 2.3, "learning_rate": 1.1645923760592909e-05, "loss": 0.6093, "step": 207830 }, { "epoch": 2.3, "learning_rate": 1.1645001033454395e-05, "loss": 0.6135, "step": 207835 }, { "epoch": 2.3, "learning_rate": 1.1644078306315883e-05, "loss": 0.6292, "step": 207840 }, { "epoch": 2.3, "learning_rate": 1.1643155579177371e-05, "loss": 0.6232, "step": 207845 }, { "epoch": 2.3, "learning_rate": 1.1642232852038859e-05, "loss": 0.572, "step": 207850 }, { "epoch": 2.3, "learning_rate": 1.1641310124900345e-05, "loss": 0.6123, "step": 207855 }, { "epoch": 2.3, "learning_rate": 1.1640387397761833e-05, "loss": 0.5933, "step": 207860 }, { "epoch": 2.3, "learning_rate": 1.163946467062332e-05, "loss": 0.633, "step": 207865 }, { "epoch": 2.3, "learning_rate": 1.1638541943484808e-05, "loss": 0.631, "step": 207870 }, { "epoch": 2.3, "learning_rate": 1.1637619216346296e-05, "loss": 0.5974, "step": 207875 }, { "epoch": 2.3, "learning_rate": 1.1636696489207784e-05, "loss": 0.6179, "step": 207880 }, { "epoch": 2.3, "learning_rate": 1.1635773762069272e-05, "loss": 0.6031, "step": 207885 }, { "epoch": 2.3, "learning_rate": 1.163485103493076e-05, "loss": 0.6407, "step": 207890 }, { "epoch": 2.3, "learning_rate": 1.1633928307792246e-05, "loss": 0.6507, "step": 207895 }, { "epoch": 2.3, "learning_rate": 1.1633005580653734e-05, "loss": 0.5964, "step": 207900 }, { "epoch": 2.3, "learning_rate": 1.1632082853515222e-05, "loss": 0.6182, "step": 207905 }, { "epoch": 2.3, "learning_rate": 1.163116012637671e-05, "loss": 0.5793, "step": 207910 }, { "epoch": 2.3, "learning_rate": 1.1630237399238197e-05, "loss": 0.5951, "step": 207915 }, { "epoch": 2.3, "learning_rate": 1.1629314672099685e-05, "loss": 0.554, "step": 207920 }, { "epoch": 2.3, "learning_rate": 1.1628391944961173e-05, "loss": 0.5717, "step": 207925 }, { "epoch": 2.3, "learning_rate": 1.1627469217822661e-05, "loss": 0.6172, "step": 207930 }, { "epoch": 2.3, "learning_rate": 1.1626546490684147e-05, "loss": 0.5786, "step": 207935 }, { "epoch": 2.3, "learning_rate": 1.1625623763545635e-05, "loss": 0.639, "step": 207940 }, { "epoch": 2.3, "learning_rate": 1.1624701036407123e-05, "loss": 0.5632, "step": 207945 }, { "epoch": 2.3, "learning_rate": 1.1623778309268609e-05, "loss": 0.5694, "step": 207950 }, { "epoch": 2.3, "learning_rate": 1.1622855582130097e-05, "loss": 0.6321, "step": 207955 }, { "epoch": 2.3, "learning_rate": 1.1621932854991585e-05, "loss": 0.5809, "step": 207960 }, { "epoch": 2.3, "learning_rate": 1.1621010127853072e-05, "loss": 0.6128, "step": 207965 }, { "epoch": 2.3, "learning_rate": 1.162008740071456e-05, "loss": 0.619, "step": 207970 }, { "epoch": 2.3, "learning_rate": 1.1619164673576048e-05, "loss": 0.5623, "step": 207975 }, { "epoch": 2.3, "learning_rate": 1.1618241946437536e-05, "loss": 0.6, "step": 207980 }, { "epoch": 2.3, "learning_rate": 1.1617319219299024e-05, "loss": 0.613, "step": 207985 }, { "epoch": 2.3, "learning_rate": 1.1616396492160512e-05, "loss": 0.5931, "step": 207990 }, { "epoch": 2.3, "learning_rate": 1.1615473765022e-05, "loss": 0.5946, "step": 207995 }, { "epoch": 2.3, "learning_rate": 1.1614551037883486e-05, "loss": 0.6161, "step": 208000 }, { "epoch": 2.3, "eval_loss": 0.5665950775146484, "eval_runtime": 69.385, "eval_samples_per_second": 28.825, "eval_steps_per_second": 14.412, "step": 208000 }, { "epoch": 2.3, "learning_rate": 1.1613628310744974e-05, "loss": 0.5823, "step": 208005 }, { "epoch": 2.3, "learning_rate": 1.161270558360646e-05, "loss": 0.6297, "step": 208010 }, { "epoch": 2.3, "learning_rate": 1.1611782856467948e-05, "loss": 0.6341, "step": 208015 }, { "epoch": 2.3, "learning_rate": 1.1610860129329435e-05, "loss": 0.5655, "step": 208020 }, { "epoch": 2.3, "learning_rate": 1.1609937402190923e-05, "loss": 0.62, "step": 208025 }, { "epoch": 2.3, "learning_rate": 1.1609014675052411e-05, "loss": 0.6383, "step": 208030 }, { "epoch": 2.3, "learning_rate": 1.1608091947913899e-05, "loss": 0.5585, "step": 208035 }, { "epoch": 2.3, "learning_rate": 1.1607169220775387e-05, "loss": 0.6018, "step": 208040 }, { "epoch": 2.3, "learning_rate": 1.1606246493636875e-05, "loss": 0.5798, "step": 208045 }, { "epoch": 2.3, "learning_rate": 1.1605323766498363e-05, "loss": 0.5857, "step": 208050 }, { "epoch": 2.3, "learning_rate": 1.160440103935985e-05, "loss": 0.6001, "step": 208055 }, { "epoch": 2.3, "learning_rate": 1.1603478312221337e-05, "loss": 0.5822, "step": 208060 }, { "epoch": 2.3, "learning_rate": 1.1602555585082824e-05, "loss": 0.5961, "step": 208065 }, { "epoch": 2.3, "learning_rate": 1.1601632857944312e-05, "loss": 0.5984, "step": 208070 }, { "epoch": 2.3, "learning_rate": 1.16007101308058e-05, "loss": 0.568, "step": 208075 }, { "epoch": 2.3, "learning_rate": 1.1599787403667288e-05, "loss": 0.5906, "step": 208080 }, { "epoch": 2.3, "learning_rate": 1.1598864676528774e-05, "loss": 0.5474, "step": 208085 }, { "epoch": 2.3, "learning_rate": 1.1597941949390262e-05, "loss": 0.5652, "step": 208090 }, { "epoch": 2.3, "learning_rate": 1.159701922225175e-05, "loss": 0.5545, "step": 208095 }, { "epoch": 2.3, "learning_rate": 1.1596096495113238e-05, "loss": 0.5836, "step": 208100 }, { "epoch": 2.3, "learning_rate": 1.1595173767974725e-05, "loss": 0.6179, "step": 208105 }, { "epoch": 2.3, "learning_rate": 1.1594251040836213e-05, "loss": 0.568, "step": 208110 }, { "epoch": 2.3, "learning_rate": 1.15933283136977e-05, "loss": 0.6133, "step": 208115 }, { "epoch": 2.3, "learning_rate": 1.1592405586559187e-05, "loss": 0.5372, "step": 208120 }, { "epoch": 2.3, "learning_rate": 1.1591482859420675e-05, "loss": 0.5988, "step": 208125 }, { "epoch": 2.3, "learning_rate": 1.1590560132282163e-05, "loss": 0.5806, "step": 208130 }, { "epoch": 2.3, "learning_rate": 1.1589637405143651e-05, "loss": 0.5909, "step": 208135 }, { "epoch": 2.3, "learning_rate": 1.1588714678005139e-05, "loss": 0.5846, "step": 208140 }, { "epoch": 2.3, "learning_rate": 1.1587791950866627e-05, "loss": 0.5913, "step": 208145 }, { "epoch": 2.3, "learning_rate": 1.1586869223728114e-05, "loss": 0.5981, "step": 208150 }, { "epoch": 2.3, "learning_rate": 1.1585946496589602e-05, "loss": 0.5665, "step": 208155 }, { "epoch": 2.3, "learning_rate": 1.1585023769451088e-05, "loss": 0.6237, "step": 208160 }, { "epoch": 2.3, "learning_rate": 1.1584101042312576e-05, "loss": 0.5919, "step": 208165 }, { "epoch": 2.31, "learning_rate": 1.1583178315174064e-05, "loss": 0.6022, "step": 208170 }, { "epoch": 2.31, "learning_rate": 1.158225558803555e-05, "loss": 0.5845, "step": 208175 }, { "epoch": 2.31, "learning_rate": 1.1581332860897038e-05, "loss": 0.6147, "step": 208180 }, { "epoch": 2.31, "learning_rate": 1.1580410133758526e-05, "loss": 0.5235, "step": 208185 }, { "epoch": 2.31, "learning_rate": 1.1579487406620014e-05, "loss": 0.6482, "step": 208190 }, { "epoch": 2.31, "learning_rate": 1.1578564679481502e-05, "loss": 0.6029, "step": 208195 }, { "epoch": 2.31, "learning_rate": 1.157764195234299e-05, "loss": 0.5884, "step": 208200 }, { "epoch": 2.31, "learning_rate": 1.1576719225204477e-05, "loss": 0.5792, "step": 208205 }, { "epoch": 2.31, "learning_rate": 1.1575796498065965e-05, "loss": 0.613, "step": 208210 }, { "epoch": 2.31, "learning_rate": 1.1574873770927453e-05, "loss": 0.5885, "step": 208215 }, { "epoch": 2.31, "learning_rate": 1.1573951043788941e-05, "loss": 0.588, "step": 208220 }, { "epoch": 2.31, "learning_rate": 1.1573028316650427e-05, "loss": 0.6051, "step": 208225 }, { "epoch": 2.31, "learning_rate": 1.1572105589511915e-05, "loss": 0.5561, "step": 208230 }, { "epoch": 2.31, "learning_rate": 1.1571182862373401e-05, "loss": 0.562, "step": 208235 }, { "epoch": 2.31, "learning_rate": 1.1570260135234889e-05, "loss": 0.6223, "step": 208240 }, { "epoch": 2.31, "learning_rate": 1.1569337408096377e-05, "loss": 0.5855, "step": 208245 }, { "epoch": 2.31, "learning_rate": 1.1568414680957865e-05, "loss": 0.5824, "step": 208250 }, { "epoch": 2.31, "learning_rate": 1.1567491953819352e-05, "loss": 0.6276, "step": 208255 }, { "epoch": 2.31, "learning_rate": 1.156656922668084e-05, "loss": 0.5535, "step": 208260 }, { "epoch": 2.31, "learning_rate": 1.1565646499542328e-05, "loss": 0.6223, "step": 208265 }, { "epoch": 2.31, "learning_rate": 1.1564723772403816e-05, "loss": 0.5383, "step": 208270 }, { "epoch": 2.31, "learning_rate": 1.1563801045265304e-05, "loss": 0.5906, "step": 208275 }, { "epoch": 2.31, "learning_rate": 1.156287831812679e-05, "loss": 0.5694, "step": 208280 }, { "epoch": 2.31, "learning_rate": 1.1561955590988278e-05, "loss": 0.5598, "step": 208285 }, { "epoch": 2.31, "learning_rate": 1.1561032863849766e-05, "loss": 0.6303, "step": 208290 }, { "epoch": 2.31, "learning_rate": 1.1560110136711254e-05, "loss": 0.5926, "step": 208295 }, { "epoch": 2.31, "learning_rate": 1.1559187409572741e-05, "loss": 0.6151, "step": 208300 }, { "epoch": 2.31, "learning_rate": 1.155826468243423e-05, "loss": 0.6026, "step": 208305 }, { "epoch": 2.31, "learning_rate": 1.1557341955295717e-05, "loss": 0.6585, "step": 208310 }, { "epoch": 2.31, "learning_rate": 1.1556419228157203e-05, "loss": 0.627, "step": 208315 }, { "epoch": 2.31, "learning_rate": 1.1555496501018691e-05, "loss": 0.5726, "step": 208320 }, { "epoch": 2.31, "learning_rate": 1.1554573773880179e-05, "loss": 0.5657, "step": 208325 }, { "epoch": 2.31, "learning_rate": 1.1553651046741667e-05, "loss": 0.5955, "step": 208330 }, { "epoch": 2.31, "learning_rate": 1.1552728319603155e-05, "loss": 0.5905, "step": 208335 }, { "epoch": 2.31, "learning_rate": 1.155180559246464e-05, "loss": 0.6024, "step": 208340 }, { "epoch": 2.31, "learning_rate": 1.1550882865326129e-05, "loss": 0.5634, "step": 208345 }, { "epoch": 2.31, "learning_rate": 1.1549960138187617e-05, "loss": 0.5829, "step": 208350 }, { "epoch": 2.31, "learning_rate": 1.1549037411049104e-05, "loss": 0.6489, "step": 208355 }, { "epoch": 2.31, "learning_rate": 1.1548114683910592e-05, "loss": 0.5115, "step": 208360 }, { "epoch": 2.31, "learning_rate": 1.154719195677208e-05, "loss": 0.5985, "step": 208365 }, { "epoch": 2.31, "learning_rate": 1.1546269229633568e-05, "loss": 0.6224, "step": 208370 }, { "epoch": 2.31, "learning_rate": 1.1545346502495056e-05, "loss": 0.6404, "step": 208375 }, { "epoch": 2.31, "learning_rate": 1.1544423775356544e-05, "loss": 0.5501, "step": 208380 }, { "epoch": 2.31, "learning_rate": 1.154350104821803e-05, "loss": 0.6189, "step": 208385 }, { "epoch": 2.31, "learning_rate": 1.1542578321079518e-05, "loss": 0.5858, "step": 208390 }, { "epoch": 2.31, "learning_rate": 1.1541655593941004e-05, "loss": 0.5484, "step": 208395 }, { "epoch": 2.31, "learning_rate": 1.1540732866802492e-05, "loss": 0.6324, "step": 208400 }, { "epoch": 2.31, "learning_rate": 1.153981013966398e-05, "loss": 0.5705, "step": 208405 }, { "epoch": 2.31, "learning_rate": 1.1538887412525467e-05, "loss": 0.5871, "step": 208410 }, { "epoch": 2.31, "learning_rate": 1.1537964685386955e-05, "loss": 0.6263, "step": 208415 }, { "epoch": 2.31, "learning_rate": 1.1537041958248443e-05, "loss": 0.5756, "step": 208420 }, { "epoch": 2.31, "learning_rate": 1.1536119231109931e-05, "loss": 0.5597, "step": 208425 }, { "epoch": 2.31, "learning_rate": 1.1535196503971419e-05, "loss": 0.549, "step": 208430 }, { "epoch": 2.31, "learning_rate": 1.1534273776832907e-05, "loss": 0.6203, "step": 208435 }, { "epoch": 2.31, "learning_rate": 1.1533351049694394e-05, "loss": 0.5907, "step": 208440 }, { "epoch": 2.31, "learning_rate": 1.153242832255588e-05, "loss": 0.6045, "step": 208445 }, { "epoch": 2.31, "learning_rate": 1.1531505595417368e-05, "loss": 0.6143, "step": 208450 }, { "epoch": 2.31, "learning_rate": 1.1530582868278856e-05, "loss": 0.5897, "step": 208455 }, { "epoch": 2.31, "learning_rate": 1.1529660141140344e-05, "loss": 0.6094, "step": 208460 }, { "epoch": 2.31, "learning_rate": 1.152873741400183e-05, "loss": 0.6573, "step": 208465 }, { "epoch": 2.31, "learning_rate": 1.1527814686863318e-05, "loss": 0.6008, "step": 208470 }, { "epoch": 2.31, "learning_rate": 1.1526891959724806e-05, "loss": 0.6034, "step": 208475 }, { "epoch": 2.31, "learning_rate": 1.1525969232586294e-05, "loss": 0.6632, "step": 208480 }, { "epoch": 2.31, "learning_rate": 1.1525046505447782e-05, "loss": 0.5624, "step": 208485 }, { "epoch": 2.31, "learning_rate": 1.152412377830927e-05, "loss": 0.5434, "step": 208490 }, { "epoch": 2.31, "learning_rate": 1.1523201051170757e-05, "loss": 0.5499, "step": 208495 }, { "epoch": 2.31, "learning_rate": 1.1522278324032244e-05, "loss": 0.6113, "step": 208500 }, { "epoch": 2.31, "learning_rate": 1.1521355596893731e-05, "loss": 0.5476, "step": 208505 }, { "epoch": 2.31, "learning_rate": 1.152043286975522e-05, "loss": 0.586, "step": 208510 }, { "epoch": 2.31, "learning_rate": 1.1519510142616707e-05, "loss": 0.5893, "step": 208515 }, { "epoch": 2.31, "learning_rate": 1.1518587415478195e-05, "loss": 0.5598, "step": 208520 }, { "epoch": 2.31, "learning_rate": 1.1517664688339683e-05, "loss": 0.5777, "step": 208525 }, { "epoch": 2.31, "learning_rate": 1.151674196120117e-05, "loss": 0.5646, "step": 208530 }, { "epoch": 2.31, "learning_rate": 1.1515819234062658e-05, "loss": 0.5641, "step": 208535 }, { "epoch": 2.31, "learning_rate": 1.1514896506924145e-05, "loss": 0.5962, "step": 208540 }, { "epoch": 2.31, "learning_rate": 1.1513973779785632e-05, "loss": 0.6279, "step": 208545 }, { "epoch": 2.31, "learning_rate": 1.151305105264712e-05, "loss": 0.6404, "step": 208550 }, { "epoch": 2.31, "learning_rate": 1.1512128325508608e-05, "loss": 0.576, "step": 208555 }, { "epoch": 2.31, "learning_rate": 1.1511205598370094e-05, "loss": 0.6381, "step": 208560 }, { "epoch": 2.31, "learning_rate": 1.1510282871231582e-05, "loss": 0.6092, "step": 208565 }, { "epoch": 2.31, "learning_rate": 1.150936014409307e-05, "loss": 0.5521, "step": 208570 }, { "epoch": 2.31, "learning_rate": 1.1508437416954558e-05, "loss": 0.6047, "step": 208575 }, { "epoch": 2.31, "learning_rate": 1.1507514689816046e-05, "loss": 0.609, "step": 208580 }, { "epoch": 2.31, "learning_rate": 1.1506591962677534e-05, "loss": 0.5916, "step": 208585 }, { "epoch": 2.31, "learning_rate": 1.1505669235539021e-05, "loss": 0.5758, "step": 208590 }, { "epoch": 2.31, "learning_rate": 1.150474650840051e-05, "loss": 0.5881, "step": 208595 }, { "epoch": 2.31, "learning_rate": 1.1503823781261997e-05, "loss": 0.6179, "step": 208600 }, { "epoch": 2.31, "learning_rate": 1.1502901054123485e-05, "loss": 0.6581, "step": 208605 }, { "epoch": 2.31, "learning_rate": 1.1501978326984971e-05, "loss": 0.6412, "step": 208610 }, { "epoch": 2.31, "learning_rate": 1.1501055599846457e-05, "loss": 0.57, "step": 208615 }, { "epoch": 2.31, "learning_rate": 1.1500132872707945e-05, "loss": 0.589, "step": 208620 }, { "epoch": 2.31, "learning_rate": 1.1499210145569433e-05, "loss": 0.5653, "step": 208625 }, { "epoch": 2.31, "learning_rate": 1.149828741843092e-05, "loss": 0.5802, "step": 208630 }, { "epoch": 2.31, "learning_rate": 1.1497364691292409e-05, "loss": 0.6049, "step": 208635 }, { "epoch": 2.31, "learning_rate": 1.1496441964153896e-05, "loss": 0.5988, "step": 208640 }, { "epoch": 2.31, "learning_rate": 1.1495519237015384e-05, "loss": 0.6053, "step": 208645 }, { "epoch": 2.31, "learning_rate": 1.1494596509876872e-05, "loss": 0.6054, "step": 208650 }, { "epoch": 2.31, "learning_rate": 1.149367378273836e-05, "loss": 0.6339, "step": 208655 }, { "epoch": 2.31, "learning_rate": 1.1492751055599848e-05, "loss": 0.6252, "step": 208660 }, { "epoch": 2.31, "learning_rate": 1.1491828328461334e-05, "loss": 0.6025, "step": 208665 }, { "epoch": 2.31, "learning_rate": 1.1490905601322822e-05, "loss": 0.5872, "step": 208670 }, { "epoch": 2.31, "learning_rate": 1.148998287418431e-05, "loss": 0.5821, "step": 208675 }, { "epoch": 2.31, "learning_rate": 1.1489060147045798e-05, "loss": 0.591, "step": 208680 }, { "epoch": 2.31, "learning_rate": 1.1488137419907285e-05, "loss": 0.5612, "step": 208685 }, { "epoch": 2.31, "learning_rate": 1.1487214692768772e-05, "loss": 0.5967, "step": 208690 }, { "epoch": 2.31, "learning_rate": 1.148629196563026e-05, "loss": 0.5467, "step": 208695 }, { "epoch": 2.31, "learning_rate": 1.1485369238491747e-05, "loss": 0.5844, "step": 208700 }, { "epoch": 2.31, "learning_rate": 1.1484446511353235e-05, "loss": 0.563, "step": 208705 }, { "epoch": 2.31, "learning_rate": 1.1483523784214723e-05, "loss": 0.602, "step": 208710 }, { "epoch": 2.31, "learning_rate": 1.148260105707621e-05, "loss": 0.5777, "step": 208715 }, { "epoch": 2.31, "learning_rate": 1.1481678329937699e-05, "loss": 0.6168, "step": 208720 }, { "epoch": 2.31, "learning_rate": 1.1480755602799185e-05, "loss": 0.5842, "step": 208725 }, { "epoch": 2.31, "learning_rate": 1.1479832875660673e-05, "loss": 0.5802, "step": 208730 }, { "epoch": 2.31, "learning_rate": 1.147891014852216e-05, "loss": 0.5468, "step": 208735 }, { "epoch": 2.31, "learning_rate": 1.1477987421383648e-05, "loss": 0.5783, "step": 208740 }, { "epoch": 2.31, "learning_rate": 1.1477064694245136e-05, "loss": 0.5819, "step": 208745 }, { "epoch": 2.31, "learning_rate": 1.1476141967106624e-05, "loss": 0.5642, "step": 208750 }, { "epoch": 2.31, "learning_rate": 1.1475219239968112e-05, "loss": 0.5625, "step": 208755 }, { "epoch": 2.31, "learning_rate": 1.14742965128296e-05, "loss": 0.5889, "step": 208760 }, { "epoch": 2.31, "learning_rate": 1.1473373785691088e-05, "loss": 0.5795, "step": 208765 }, { "epoch": 2.31, "learning_rate": 1.1472451058552574e-05, "loss": 0.5934, "step": 208770 }, { "epoch": 2.31, "learning_rate": 1.1471528331414062e-05, "loss": 0.5772, "step": 208775 }, { "epoch": 2.31, "learning_rate": 1.1470605604275548e-05, "loss": 0.5394, "step": 208780 }, { "epoch": 2.31, "learning_rate": 1.1469682877137036e-05, "loss": 0.6068, "step": 208785 }, { "epoch": 2.31, "learning_rate": 1.1468760149998523e-05, "loss": 0.6135, "step": 208790 }, { "epoch": 2.31, "learning_rate": 1.1467837422860011e-05, "loss": 0.5551, "step": 208795 }, { "epoch": 2.31, "learning_rate": 1.14669146957215e-05, "loss": 0.563, "step": 208800 }, { "epoch": 2.31, "learning_rate": 1.1465991968582987e-05, "loss": 0.5879, "step": 208805 }, { "epoch": 2.31, "learning_rate": 1.1465069241444475e-05, "loss": 0.6193, "step": 208810 }, { "epoch": 2.31, "learning_rate": 1.1464146514305963e-05, "loss": 0.5754, "step": 208815 }, { "epoch": 2.31, "learning_rate": 1.146322378716745e-05, "loss": 0.6289, "step": 208820 }, { "epoch": 2.31, "learning_rate": 1.1462301060028938e-05, "loss": 0.5829, "step": 208825 }, { "epoch": 2.31, "learning_rate": 1.1461378332890425e-05, "loss": 0.5638, "step": 208830 }, { "epoch": 2.31, "learning_rate": 1.1460455605751912e-05, "loss": 0.5269, "step": 208835 }, { "epoch": 2.31, "learning_rate": 1.14595328786134e-05, "loss": 0.6108, "step": 208840 }, { "epoch": 2.31, "learning_rate": 1.1458610151474886e-05, "loss": 0.6347, "step": 208845 }, { "epoch": 2.31, "learning_rate": 1.1457687424336374e-05, "loss": 0.6159, "step": 208850 }, { "epoch": 2.31, "learning_rate": 1.1456764697197862e-05, "loss": 0.6457, "step": 208855 }, { "epoch": 2.31, "learning_rate": 1.145584197005935e-05, "loss": 0.618, "step": 208860 }, { "epoch": 2.31, "learning_rate": 1.1454919242920838e-05, "loss": 0.5674, "step": 208865 }, { "epoch": 2.31, "learning_rate": 1.1453996515782326e-05, "loss": 0.581, "step": 208870 }, { "epoch": 2.31, "learning_rate": 1.1453073788643814e-05, "loss": 0.626, "step": 208875 }, { "epoch": 2.31, "learning_rate": 1.1452151061505301e-05, "loss": 0.5739, "step": 208880 }, { "epoch": 2.31, "learning_rate": 1.145122833436679e-05, "loss": 0.5301, "step": 208885 }, { "epoch": 2.31, "learning_rate": 1.1450305607228275e-05, "loss": 0.5771, "step": 208890 }, { "epoch": 2.31, "learning_rate": 1.1449382880089763e-05, "loss": 0.5905, "step": 208895 }, { "epoch": 2.31, "learning_rate": 1.1448460152951251e-05, "loss": 0.6635, "step": 208900 }, { "epoch": 2.31, "learning_rate": 1.1447537425812739e-05, "loss": 0.5599, "step": 208905 }, { "epoch": 2.31, "learning_rate": 1.1446614698674227e-05, "loss": 0.607, "step": 208910 }, { "epoch": 2.31, "learning_rate": 1.1445691971535715e-05, "loss": 0.634, "step": 208915 }, { "epoch": 2.31, "learning_rate": 1.14447692443972e-05, "loss": 0.5628, "step": 208920 }, { "epoch": 2.31, "learning_rate": 1.1443846517258689e-05, "loss": 0.649, "step": 208925 }, { "epoch": 2.31, "learning_rate": 1.1442923790120176e-05, "loss": 0.5461, "step": 208930 }, { "epoch": 2.31, "learning_rate": 1.1442001062981664e-05, "loss": 0.553, "step": 208935 }, { "epoch": 2.31, "learning_rate": 1.1441078335843152e-05, "loss": 0.6149, "step": 208940 }, { "epoch": 2.31, "learning_rate": 1.1440155608704638e-05, "loss": 0.5839, "step": 208945 }, { "epoch": 2.31, "learning_rate": 1.1439232881566126e-05, "loss": 0.5875, "step": 208950 }, { "epoch": 2.31, "learning_rate": 1.1438310154427614e-05, "loss": 0.5879, "step": 208955 }, { "epoch": 2.31, "learning_rate": 1.1437387427289102e-05, "loss": 0.5382, "step": 208960 }, { "epoch": 2.31, "learning_rate": 1.143646470015059e-05, "loss": 0.5941, "step": 208965 }, { "epoch": 2.31, "learning_rate": 1.1435541973012078e-05, "loss": 0.5549, "step": 208970 }, { "epoch": 2.31, "learning_rate": 1.1434619245873565e-05, "loss": 0.5712, "step": 208975 }, { "epoch": 2.31, "learning_rate": 1.1433696518735053e-05, "loss": 0.5916, "step": 208980 }, { "epoch": 2.31, "learning_rate": 1.1432773791596541e-05, "loss": 0.6495, "step": 208985 }, { "epoch": 2.31, "learning_rate": 1.1431851064458029e-05, "loss": 0.5805, "step": 208990 }, { "epoch": 2.31, "learning_rate": 1.1430928337319515e-05, "loss": 0.6094, "step": 208995 }, { "epoch": 2.31, "learning_rate": 1.1430005610181003e-05, "loss": 0.5415, "step": 209000 }, { "epoch": 2.31, "eval_loss": 0.5632826089859009, "eval_runtime": 69.3909, "eval_samples_per_second": 28.822, "eval_steps_per_second": 14.411, "step": 209000 }, { "epoch": 2.31, "learning_rate": 1.1429082883042489e-05, "loss": 0.5882, "step": 209005 }, { "epoch": 2.31, "learning_rate": 1.1428160155903977e-05, "loss": 0.6058, "step": 209010 }, { "epoch": 2.31, "learning_rate": 1.1427237428765465e-05, "loss": 0.6609, "step": 209015 }, { "epoch": 2.31, "learning_rate": 1.1426314701626953e-05, "loss": 0.6384, "step": 209020 }, { "epoch": 2.31, "learning_rate": 1.142539197448844e-05, "loss": 0.582, "step": 209025 }, { "epoch": 2.31, "learning_rate": 1.1424469247349928e-05, "loss": 0.5864, "step": 209030 }, { "epoch": 2.31, "learning_rate": 1.1423546520211416e-05, "loss": 0.6025, "step": 209035 }, { "epoch": 2.31, "learning_rate": 1.1422623793072904e-05, "loss": 0.5757, "step": 209040 }, { "epoch": 2.31, "learning_rate": 1.1421701065934392e-05, "loss": 0.6017, "step": 209045 }, { "epoch": 2.31, "learning_rate": 1.1420778338795878e-05, "loss": 0.6654, "step": 209050 }, { "epoch": 2.31, "learning_rate": 1.1419855611657366e-05, "loss": 0.5686, "step": 209055 }, { "epoch": 2.31, "learning_rate": 1.1418932884518854e-05, "loss": 0.5553, "step": 209060 }, { "epoch": 2.31, "learning_rate": 1.1418010157380342e-05, "loss": 0.5606, "step": 209065 }, { "epoch": 2.31, "learning_rate": 1.1417087430241828e-05, "loss": 0.6296, "step": 209070 }, { "epoch": 2.32, "learning_rate": 1.1416164703103316e-05, "loss": 0.5864, "step": 209075 }, { "epoch": 2.32, "learning_rate": 1.1415241975964803e-05, "loss": 0.6513, "step": 209080 }, { "epoch": 2.32, "learning_rate": 1.1414319248826291e-05, "loss": 0.5768, "step": 209085 }, { "epoch": 2.32, "learning_rate": 1.141339652168778e-05, "loss": 0.5844, "step": 209090 }, { "epoch": 2.32, "learning_rate": 1.1412473794549267e-05, "loss": 0.5635, "step": 209095 }, { "epoch": 2.32, "learning_rate": 1.1411551067410755e-05, "loss": 0.5999, "step": 209100 }, { "epoch": 2.32, "learning_rate": 1.1410628340272243e-05, "loss": 0.5879, "step": 209105 }, { "epoch": 2.32, "learning_rate": 1.1409705613133729e-05, "loss": 0.6138, "step": 209110 }, { "epoch": 2.32, "learning_rate": 1.1408782885995217e-05, "loss": 0.6189, "step": 209115 }, { "epoch": 2.32, "learning_rate": 1.1407860158856705e-05, "loss": 0.6121, "step": 209120 }, { "epoch": 2.32, "learning_rate": 1.1406937431718192e-05, "loss": 0.5782, "step": 209125 }, { "epoch": 2.32, "learning_rate": 1.140601470457968e-05, "loss": 0.6253, "step": 209130 }, { "epoch": 2.32, "learning_rate": 1.1405091977441168e-05, "loss": 0.5734, "step": 209135 }, { "epoch": 2.32, "learning_rate": 1.1404169250302656e-05, "loss": 0.5914, "step": 209140 }, { "epoch": 2.32, "learning_rate": 1.1403246523164144e-05, "loss": 0.5381, "step": 209145 }, { "epoch": 2.32, "learning_rate": 1.140232379602563e-05, "loss": 0.562, "step": 209150 }, { "epoch": 2.32, "learning_rate": 1.1401401068887118e-05, "loss": 0.5208, "step": 209155 }, { "epoch": 2.32, "learning_rate": 1.1400478341748606e-05, "loss": 0.5837, "step": 209160 }, { "epoch": 2.32, "learning_rate": 1.1399555614610092e-05, "loss": 0.6136, "step": 209165 }, { "epoch": 2.32, "learning_rate": 1.139863288747158e-05, "loss": 0.5831, "step": 209170 }, { "epoch": 2.32, "learning_rate": 1.1397710160333067e-05, "loss": 0.648, "step": 209175 }, { "epoch": 2.32, "learning_rate": 1.1396787433194555e-05, "loss": 0.636, "step": 209180 }, { "epoch": 2.32, "learning_rate": 1.1395864706056043e-05, "loss": 0.6267, "step": 209185 }, { "epoch": 2.32, "learning_rate": 1.1394941978917531e-05, "loss": 0.6202, "step": 209190 }, { "epoch": 2.32, "learning_rate": 1.1394019251779019e-05, "loss": 0.6307, "step": 209195 }, { "epoch": 2.32, "learning_rate": 1.1393096524640507e-05, "loss": 0.6261, "step": 209200 }, { "epoch": 2.32, "learning_rate": 1.1392173797501995e-05, "loss": 0.6054, "step": 209205 }, { "epoch": 2.32, "learning_rate": 1.1391251070363482e-05, "loss": 0.593, "step": 209210 }, { "epoch": 2.32, "learning_rate": 1.1390328343224969e-05, "loss": 0.6254, "step": 209215 }, { "epoch": 2.32, "learning_rate": 1.1389405616086456e-05, "loss": 0.6017, "step": 209220 }, { "epoch": 2.32, "learning_rate": 1.1388482888947943e-05, "loss": 0.6003, "step": 209225 }, { "epoch": 2.32, "learning_rate": 1.138756016180943e-05, "loss": 0.5798, "step": 209230 }, { "epoch": 2.32, "learning_rate": 1.1386637434670918e-05, "loss": 0.6086, "step": 209235 }, { "epoch": 2.32, "learning_rate": 1.1385714707532406e-05, "loss": 0.6072, "step": 209240 }, { "epoch": 2.32, "learning_rate": 1.1384791980393894e-05, "loss": 0.642, "step": 209245 }, { "epoch": 2.32, "learning_rate": 1.1383869253255382e-05, "loss": 0.59, "step": 209250 }, { "epoch": 2.32, "learning_rate": 1.138294652611687e-05, "loss": 0.6193, "step": 209255 }, { "epoch": 2.32, "learning_rate": 1.1382023798978358e-05, "loss": 0.5557, "step": 209260 }, { "epoch": 2.32, "learning_rate": 1.1381101071839845e-05, "loss": 0.5692, "step": 209265 }, { "epoch": 2.32, "learning_rate": 1.1380178344701333e-05, "loss": 0.6292, "step": 209270 }, { "epoch": 2.32, "learning_rate": 1.137925561756282e-05, "loss": 0.6023, "step": 209275 }, { "epoch": 2.32, "learning_rate": 1.1378332890424307e-05, "loss": 0.6335, "step": 209280 }, { "epoch": 2.32, "learning_rate": 1.1377410163285795e-05, "loss": 0.6009, "step": 209285 }, { "epoch": 2.32, "learning_rate": 1.1376487436147283e-05, "loss": 0.5785, "step": 209290 }, { "epoch": 2.32, "learning_rate": 1.137556470900877e-05, "loss": 0.584, "step": 209295 }, { "epoch": 2.32, "learning_rate": 1.1374641981870257e-05, "loss": 0.6183, "step": 209300 }, { "epoch": 2.32, "learning_rate": 1.1373719254731745e-05, "loss": 0.5614, "step": 209305 }, { "epoch": 2.32, "learning_rate": 1.1372796527593233e-05, "loss": 0.6157, "step": 209310 }, { "epoch": 2.32, "learning_rate": 1.137187380045472e-05, "loss": 0.5449, "step": 209315 }, { "epoch": 2.32, "learning_rate": 1.1370951073316208e-05, "loss": 0.5762, "step": 209320 }, { "epoch": 2.32, "learning_rate": 1.1370028346177696e-05, "loss": 0.6095, "step": 209325 }, { "epoch": 2.32, "learning_rate": 1.1369105619039182e-05, "loss": 0.5723, "step": 209330 }, { "epoch": 2.32, "learning_rate": 1.136818289190067e-05, "loss": 0.5733, "step": 209335 }, { "epoch": 2.32, "learning_rate": 1.1367260164762158e-05, "loss": 0.5949, "step": 209340 }, { "epoch": 2.32, "learning_rate": 1.1366337437623646e-05, "loss": 0.5952, "step": 209345 }, { "epoch": 2.32, "learning_rate": 1.1365414710485134e-05, "loss": 0.6054, "step": 209350 }, { "epoch": 2.32, "learning_rate": 1.1364491983346622e-05, "loss": 0.5791, "step": 209355 }, { "epoch": 2.32, "learning_rate": 1.136356925620811e-05, "loss": 0.6057, "step": 209360 }, { "epoch": 2.32, "learning_rate": 1.1362646529069597e-05, "loss": 0.6081, "step": 209365 }, { "epoch": 2.32, "learning_rate": 1.1361723801931085e-05, "loss": 0.5588, "step": 209370 }, { "epoch": 2.32, "learning_rate": 1.1360801074792571e-05, "loss": 0.5595, "step": 209375 }, { "epoch": 2.32, "learning_rate": 1.1359878347654059e-05, "loss": 0.6022, "step": 209380 }, { "epoch": 2.32, "learning_rate": 1.1358955620515547e-05, "loss": 0.5905, "step": 209385 }, { "epoch": 2.32, "learning_rate": 1.1358032893377033e-05, "loss": 0.5728, "step": 209390 }, { "epoch": 2.32, "learning_rate": 1.1357110166238521e-05, "loss": 0.6563, "step": 209395 }, { "epoch": 2.32, "learning_rate": 1.1356187439100009e-05, "loss": 0.5998, "step": 209400 }, { "epoch": 2.32, "learning_rate": 1.1355264711961497e-05, "loss": 0.619, "step": 209405 }, { "epoch": 2.32, "learning_rate": 1.1354341984822985e-05, "loss": 0.5851, "step": 209410 }, { "epoch": 2.32, "learning_rate": 1.1353419257684472e-05, "loss": 0.5947, "step": 209415 }, { "epoch": 2.32, "learning_rate": 1.135249653054596e-05, "loss": 0.5947, "step": 209420 }, { "epoch": 2.32, "learning_rate": 1.1351573803407448e-05, "loss": 0.593, "step": 209425 }, { "epoch": 2.32, "learning_rate": 1.1350651076268936e-05, "loss": 0.6167, "step": 209430 }, { "epoch": 2.32, "learning_rate": 1.1349728349130424e-05, "loss": 0.5969, "step": 209435 }, { "epoch": 2.32, "learning_rate": 1.134880562199191e-05, "loss": 0.5758, "step": 209440 }, { "epoch": 2.32, "learning_rate": 1.1347882894853398e-05, "loss": 0.5751, "step": 209445 }, { "epoch": 2.32, "learning_rate": 1.1346960167714884e-05, "loss": 0.6166, "step": 209450 }, { "epoch": 2.32, "learning_rate": 1.1346037440576372e-05, "loss": 0.5898, "step": 209455 }, { "epoch": 2.32, "learning_rate": 1.134511471343786e-05, "loss": 0.5915, "step": 209460 }, { "epoch": 2.32, "learning_rate": 1.1344191986299347e-05, "loss": 0.6644, "step": 209465 }, { "epoch": 2.32, "learning_rate": 1.1343269259160835e-05, "loss": 0.6009, "step": 209470 }, { "epoch": 2.32, "learning_rate": 1.1342346532022323e-05, "loss": 0.5364, "step": 209475 }, { "epoch": 2.32, "learning_rate": 1.1341423804883811e-05, "loss": 0.5754, "step": 209480 }, { "epoch": 2.32, "learning_rate": 1.1340501077745299e-05, "loss": 0.6338, "step": 209485 }, { "epoch": 2.32, "learning_rate": 1.1339578350606787e-05, "loss": 0.6195, "step": 209490 }, { "epoch": 2.32, "learning_rate": 1.1338655623468273e-05, "loss": 0.504, "step": 209495 }, { "epoch": 2.32, "learning_rate": 1.133773289632976e-05, "loss": 0.6329, "step": 209500 }, { "epoch": 2.32, "learning_rate": 1.1336810169191249e-05, "loss": 0.5823, "step": 209505 }, { "epoch": 2.32, "learning_rate": 1.1335887442052736e-05, "loss": 0.582, "step": 209510 }, { "epoch": 2.32, "learning_rate": 1.1334964714914224e-05, "loss": 0.6897, "step": 209515 }, { "epoch": 2.32, "learning_rate": 1.1334041987775712e-05, "loss": 0.5693, "step": 209520 }, { "epoch": 2.32, "learning_rate": 1.1333119260637198e-05, "loss": 0.597, "step": 209525 }, { "epoch": 2.32, "learning_rate": 1.1332196533498686e-05, "loss": 0.6324, "step": 209530 }, { "epoch": 2.32, "learning_rate": 1.1331273806360174e-05, "loss": 0.5744, "step": 209535 }, { "epoch": 2.32, "learning_rate": 1.1330351079221662e-05, "loss": 0.5751, "step": 209540 }, { "epoch": 2.32, "learning_rate": 1.132942835208315e-05, "loss": 0.5908, "step": 209545 }, { "epoch": 2.32, "learning_rate": 1.1328505624944638e-05, "loss": 0.583, "step": 209550 }, { "epoch": 2.32, "learning_rate": 1.1327582897806124e-05, "loss": 0.5788, "step": 209555 }, { "epoch": 2.32, "learning_rate": 1.1326660170667612e-05, "loss": 0.5945, "step": 209560 }, { "epoch": 2.32, "learning_rate": 1.13257374435291e-05, "loss": 0.6026, "step": 209565 }, { "epoch": 2.32, "learning_rate": 1.1324814716390587e-05, "loss": 0.5723, "step": 209570 }, { "epoch": 2.32, "learning_rate": 1.1323891989252075e-05, "loss": 0.6412, "step": 209575 }, { "epoch": 2.32, "learning_rate": 1.1322969262113563e-05, "loss": 0.6163, "step": 209580 }, { "epoch": 2.32, "learning_rate": 1.132204653497505e-05, "loss": 0.5848, "step": 209585 }, { "epoch": 2.32, "learning_rate": 1.1321123807836539e-05, "loss": 0.6278, "step": 209590 }, { "epoch": 2.32, "learning_rate": 1.1320201080698026e-05, "loss": 0.5439, "step": 209595 }, { "epoch": 2.32, "learning_rate": 1.1319278353559513e-05, "loss": 0.542, "step": 209600 }, { "epoch": 2.32, "learning_rate": 1.1318355626421e-05, "loss": 0.6405, "step": 209605 }, { "epoch": 2.32, "learning_rate": 1.1317432899282487e-05, "loss": 0.5397, "step": 209610 }, { "epoch": 2.32, "learning_rate": 1.1316510172143974e-05, "loss": 0.6, "step": 209615 }, { "epoch": 2.32, "learning_rate": 1.1315587445005462e-05, "loss": 0.6092, "step": 209620 }, { "epoch": 2.32, "learning_rate": 1.131466471786695e-05, "loss": 0.5542, "step": 209625 }, { "epoch": 2.32, "learning_rate": 1.1313741990728438e-05, "loss": 0.5515, "step": 209630 }, { "epoch": 2.32, "learning_rate": 1.1312819263589926e-05, "loss": 0.5928, "step": 209635 }, { "epoch": 2.32, "learning_rate": 1.1311896536451414e-05, "loss": 0.5764, "step": 209640 }, { "epoch": 2.32, "learning_rate": 1.1310973809312902e-05, "loss": 0.6101, "step": 209645 }, { "epoch": 2.32, "learning_rate": 1.131005108217439e-05, "loss": 0.5888, "step": 209650 }, { "epoch": 2.32, "learning_rate": 1.1309128355035877e-05, "loss": 0.5801, "step": 209655 }, { "epoch": 2.32, "learning_rate": 1.1308205627897363e-05, "loss": 0.6202, "step": 209660 }, { "epoch": 2.32, "learning_rate": 1.1307282900758851e-05, "loss": 0.5708, "step": 209665 }, { "epoch": 2.32, "learning_rate": 1.1306360173620339e-05, "loss": 0.5926, "step": 209670 }, { "epoch": 2.32, "learning_rate": 1.1305437446481827e-05, "loss": 0.5523, "step": 209675 }, { "epoch": 2.32, "learning_rate": 1.1304514719343313e-05, "loss": 0.5689, "step": 209680 }, { "epoch": 2.32, "learning_rate": 1.1303591992204801e-05, "loss": 0.5715, "step": 209685 }, { "epoch": 2.32, "learning_rate": 1.1302669265066289e-05, "loss": 0.5547, "step": 209690 }, { "epoch": 2.32, "learning_rate": 1.1301746537927777e-05, "loss": 0.5925, "step": 209695 }, { "epoch": 2.32, "learning_rate": 1.1300823810789265e-05, "loss": 0.5884, "step": 209700 }, { "epoch": 2.32, "learning_rate": 1.1299901083650752e-05, "loss": 0.6256, "step": 209705 }, { "epoch": 2.32, "learning_rate": 1.129897835651224e-05, "loss": 0.5975, "step": 209710 }, { "epoch": 2.32, "learning_rate": 1.1298055629373726e-05, "loss": 0.5614, "step": 209715 }, { "epoch": 2.32, "learning_rate": 1.1297132902235214e-05, "loss": 0.5996, "step": 209720 }, { "epoch": 2.32, "learning_rate": 1.1296210175096702e-05, "loss": 0.5457, "step": 209725 }, { "epoch": 2.32, "learning_rate": 1.129528744795819e-05, "loss": 0.5639, "step": 209730 }, { "epoch": 2.32, "learning_rate": 1.1294364720819678e-05, "loss": 0.5549, "step": 209735 }, { "epoch": 2.32, "learning_rate": 1.1293441993681166e-05, "loss": 0.5637, "step": 209740 }, { "epoch": 2.32, "learning_rate": 1.1292519266542653e-05, "loss": 0.5957, "step": 209745 }, { "epoch": 2.32, "learning_rate": 1.1291596539404141e-05, "loss": 0.6052, "step": 209750 }, { "epoch": 2.32, "learning_rate": 1.1290673812265627e-05, "loss": 0.5817, "step": 209755 }, { "epoch": 2.32, "learning_rate": 1.1289751085127115e-05, "loss": 0.6153, "step": 209760 }, { "epoch": 2.32, "learning_rate": 1.1288828357988603e-05, "loss": 0.5523, "step": 209765 }, { "epoch": 2.32, "learning_rate": 1.1287905630850091e-05, "loss": 0.592, "step": 209770 }, { "epoch": 2.32, "learning_rate": 1.1286982903711577e-05, "loss": 0.588, "step": 209775 }, { "epoch": 2.32, "learning_rate": 1.1286060176573065e-05, "loss": 0.5687, "step": 209780 }, { "epoch": 2.32, "learning_rate": 1.1285137449434553e-05, "loss": 0.5982, "step": 209785 }, { "epoch": 2.32, "learning_rate": 1.128421472229604e-05, "loss": 0.5456, "step": 209790 }, { "epoch": 2.32, "learning_rate": 1.1283291995157529e-05, "loss": 0.576, "step": 209795 }, { "epoch": 2.32, "learning_rate": 1.1282369268019016e-05, "loss": 0.5913, "step": 209800 }, { "epoch": 2.32, "learning_rate": 1.1281446540880504e-05, "loss": 0.5636, "step": 209805 }, { "epoch": 2.32, "learning_rate": 1.1280523813741992e-05, "loss": 0.6133, "step": 209810 }, { "epoch": 2.32, "learning_rate": 1.127960108660348e-05, "loss": 0.5656, "step": 209815 }, { "epoch": 2.32, "learning_rate": 1.1278678359464968e-05, "loss": 0.6036, "step": 209820 }, { "epoch": 2.32, "learning_rate": 1.1277755632326454e-05, "loss": 0.5972, "step": 209825 }, { "epoch": 2.32, "learning_rate": 1.127683290518794e-05, "loss": 0.6006, "step": 209830 }, { "epoch": 2.32, "learning_rate": 1.1275910178049428e-05, "loss": 0.574, "step": 209835 }, { "epoch": 2.32, "learning_rate": 1.1274987450910916e-05, "loss": 0.591, "step": 209840 }, { "epoch": 2.32, "learning_rate": 1.1274064723772404e-05, "loss": 0.583, "step": 209845 }, { "epoch": 2.32, "learning_rate": 1.1273141996633891e-05, "loss": 0.5594, "step": 209850 }, { "epoch": 2.32, "learning_rate": 1.127221926949538e-05, "loss": 0.657, "step": 209855 }, { "epoch": 2.32, "learning_rate": 1.1271296542356867e-05, "loss": 0.6205, "step": 209860 }, { "epoch": 2.32, "learning_rate": 1.1270373815218355e-05, "loss": 0.5677, "step": 209865 }, { "epoch": 2.32, "learning_rate": 1.1269451088079843e-05, "loss": 0.5517, "step": 209870 }, { "epoch": 2.32, "learning_rate": 1.126852836094133e-05, "loss": 0.6251, "step": 209875 }, { "epoch": 2.32, "learning_rate": 1.1267605633802817e-05, "loss": 0.5603, "step": 209880 }, { "epoch": 2.32, "learning_rate": 1.1266682906664305e-05, "loss": 0.5731, "step": 209885 }, { "epoch": 2.32, "learning_rate": 1.1265760179525793e-05, "loss": 0.5723, "step": 209890 }, { "epoch": 2.32, "learning_rate": 1.126483745238728e-05, "loss": 0.602, "step": 209895 }, { "epoch": 2.32, "learning_rate": 1.1263914725248768e-05, "loss": 0.6654, "step": 209900 }, { "epoch": 2.32, "learning_rate": 1.1262991998110254e-05, "loss": 0.5723, "step": 209905 }, { "epoch": 2.32, "learning_rate": 1.1262069270971742e-05, "loss": 0.5918, "step": 209910 }, { "epoch": 2.32, "learning_rate": 1.126114654383323e-05, "loss": 0.655, "step": 209915 }, { "epoch": 2.32, "learning_rate": 1.1260223816694718e-05, "loss": 0.5792, "step": 209920 }, { "epoch": 2.32, "learning_rate": 1.1259301089556206e-05, "loss": 0.5386, "step": 209925 }, { "epoch": 2.32, "learning_rate": 1.1258378362417694e-05, "loss": 0.5851, "step": 209930 }, { "epoch": 2.32, "learning_rate": 1.1257455635279182e-05, "loss": 0.5623, "step": 209935 }, { "epoch": 2.32, "learning_rate": 1.1256532908140668e-05, "loss": 0.594, "step": 209940 }, { "epoch": 2.32, "learning_rate": 1.1255610181002156e-05, "loss": 0.6826, "step": 209945 }, { "epoch": 2.32, "learning_rate": 1.1254687453863643e-05, "loss": 0.5441, "step": 209950 }, { "epoch": 2.32, "learning_rate": 1.1253764726725131e-05, "loss": 0.6091, "step": 209955 }, { "epoch": 2.32, "learning_rate": 1.1252841999586619e-05, "loss": 0.6572, "step": 209960 }, { "epoch": 2.32, "learning_rate": 1.1251919272448107e-05, "loss": 0.5773, "step": 209965 }, { "epoch": 2.32, "learning_rate": 1.1250996545309595e-05, "loss": 0.5897, "step": 209970 }, { "epoch": 2.32, "learning_rate": 1.1250073818171083e-05, "loss": 0.6123, "step": 209975 }, { "epoch": 2.33, "learning_rate": 1.1249151091032569e-05, "loss": 0.5871, "step": 209980 }, { "epoch": 2.33, "learning_rate": 1.1248228363894057e-05, "loss": 0.5519, "step": 209985 }, { "epoch": 2.33, "learning_rate": 1.1247305636755544e-05, "loss": 0.6633, "step": 209990 }, { "epoch": 2.33, "learning_rate": 1.124638290961703e-05, "loss": 0.6696, "step": 209995 }, { "epoch": 2.33, "learning_rate": 1.1245460182478518e-05, "loss": 0.633, "step": 210000 }, { "epoch": 2.33, "eval_loss": 0.5498946309089661, "eval_runtime": 69.2527, "eval_samples_per_second": 28.88, "eval_steps_per_second": 14.44, "step": 210000 }, { "epoch": 2.33, "learning_rate": 1.1244537455340006e-05, "loss": 0.5586, "step": 210005 }, { "epoch": 2.33, "learning_rate": 1.1243614728201494e-05, "loss": 0.6032, "step": 210010 }, { "epoch": 2.33, "learning_rate": 1.1242692001062982e-05, "loss": 0.5929, "step": 210015 }, { "epoch": 2.33, "learning_rate": 1.124176927392447e-05, "loss": 0.5788, "step": 210020 }, { "epoch": 2.33, "learning_rate": 1.1240846546785958e-05, "loss": 0.6209, "step": 210025 }, { "epoch": 2.33, "learning_rate": 1.1239923819647446e-05, "loss": 0.6446, "step": 210030 }, { "epoch": 2.33, "learning_rate": 1.1239001092508933e-05, "loss": 0.5658, "step": 210035 }, { "epoch": 2.33, "learning_rate": 1.1238078365370421e-05, "loss": 0.5785, "step": 210040 }, { "epoch": 2.33, "learning_rate": 1.1237155638231907e-05, "loss": 0.6019, "step": 210045 }, { "epoch": 2.33, "learning_rate": 1.1236232911093395e-05, "loss": 0.5841, "step": 210050 }, { "epoch": 2.33, "learning_rate": 1.1235310183954883e-05, "loss": 0.6022, "step": 210055 }, { "epoch": 2.33, "learning_rate": 1.123438745681637e-05, "loss": 0.5685, "step": 210060 }, { "epoch": 2.33, "learning_rate": 1.1233464729677857e-05, "loss": 0.5606, "step": 210065 }, { "epoch": 2.33, "learning_rate": 1.1232542002539345e-05, "loss": 0.5565, "step": 210070 }, { "epoch": 2.33, "learning_rate": 1.1231619275400833e-05, "loss": 0.6395, "step": 210075 }, { "epoch": 2.33, "learning_rate": 1.123069654826232e-05, "loss": 0.5901, "step": 210080 }, { "epoch": 2.33, "learning_rate": 1.1229773821123809e-05, "loss": 0.6335, "step": 210085 }, { "epoch": 2.33, "learning_rate": 1.1228851093985296e-05, "loss": 0.6241, "step": 210090 }, { "epoch": 2.33, "learning_rate": 1.1227928366846784e-05, "loss": 0.5997, "step": 210095 }, { "epoch": 2.33, "learning_rate": 1.1227005639708272e-05, "loss": 0.6191, "step": 210100 }, { "epoch": 2.33, "learning_rate": 1.1226082912569758e-05, "loss": 0.6193, "step": 210105 }, { "epoch": 2.33, "learning_rate": 1.1225160185431246e-05, "loss": 0.5985, "step": 210110 }, { "epoch": 2.33, "learning_rate": 1.1224237458292734e-05, "loss": 0.6045, "step": 210115 }, { "epoch": 2.33, "learning_rate": 1.1223314731154222e-05, "loss": 0.5658, "step": 210120 }, { "epoch": 2.33, "learning_rate": 1.122239200401571e-05, "loss": 0.57, "step": 210125 }, { "epoch": 2.33, "learning_rate": 1.1221469276877197e-05, "loss": 0.6179, "step": 210130 }, { "epoch": 2.33, "learning_rate": 1.1220546549738684e-05, "loss": 0.554, "step": 210135 }, { "epoch": 2.33, "learning_rate": 1.1219623822600171e-05, "loss": 0.5992, "step": 210140 }, { "epoch": 2.33, "learning_rate": 1.121870109546166e-05, "loss": 0.5734, "step": 210145 }, { "epoch": 2.33, "learning_rate": 1.1217778368323147e-05, "loss": 0.5726, "step": 210150 }, { "epoch": 2.33, "learning_rate": 1.1216855641184635e-05, "loss": 0.5699, "step": 210155 }, { "epoch": 2.33, "learning_rate": 1.1215932914046121e-05, "loss": 0.6091, "step": 210160 }, { "epoch": 2.33, "learning_rate": 1.1215010186907609e-05, "loss": 0.6365, "step": 210165 }, { "epoch": 2.33, "learning_rate": 1.1214087459769097e-05, "loss": 0.595, "step": 210170 }, { "epoch": 2.33, "learning_rate": 1.1213164732630585e-05, "loss": 0.5997, "step": 210175 }, { "epoch": 2.33, "learning_rate": 1.1212242005492073e-05, "loss": 0.5623, "step": 210180 }, { "epoch": 2.33, "learning_rate": 1.121131927835356e-05, "loss": 0.5772, "step": 210185 }, { "epoch": 2.33, "learning_rate": 1.1210396551215048e-05, "loss": 0.5933, "step": 210190 }, { "epoch": 2.33, "learning_rate": 1.1209473824076536e-05, "loss": 0.6445, "step": 210195 }, { "epoch": 2.33, "learning_rate": 1.1208551096938024e-05, "loss": 0.6174, "step": 210200 }, { "epoch": 2.33, "learning_rate": 1.1207628369799512e-05, "loss": 0.5923, "step": 210205 }, { "epoch": 2.33, "learning_rate": 1.1206705642660998e-05, "loss": 0.5684, "step": 210210 }, { "epoch": 2.33, "learning_rate": 1.1205782915522486e-05, "loss": 0.6124, "step": 210215 }, { "epoch": 2.33, "learning_rate": 1.1204860188383972e-05, "loss": 0.6531, "step": 210220 }, { "epoch": 2.33, "learning_rate": 1.120393746124546e-05, "loss": 0.5974, "step": 210225 }, { "epoch": 2.33, "learning_rate": 1.1203014734106948e-05, "loss": 0.5662, "step": 210230 }, { "epoch": 2.33, "learning_rate": 1.1202092006968436e-05, "loss": 0.6054, "step": 210235 }, { "epoch": 2.33, "learning_rate": 1.1201169279829923e-05, "loss": 0.6025, "step": 210240 }, { "epoch": 2.33, "learning_rate": 1.1200246552691411e-05, "loss": 0.6183, "step": 210245 }, { "epoch": 2.33, "learning_rate": 1.1199323825552899e-05, "loss": 0.6037, "step": 210250 }, { "epoch": 2.33, "learning_rate": 1.1198401098414387e-05, "loss": 0.5575, "step": 210255 }, { "epoch": 2.33, "learning_rate": 1.1197478371275875e-05, "loss": 0.5852, "step": 210260 }, { "epoch": 2.33, "learning_rate": 1.1196555644137361e-05, "loss": 0.6383, "step": 210265 }, { "epoch": 2.33, "learning_rate": 1.1195632916998849e-05, "loss": 0.6367, "step": 210270 }, { "epoch": 2.33, "learning_rate": 1.1194710189860337e-05, "loss": 0.6325, "step": 210275 }, { "epoch": 2.33, "learning_rate": 1.1193787462721824e-05, "loss": 0.646, "step": 210280 }, { "epoch": 2.33, "learning_rate": 1.119286473558331e-05, "loss": 0.5472, "step": 210285 }, { "epoch": 2.33, "learning_rate": 1.1191942008444798e-05, "loss": 0.6183, "step": 210290 }, { "epoch": 2.33, "learning_rate": 1.1191019281306286e-05, "loss": 0.6104, "step": 210295 }, { "epoch": 2.33, "learning_rate": 1.1190096554167774e-05, "loss": 0.5733, "step": 210300 }, { "epoch": 2.33, "learning_rate": 1.1189173827029262e-05, "loss": 0.6073, "step": 210305 }, { "epoch": 2.33, "learning_rate": 1.118825109989075e-05, "loss": 0.6004, "step": 210310 }, { "epoch": 2.33, "learning_rate": 1.1187328372752238e-05, "loss": 0.5987, "step": 210315 }, { "epoch": 2.33, "learning_rate": 1.1186405645613726e-05, "loss": 0.6111, "step": 210320 }, { "epoch": 2.33, "learning_rate": 1.1185482918475212e-05, "loss": 0.5499, "step": 210325 }, { "epoch": 2.33, "learning_rate": 1.11845601913367e-05, "loss": 0.5723, "step": 210330 }, { "epoch": 2.33, "learning_rate": 1.1183637464198187e-05, "loss": 0.6109, "step": 210335 }, { "epoch": 2.33, "learning_rate": 1.1182714737059675e-05, "loss": 0.5761, "step": 210340 }, { "epoch": 2.33, "learning_rate": 1.1181792009921163e-05, "loss": 0.5985, "step": 210345 }, { "epoch": 2.33, "learning_rate": 1.1180869282782651e-05, "loss": 0.6023, "step": 210350 }, { "epoch": 2.33, "learning_rate": 1.1179946555644139e-05, "loss": 0.6081, "step": 210355 }, { "epoch": 2.33, "learning_rate": 1.1179023828505625e-05, "loss": 0.578, "step": 210360 }, { "epoch": 2.33, "learning_rate": 1.1178101101367113e-05, "loss": 0.5688, "step": 210365 }, { "epoch": 2.33, "learning_rate": 1.11771783742286e-05, "loss": 0.5787, "step": 210370 }, { "epoch": 2.33, "learning_rate": 1.1176255647090089e-05, "loss": 0.5937, "step": 210375 }, { "epoch": 2.33, "learning_rate": 1.1175332919951575e-05, "loss": 0.5754, "step": 210380 }, { "epoch": 2.33, "learning_rate": 1.1174410192813063e-05, "loss": 0.5879, "step": 210385 }, { "epoch": 2.33, "learning_rate": 1.117348746567455e-05, "loss": 0.61, "step": 210390 }, { "epoch": 2.33, "learning_rate": 1.1172564738536038e-05, "loss": 0.5789, "step": 210395 }, { "epoch": 2.33, "learning_rate": 1.1171642011397526e-05, "loss": 0.62, "step": 210400 }, { "epoch": 2.33, "learning_rate": 1.1170719284259014e-05, "loss": 0.5346, "step": 210405 }, { "epoch": 2.33, "learning_rate": 1.1169796557120502e-05, "loss": 0.5793, "step": 210410 }, { "epoch": 2.33, "learning_rate": 1.116887382998199e-05, "loss": 0.5044, "step": 210415 }, { "epoch": 2.33, "learning_rate": 1.1167951102843477e-05, "loss": 0.652, "step": 210420 }, { "epoch": 2.33, "learning_rate": 1.1167028375704965e-05, "loss": 0.5871, "step": 210425 }, { "epoch": 2.33, "learning_rate": 1.1166105648566451e-05, "loss": 0.6576, "step": 210430 }, { "epoch": 2.33, "learning_rate": 1.116518292142794e-05, "loss": 0.6104, "step": 210435 }, { "epoch": 2.33, "learning_rate": 1.1164260194289425e-05, "loss": 0.5568, "step": 210440 }, { "epoch": 2.33, "learning_rate": 1.1163337467150913e-05, "loss": 0.6101, "step": 210445 }, { "epoch": 2.33, "learning_rate": 1.1162414740012401e-05, "loss": 0.6074, "step": 210450 }, { "epoch": 2.33, "learning_rate": 1.1161492012873889e-05, "loss": 0.584, "step": 210455 }, { "epoch": 2.33, "learning_rate": 1.1160569285735377e-05, "loss": 0.5919, "step": 210460 }, { "epoch": 2.33, "learning_rate": 1.1159646558596865e-05, "loss": 0.607, "step": 210465 }, { "epoch": 2.33, "learning_rate": 1.1158723831458353e-05, "loss": 0.6269, "step": 210470 }, { "epoch": 2.33, "learning_rate": 1.115780110431984e-05, "loss": 0.6153, "step": 210475 }, { "epoch": 2.33, "learning_rate": 1.1156878377181328e-05, "loss": 0.556, "step": 210480 }, { "epoch": 2.33, "learning_rate": 1.1155955650042816e-05, "loss": 0.6195, "step": 210485 }, { "epoch": 2.33, "learning_rate": 1.1155032922904302e-05, "loss": 0.5796, "step": 210490 }, { "epoch": 2.33, "learning_rate": 1.115411019576579e-05, "loss": 0.602, "step": 210495 }, { "epoch": 2.33, "learning_rate": 1.1153187468627278e-05, "loss": 0.5951, "step": 210500 }, { "epoch": 2.33, "learning_rate": 1.1152264741488766e-05, "loss": 0.5699, "step": 210505 }, { "epoch": 2.33, "learning_rate": 1.1151342014350254e-05, "loss": 0.6438, "step": 210510 }, { "epoch": 2.33, "learning_rate": 1.115041928721174e-05, "loss": 0.6, "step": 210515 }, { "epoch": 2.33, "learning_rate": 1.1149496560073228e-05, "loss": 0.659, "step": 210520 }, { "epoch": 2.33, "learning_rate": 1.1148573832934715e-05, "loss": 0.5862, "step": 210525 }, { "epoch": 2.33, "learning_rate": 1.1147651105796203e-05, "loss": 0.6379, "step": 210530 }, { "epoch": 2.33, "learning_rate": 1.1146728378657691e-05, "loss": 0.5768, "step": 210535 }, { "epoch": 2.33, "learning_rate": 1.1145805651519179e-05, "loss": 0.6669, "step": 210540 }, { "epoch": 2.33, "learning_rate": 1.1144882924380665e-05, "loss": 0.6615, "step": 210545 }, { "epoch": 2.33, "learning_rate": 1.1143960197242153e-05, "loss": 0.6103, "step": 210550 }, { "epoch": 2.33, "learning_rate": 1.1143037470103641e-05, "loss": 0.6411, "step": 210555 }, { "epoch": 2.33, "learning_rate": 1.1142114742965129e-05, "loss": 0.5598, "step": 210560 }, { "epoch": 2.33, "learning_rate": 1.1141192015826617e-05, "loss": 0.5978, "step": 210565 }, { "epoch": 2.33, "learning_rate": 1.1140269288688104e-05, "loss": 0.5874, "step": 210570 }, { "epoch": 2.33, "learning_rate": 1.1139346561549592e-05, "loss": 0.5675, "step": 210575 }, { "epoch": 2.33, "learning_rate": 1.113842383441108e-05, "loss": 0.643, "step": 210580 }, { "epoch": 2.33, "learning_rate": 1.1137501107272568e-05, "loss": 0.5645, "step": 210585 }, { "epoch": 2.33, "learning_rate": 1.1136578380134054e-05, "loss": 0.5753, "step": 210590 }, { "epoch": 2.33, "learning_rate": 1.1135655652995542e-05, "loss": 0.5748, "step": 210595 }, { "epoch": 2.33, "learning_rate": 1.113473292585703e-05, "loss": 0.5892, "step": 210600 }, { "epoch": 2.33, "learning_rate": 1.1133810198718516e-05, "loss": 0.6104, "step": 210605 }, { "epoch": 2.33, "learning_rate": 1.1132887471580004e-05, "loss": 0.5602, "step": 210610 }, { "epoch": 2.33, "learning_rate": 1.1131964744441492e-05, "loss": 0.5944, "step": 210615 }, { "epoch": 2.33, "learning_rate": 1.113104201730298e-05, "loss": 0.5989, "step": 210620 }, { "epoch": 2.33, "learning_rate": 1.1130119290164467e-05, "loss": 0.6341, "step": 210625 }, { "epoch": 2.33, "learning_rate": 1.1129196563025955e-05, "loss": 0.5891, "step": 210630 }, { "epoch": 2.33, "learning_rate": 1.1128273835887443e-05, "loss": 0.5928, "step": 210635 }, { "epoch": 2.33, "learning_rate": 1.1127351108748931e-05, "loss": 0.6248, "step": 210640 }, { "epoch": 2.33, "learning_rate": 1.1126428381610419e-05, "loss": 0.617, "step": 210645 }, { "epoch": 2.33, "learning_rate": 1.1125505654471907e-05, "loss": 0.5573, "step": 210650 }, { "epoch": 2.33, "learning_rate": 1.1124582927333393e-05, "loss": 0.578, "step": 210655 }, { "epoch": 2.33, "learning_rate": 1.112366020019488e-05, "loss": 0.5984, "step": 210660 }, { "epoch": 2.33, "learning_rate": 1.1122737473056367e-05, "loss": 0.5857, "step": 210665 }, { "epoch": 2.33, "learning_rate": 1.1121814745917855e-05, "loss": 0.5904, "step": 210670 }, { "epoch": 2.33, "learning_rate": 1.1120892018779342e-05, "loss": 0.6541, "step": 210675 }, { "epoch": 2.33, "learning_rate": 1.111996929164083e-05, "loss": 0.5656, "step": 210680 }, { "epoch": 2.33, "learning_rate": 1.1119046564502318e-05, "loss": 0.5854, "step": 210685 }, { "epoch": 2.33, "learning_rate": 1.1118123837363806e-05, "loss": 0.5552, "step": 210690 }, { "epoch": 2.33, "learning_rate": 1.1117201110225294e-05, "loss": 0.6457, "step": 210695 }, { "epoch": 2.33, "learning_rate": 1.1116278383086782e-05, "loss": 0.6126, "step": 210700 }, { "epoch": 2.33, "learning_rate": 1.111535565594827e-05, "loss": 0.6106, "step": 210705 }, { "epoch": 2.33, "learning_rate": 1.1114432928809756e-05, "loss": 0.6088, "step": 210710 }, { "epoch": 2.33, "learning_rate": 1.1113510201671244e-05, "loss": 0.5803, "step": 210715 }, { "epoch": 2.33, "learning_rate": 1.1112587474532731e-05, "loss": 0.6069, "step": 210720 }, { "epoch": 2.33, "learning_rate": 1.111166474739422e-05, "loss": 0.5703, "step": 210725 }, { "epoch": 2.33, "learning_rate": 1.1110742020255707e-05, "loss": 0.6539, "step": 210730 }, { "epoch": 2.33, "learning_rate": 1.1109819293117195e-05, "loss": 0.6858, "step": 210735 }, { "epoch": 2.33, "learning_rate": 1.1108896565978681e-05, "loss": 0.5279, "step": 210740 }, { "epoch": 2.33, "learning_rate": 1.1107973838840169e-05, "loss": 0.5894, "step": 210745 }, { "epoch": 2.33, "learning_rate": 1.1107051111701657e-05, "loss": 0.5862, "step": 210750 }, { "epoch": 2.33, "learning_rate": 1.1106128384563145e-05, "loss": 0.5779, "step": 210755 }, { "epoch": 2.33, "learning_rate": 1.1105205657424633e-05, "loss": 0.5509, "step": 210760 }, { "epoch": 2.33, "learning_rate": 1.110428293028612e-05, "loss": 0.5667, "step": 210765 }, { "epoch": 2.33, "learning_rate": 1.1103360203147607e-05, "loss": 0.5662, "step": 210770 }, { "epoch": 2.33, "learning_rate": 1.1102437476009094e-05, "loss": 0.6347, "step": 210775 }, { "epoch": 2.33, "learning_rate": 1.1101514748870582e-05, "loss": 0.6142, "step": 210780 }, { "epoch": 2.33, "learning_rate": 1.110059202173207e-05, "loss": 0.6134, "step": 210785 }, { "epoch": 2.33, "learning_rate": 1.1099669294593558e-05, "loss": 0.5973, "step": 210790 }, { "epoch": 2.33, "learning_rate": 1.1098746567455046e-05, "loss": 0.5817, "step": 210795 }, { "epoch": 2.33, "learning_rate": 1.1097823840316534e-05, "loss": 0.6188, "step": 210800 }, { "epoch": 2.33, "learning_rate": 1.1096901113178021e-05, "loss": 0.6389, "step": 210805 }, { "epoch": 2.33, "learning_rate": 1.109597838603951e-05, "loss": 0.5566, "step": 210810 }, { "epoch": 2.33, "learning_rate": 1.1095055658900995e-05, "loss": 0.6003, "step": 210815 }, { "epoch": 2.33, "learning_rate": 1.1094132931762483e-05, "loss": 0.5831, "step": 210820 }, { "epoch": 2.33, "learning_rate": 1.109321020462397e-05, "loss": 0.5458, "step": 210825 }, { "epoch": 2.33, "learning_rate": 1.1092287477485457e-05, "loss": 0.5502, "step": 210830 }, { "epoch": 2.33, "learning_rate": 1.1091364750346945e-05, "loss": 0.5825, "step": 210835 }, { "epoch": 2.33, "learning_rate": 1.1090442023208433e-05, "loss": 0.58, "step": 210840 }, { "epoch": 2.33, "learning_rate": 1.1089519296069921e-05, "loss": 0.6184, "step": 210845 }, { "epoch": 2.33, "learning_rate": 1.1088596568931409e-05, "loss": 0.569, "step": 210850 }, { "epoch": 2.33, "learning_rate": 1.1087673841792897e-05, "loss": 0.5476, "step": 210855 }, { "epoch": 2.33, "learning_rate": 1.1086751114654384e-05, "loss": 0.556, "step": 210860 }, { "epoch": 2.33, "learning_rate": 1.1085828387515872e-05, "loss": 0.594, "step": 210865 }, { "epoch": 2.33, "learning_rate": 1.108490566037736e-05, "loss": 0.6077, "step": 210870 }, { "epoch": 2.33, "learning_rate": 1.1083982933238846e-05, "loss": 0.565, "step": 210875 }, { "epoch": 2.34, "learning_rate": 1.1083060206100334e-05, "loss": 0.6056, "step": 210880 }, { "epoch": 2.34, "learning_rate": 1.1082137478961822e-05, "loss": 0.5334, "step": 210885 }, { "epoch": 2.34, "learning_rate": 1.108121475182331e-05, "loss": 0.6002, "step": 210890 }, { "epoch": 2.34, "learning_rate": 1.1080292024684796e-05, "loss": 0.6074, "step": 210895 }, { "epoch": 2.34, "learning_rate": 1.1079369297546284e-05, "loss": 0.6252, "step": 210900 }, { "epoch": 2.34, "learning_rate": 1.1078446570407772e-05, "loss": 0.5632, "step": 210905 }, { "epoch": 2.34, "learning_rate": 1.107752384326926e-05, "loss": 0.5294, "step": 210910 }, { "epoch": 2.34, "learning_rate": 1.1076601116130747e-05, "loss": 0.5935, "step": 210915 }, { "epoch": 2.34, "learning_rate": 1.1075678388992235e-05, "loss": 0.5713, "step": 210920 }, { "epoch": 2.34, "learning_rate": 1.1074755661853723e-05, "loss": 0.5579, "step": 210925 }, { "epoch": 2.34, "learning_rate": 1.107383293471521e-05, "loss": 0.5827, "step": 210930 }, { "epoch": 2.34, "learning_rate": 1.1072910207576697e-05, "loss": 0.6053, "step": 210935 }, { "epoch": 2.34, "learning_rate": 1.1071987480438185e-05, "loss": 0.6093, "step": 210940 }, { "epoch": 2.34, "learning_rate": 1.1071064753299673e-05, "loss": 0.6226, "step": 210945 }, { "epoch": 2.34, "learning_rate": 1.107014202616116e-05, "loss": 0.5382, "step": 210950 }, { "epoch": 2.34, "learning_rate": 1.1069219299022648e-05, "loss": 0.5983, "step": 210955 }, { "epoch": 2.34, "learning_rate": 1.1068296571884136e-05, "loss": 0.6145, "step": 210960 }, { "epoch": 2.34, "learning_rate": 1.1067373844745624e-05, "loss": 0.6047, "step": 210965 }, { "epoch": 2.34, "learning_rate": 1.106645111760711e-05, "loss": 0.5856, "step": 210970 }, { "epoch": 2.34, "learning_rate": 1.1065528390468598e-05, "loss": 0.5818, "step": 210975 }, { "epoch": 2.34, "learning_rate": 1.1064605663330086e-05, "loss": 0.5933, "step": 210980 }, { "epoch": 2.34, "learning_rate": 1.1063682936191574e-05, "loss": 0.565, "step": 210985 }, { "epoch": 2.34, "learning_rate": 1.106276020905306e-05, "loss": 0.5435, "step": 210990 }, { "epoch": 2.34, "learning_rate": 1.1061837481914548e-05, "loss": 0.5817, "step": 210995 }, { "epoch": 2.34, "learning_rate": 1.1060914754776036e-05, "loss": 0.5566, "step": 211000 }, { "epoch": 2.34, "eval_loss": 0.5821637511253357, "eval_runtime": 69.314, "eval_samples_per_second": 28.854, "eval_steps_per_second": 14.427, "step": 211000 }, { "epoch": 2.34, "learning_rate": 1.1059992027637524e-05, "loss": 0.5548, "step": 211005 }, { "epoch": 2.34, "learning_rate": 1.1059069300499011e-05, "loss": 0.5596, "step": 211010 }, { "epoch": 2.34, "learning_rate": 1.10581465733605e-05, "loss": 0.6241, "step": 211015 }, { "epoch": 2.34, "learning_rate": 1.1057223846221987e-05, "loss": 0.5962, "step": 211020 }, { "epoch": 2.34, "learning_rate": 1.1056301119083475e-05, "loss": 0.5725, "step": 211025 }, { "epoch": 2.34, "learning_rate": 1.1055378391944963e-05, "loss": 0.6377, "step": 211030 }, { "epoch": 2.34, "learning_rate": 1.105445566480645e-05, "loss": 0.5567, "step": 211035 }, { "epoch": 2.34, "learning_rate": 1.1053532937667937e-05, "loss": 0.6113, "step": 211040 }, { "epoch": 2.34, "learning_rate": 1.1052610210529425e-05, "loss": 0.5791, "step": 211045 }, { "epoch": 2.34, "learning_rate": 1.105168748339091e-05, "loss": 0.5418, "step": 211050 }, { "epoch": 2.34, "learning_rate": 1.1050764756252399e-05, "loss": 0.6218, "step": 211055 }, { "epoch": 2.34, "learning_rate": 1.1049842029113886e-05, "loss": 0.5852, "step": 211060 }, { "epoch": 2.34, "learning_rate": 1.1048919301975374e-05, "loss": 0.5972, "step": 211065 }, { "epoch": 2.34, "learning_rate": 1.1047996574836862e-05, "loss": 0.5481, "step": 211070 }, { "epoch": 2.34, "learning_rate": 1.104707384769835e-05, "loss": 0.6342, "step": 211075 }, { "epoch": 2.34, "learning_rate": 1.1046151120559838e-05, "loss": 0.5902, "step": 211080 }, { "epoch": 2.34, "learning_rate": 1.1045228393421326e-05, "loss": 0.5817, "step": 211085 }, { "epoch": 2.34, "learning_rate": 1.1044305666282814e-05, "loss": 0.6401, "step": 211090 }, { "epoch": 2.34, "learning_rate": 1.10433829391443e-05, "loss": 0.6391, "step": 211095 }, { "epoch": 2.34, "learning_rate": 1.1042460212005788e-05, "loss": 0.5656, "step": 211100 }, { "epoch": 2.34, "learning_rate": 1.1041537484867275e-05, "loss": 0.5723, "step": 211105 }, { "epoch": 2.34, "learning_rate": 1.1040614757728763e-05, "loss": 0.5892, "step": 211110 }, { "epoch": 2.34, "learning_rate": 1.1039692030590251e-05, "loss": 0.6021, "step": 211115 }, { "epoch": 2.34, "learning_rate": 1.1038769303451737e-05, "loss": 0.572, "step": 211120 }, { "epoch": 2.34, "learning_rate": 1.1037846576313225e-05, "loss": 0.6086, "step": 211125 }, { "epoch": 2.34, "learning_rate": 1.1036923849174713e-05, "loss": 0.6041, "step": 211130 }, { "epoch": 2.34, "learning_rate": 1.10360011220362e-05, "loss": 0.5856, "step": 211135 }, { "epoch": 2.34, "learning_rate": 1.1035078394897689e-05, "loss": 0.5727, "step": 211140 }, { "epoch": 2.34, "learning_rate": 1.1034155667759177e-05, "loss": 0.5432, "step": 211145 }, { "epoch": 2.34, "learning_rate": 1.1033232940620664e-05, "loss": 0.5495, "step": 211150 }, { "epoch": 2.34, "learning_rate": 1.103231021348215e-05, "loss": 0.5874, "step": 211155 }, { "epoch": 2.34, "learning_rate": 1.1031387486343638e-05, "loss": 0.5325, "step": 211160 }, { "epoch": 2.34, "learning_rate": 1.1030464759205126e-05, "loss": 0.6039, "step": 211165 }, { "epoch": 2.34, "learning_rate": 1.1029542032066614e-05, "loss": 0.5963, "step": 211170 }, { "epoch": 2.34, "learning_rate": 1.1028619304928102e-05, "loss": 0.5646, "step": 211175 }, { "epoch": 2.34, "learning_rate": 1.102769657778959e-05, "loss": 0.6098, "step": 211180 }, { "epoch": 2.34, "learning_rate": 1.1026773850651078e-05, "loss": 0.6547, "step": 211185 }, { "epoch": 2.34, "learning_rate": 1.1025851123512565e-05, "loss": 0.5907, "step": 211190 }, { "epoch": 2.34, "learning_rate": 1.1024928396374052e-05, "loss": 0.5704, "step": 211195 }, { "epoch": 2.34, "learning_rate": 1.102400566923554e-05, "loss": 0.6195, "step": 211200 }, { "epoch": 2.34, "learning_rate": 1.1023082942097027e-05, "loss": 0.517, "step": 211205 }, { "epoch": 2.34, "learning_rate": 1.1022160214958513e-05, "loss": 0.5982, "step": 211210 }, { "epoch": 2.34, "learning_rate": 1.1021237487820001e-05, "loss": 0.573, "step": 211215 }, { "epoch": 2.34, "learning_rate": 1.102031476068149e-05, "loss": 0.591, "step": 211220 }, { "epoch": 2.34, "learning_rate": 1.1019392033542977e-05, "loss": 0.5698, "step": 211225 }, { "epoch": 2.34, "learning_rate": 1.1018469306404465e-05, "loss": 0.6059, "step": 211230 }, { "epoch": 2.34, "learning_rate": 1.1017546579265953e-05, "loss": 0.604, "step": 211235 }, { "epoch": 2.34, "learning_rate": 1.101662385212744e-05, "loss": 0.5804, "step": 211240 }, { "epoch": 2.34, "learning_rate": 1.1015701124988928e-05, "loss": 0.6013, "step": 211245 }, { "epoch": 2.34, "learning_rate": 1.1014778397850416e-05, "loss": 0.6211, "step": 211250 }, { "epoch": 2.34, "learning_rate": 1.1013855670711904e-05, "loss": 0.612, "step": 211255 }, { "epoch": 2.34, "learning_rate": 1.101293294357339e-05, "loss": 0.5741, "step": 211260 }, { "epoch": 2.34, "learning_rate": 1.1012010216434878e-05, "loss": 0.5661, "step": 211265 }, { "epoch": 2.34, "learning_rate": 1.1011087489296366e-05, "loss": 0.5772, "step": 211270 }, { "epoch": 2.34, "learning_rate": 1.1010164762157852e-05, "loss": 0.5906, "step": 211275 }, { "epoch": 2.34, "learning_rate": 1.100924203501934e-05, "loss": 0.6083, "step": 211280 }, { "epoch": 2.34, "learning_rate": 1.1008319307880828e-05, "loss": 0.6374, "step": 211285 }, { "epoch": 2.34, "learning_rate": 1.1007396580742316e-05, "loss": 0.642, "step": 211290 }, { "epoch": 2.34, "learning_rate": 1.1006473853603804e-05, "loss": 0.6334, "step": 211295 }, { "epoch": 2.34, "learning_rate": 1.1005551126465291e-05, "loss": 0.5608, "step": 211300 }, { "epoch": 2.34, "learning_rate": 1.100462839932678e-05, "loss": 0.5481, "step": 211305 }, { "epoch": 2.34, "learning_rate": 1.1003705672188267e-05, "loss": 0.5779, "step": 211310 }, { "epoch": 2.34, "learning_rate": 1.1002782945049755e-05, "loss": 0.6182, "step": 211315 }, { "epoch": 2.34, "learning_rate": 1.1001860217911241e-05, "loss": 0.5834, "step": 211320 }, { "epoch": 2.34, "learning_rate": 1.1000937490772729e-05, "loss": 0.5796, "step": 211325 }, { "epoch": 2.34, "learning_rate": 1.1000014763634217e-05, "loss": 0.621, "step": 211330 }, { "epoch": 2.34, "learning_rate": 1.0999092036495705e-05, "loss": 0.5679, "step": 211335 }, { "epoch": 2.34, "learning_rate": 1.0998169309357192e-05, "loss": 0.5879, "step": 211340 }, { "epoch": 2.34, "learning_rate": 1.099724658221868e-05, "loss": 0.547, "step": 211345 }, { "epoch": 2.34, "learning_rate": 1.0996323855080166e-05, "loss": 0.6037, "step": 211350 }, { "epoch": 2.34, "learning_rate": 1.0995401127941654e-05, "loss": 0.5534, "step": 211355 }, { "epoch": 2.34, "learning_rate": 1.0994478400803142e-05, "loss": 0.5973, "step": 211360 }, { "epoch": 2.34, "learning_rate": 1.099355567366463e-05, "loss": 0.5526, "step": 211365 }, { "epoch": 2.34, "learning_rate": 1.0992632946526118e-05, "loss": 0.6394, "step": 211370 }, { "epoch": 2.34, "learning_rate": 1.0991710219387604e-05, "loss": 0.5781, "step": 211375 }, { "epoch": 2.34, "learning_rate": 1.0990787492249092e-05, "loss": 0.5911, "step": 211380 }, { "epoch": 2.34, "learning_rate": 1.098986476511058e-05, "loss": 0.5812, "step": 211385 }, { "epoch": 2.34, "learning_rate": 1.0988942037972068e-05, "loss": 0.6177, "step": 211390 }, { "epoch": 2.34, "learning_rate": 1.0988019310833555e-05, "loss": 0.577, "step": 211395 }, { "epoch": 2.34, "learning_rate": 1.0987096583695043e-05, "loss": 0.6158, "step": 211400 }, { "epoch": 2.34, "learning_rate": 1.0986173856556531e-05, "loss": 0.5731, "step": 211405 }, { "epoch": 2.34, "learning_rate": 1.0985251129418019e-05, "loss": 0.6, "step": 211410 }, { "epoch": 2.34, "learning_rate": 1.0984328402279507e-05, "loss": 0.5995, "step": 211415 }, { "epoch": 2.34, "learning_rate": 1.0983405675140995e-05, "loss": 0.5634, "step": 211420 }, { "epoch": 2.34, "learning_rate": 1.098248294800248e-05, "loss": 0.6188, "step": 211425 }, { "epoch": 2.34, "learning_rate": 1.0981560220863969e-05, "loss": 0.6228, "step": 211430 }, { "epoch": 2.34, "learning_rate": 1.0980637493725455e-05, "loss": 0.61, "step": 211435 }, { "epoch": 2.34, "learning_rate": 1.0979714766586943e-05, "loss": 0.5889, "step": 211440 }, { "epoch": 2.34, "learning_rate": 1.097879203944843e-05, "loss": 0.6058, "step": 211445 }, { "epoch": 2.34, "learning_rate": 1.0977869312309918e-05, "loss": 0.6095, "step": 211450 }, { "epoch": 2.34, "learning_rate": 1.0976946585171406e-05, "loss": 0.5932, "step": 211455 }, { "epoch": 2.34, "learning_rate": 1.0976023858032894e-05, "loss": 0.5685, "step": 211460 }, { "epoch": 2.34, "learning_rate": 1.0975101130894382e-05, "loss": 0.6057, "step": 211465 }, { "epoch": 2.34, "learning_rate": 1.097417840375587e-05, "loss": 0.5881, "step": 211470 }, { "epoch": 2.34, "learning_rate": 1.0973255676617358e-05, "loss": 0.6551, "step": 211475 }, { "epoch": 2.34, "learning_rate": 1.0972332949478844e-05, "loss": 0.6184, "step": 211480 }, { "epoch": 2.34, "learning_rate": 1.0971410222340332e-05, "loss": 0.6558, "step": 211485 }, { "epoch": 2.34, "learning_rate": 1.097048749520182e-05, "loss": 0.5891, "step": 211490 }, { "epoch": 2.34, "learning_rate": 1.0969564768063307e-05, "loss": 0.5983, "step": 211495 }, { "epoch": 2.34, "learning_rate": 1.0968642040924793e-05, "loss": 0.604, "step": 211500 }, { "epoch": 2.34, "learning_rate": 1.0967719313786281e-05, "loss": 0.5528, "step": 211505 }, { "epoch": 2.34, "learning_rate": 1.096679658664777e-05, "loss": 0.6406, "step": 211510 }, { "epoch": 2.34, "learning_rate": 1.0965873859509257e-05, "loss": 0.5782, "step": 211515 }, { "epoch": 2.34, "learning_rate": 1.0964951132370745e-05, "loss": 0.6259, "step": 211520 }, { "epoch": 2.34, "learning_rate": 1.0964028405232233e-05, "loss": 0.5154, "step": 211525 }, { "epoch": 2.34, "learning_rate": 1.096310567809372e-05, "loss": 0.6297, "step": 211530 }, { "epoch": 2.34, "learning_rate": 1.0962182950955208e-05, "loss": 0.6189, "step": 211535 }, { "epoch": 2.34, "learning_rate": 1.0961260223816695e-05, "loss": 0.6332, "step": 211540 }, { "epoch": 2.34, "learning_rate": 1.0960337496678182e-05, "loss": 0.5817, "step": 211545 }, { "epoch": 2.34, "learning_rate": 1.095941476953967e-05, "loss": 0.5831, "step": 211550 }, { "epoch": 2.34, "learning_rate": 1.0958492042401158e-05, "loss": 0.6078, "step": 211555 }, { "epoch": 2.34, "learning_rate": 1.0957569315262646e-05, "loss": 0.6316, "step": 211560 }, { "epoch": 2.34, "learning_rate": 1.0956646588124134e-05, "loss": 0.6339, "step": 211565 }, { "epoch": 2.34, "learning_rate": 1.0955723860985622e-05, "loss": 0.5882, "step": 211570 }, { "epoch": 2.34, "learning_rate": 1.0954801133847108e-05, "loss": 0.5873, "step": 211575 }, { "epoch": 2.34, "learning_rate": 1.0953878406708596e-05, "loss": 0.5592, "step": 211580 }, { "epoch": 2.34, "learning_rate": 1.0952955679570084e-05, "loss": 0.6322, "step": 211585 }, { "epoch": 2.34, "learning_rate": 1.0952032952431571e-05, "loss": 0.5685, "step": 211590 }, { "epoch": 2.34, "learning_rate": 1.095111022529306e-05, "loss": 0.6234, "step": 211595 }, { "epoch": 2.34, "learning_rate": 1.0950187498154545e-05, "loss": 0.5903, "step": 211600 }, { "epoch": 2.34, "learning_rate": 1.0949264771016033e-05, "loss": 0.6395, "step": 211605 }, { "epoch": 2.34, "learning_rate": 1.0948342043877521e-05, "loss": 0.6072, "step": 211610 }, { "epoch": 2.34, "learning_rate": 1.0947419316739009e-05, "loss": 0.6158, "step": 211615 }, { "epoch": 2.34, "learning_rate": 1.0946496589600497e-05, "loss": 0.5775, "step": 211620 }, { "epoch": 2.34, "learning_rate": 1.0945573862461985e-05, "loss": 0.6053, "step": 211625 }, { "epoch": 2.34, "learning_rate": 1.0944651135323472e-05, "loss": 0.6096, "step": 211630 }, { "epoch": 2.34, "learning_rate": 1.094372840818496e-05, "loss": 0.5773, "step": 211635 }, { "epoch": 2.34, "learning_rate": 1.0942805681046448e-05, "loss": 0.6782, "step": 211640 }, { "epoch": 2.34, "learning_rate": 1.0941882953907934e-05, "loss": 0.5355, "step": 211645 }, { "epoch": 2.34, "learning_rate": 1.0940960226769422e-05, "loss": 0.601, "step": 211650 }, { "epoch": 2.34, "learning_rate": 1.0940037499630908e-05, "loss": 0.5655, "step": 211655 }, { "epoch": 2.34, "learning_rate": 1.0939114772492396e-05, "loss": 0.6197, "step": 211660 }, { "epoch": 2.34, "learning_rate": 1.0938192045353884e-05, "loss": 0.6066, "step": 211665 }, { "epoch": 2.34, "learning_rate": 1.0937269318215372e-05, "loss": 0.6067, "step": 211670 }, { "epoch": 2.34, "learning_rate": 1.093634659107686e-05, "loss": 0.5913, "step": 211675 }, { "epoch": 2.34, "learning_rate": 1.0935423863938348e-05, "loss": 0.6402, "step": 211680 }, { "epoch": 2.34, "learning_rate": 1.0934501136799835e-05, "loss": 0.5758, "step": 211685 }, { "epoch": 2.34, "learning_rate": 1.0933578409661323e-05, "loss": 0.6043, "step": 211690 }, { "epoch": 2.34, "learning_rate": 1.0932655682522811e-05, "loss": 0.5388, "step": 211695 }, { "epoch": 2.34, "learning_rate": 1.0931732955384299e-05, "loss": 0.6215, "step": 211700 }, { "epoch": 2.34, "learning_rate": 1.0930810228245785e-05, "loss": 0.5407, "step": 211705 }, { "epoch": 2.34, "learning_rate": 1.0929887501107273e-05, "loss": 0.6175, "step": 211710 }, { "epoch": 2.34, "learning_rate": 1.092896477396876e-05, "loss": 0.5779, "step": 211715 }, { "epoch": 2.34, "learning_rate": 1.0928042046830249e-05, "loss": 0.5523, "step": 211720 }, { "epoch": 2.34, "learning_rate": 1.0927119319691736e-05, "loss": 0.6122, "step": 211725 }, { "epoch": 2.34, "learning_rate": 1.0926196592553223e-05, "loss": 0.5994, "step": 211730 }, { "epoch": 2.34, "learning_rate": 1.092527386541471e-05, "loss": 0.5521, "step": 211735 }, { "epoch": 2.34, "learning_rate": 1.0924351138276198e-05, "loss": 0.5947, "step": 211740 }, { "epoch": 2.34, "learning_rate": 1.0923428411137686e-05, "loss": 0.525, "step": 211745 }, { "epoch": 2.34, "learning_rate": 1.0922505683999174e-05, "loss": 0.5985, "step": 211750 }, { "epoch": 2.34, "learning_rate": 1.0921582956860662e-05, "loss": 0.62, "step": 211755 }, { "epoch": 2.34, "learning_rate": 1.0920660229722148e-05, "loss": 0.6119, "step": 211760 }, { "epoch": 2.34, "learning_rate": 1.0919737502583636e-05, "loss": 0.5941, "step": 211765 }, { "epoch": 2.34, "learning_rate": 1.0918814775445124e-05, "loss": 0.6254, "step": 211770 }, { "epoch": 2.34, "learning_rate": 1.0917892048306612e-05, "loss": 0.6118, "step": 211775 }, { "epoch": 2.34, "learning_rate": 1.09169693211681e-05, "loss": 0.5412, "step": 211780 }, { "epoch": 2.35, "learning_rate": 1.0916046594029587e-05, "loss": 0.5655, "step": 211785 }, { "epoch": 2.35, "learning_rate": 1.0915123866891075e-05, "loss": 0.5587, "step": 211790 }, { "epoch": 2.35, "learning_rate": 1.0914201139752563e-05, "loss": 0.5595, "step": 211795 }, { "epoch": 2.35, "learning_rate": 1.091327841261405e-05, "loss": 0.5481, "step": 211800 }, { "epoch": 2.35, "learning_rate": 1.0912355685475537e-05, "loss": 0.6011, "step": 211805 }, { "epoch": 2.35, "learning_rate": 1.0911432958337025e-05, "loss": 0.6218, "step": 211810 }, { "epoch": 2.35, "learning_rate": 1.0910510231198513e-05, "loss": 0.6112, "step": 211815 }, { "epoch": 2.35, "learning_rate": 1.0909587504059999e-05, "loss": 0.5981, "step": 211820 }, { "epoch": 2.35, "learning_rate": 1.0908664776921487e-05, "loss": 0.5828, "step": 211825 }, { "epoch": 2.35, "learning_rate": 1.0907742049782975e-05, "loss": 0.6073, "step": 211830 }, { "epoch": 2.35, "learning_rate": 1.0906819322644462e-05, "loss": 0.5778, "step": 211835 }, { "epoch": 2.35, "learning_rate": 1.090589659550595e-05, "loss": 0.5489, "step": 211840 }, { "epoch": 2.35, "learning_rate": 1.0904973868367438e-05, "loss": 0.5918, "step": 211845 }, { "epoch": 2.35, "learning_rate": 1.0904051141228926e-05, "loss": 0.5735, "step": 211850 }, { "epoch": 2.35, "learning_rate": 1.0903128414090414e-05, "loss": 0.6098, "step": 211855 }, { "epoch": 2.35, "learning_rate": 1.0902205686951902e-05, "loss": 0.5835, "step": 211860 }, { "epoch": 2.35, "learning_rate": 1.090128295981339e-05, "loss": 0.6265, "step": 211865 }, { "epoch": 2.35, "learning_rate": 1.0900360232674876e-05, "loss": 0.545, "step": 211870 }, { "epoch": 2.35, "learning_rate": 1.0899437505536363e-05, "loss": 0.5739, "step": 211875 }, { "epoch": 2.35, "learning_rate": 1.089851477839785e-05, "loss": 0.5574, "step": 211880 }, { "epoch": 2.35, "learning_rate": 1.0897592051259337e-05, "loss": 0.5985, "step": 211885 }, { "epoch": 2.35, "learning_rate": 1.0896669324120825e-05, "loss": 0.6141, "step": 211890 }, { "epoch": 2.35, "learning_rate": 1.0895746596982313e-05, "loss": 0.5979, "step": 211895 }, { "epoch": 2.35, "learning_rate": 1.0894823869843801e-05, "loss": 0.6109, "step": 211900 }, { "epoch": 2.35, "learning_rate": 1.0893901142705289e-05, "loss": 0.594, "step": 211905 }, { "epoch": 2.35, "learning_rate": 1.0892978415566777e-05, "loss": 0.6168, "step": 211910 }, { "epoch": 2.35, "learning_rate": 1.0892055688428265e-05, "loss": 0.592, "step": 211915 }, { "epoch": 2.35, "learning_rate": 1.0891132961289752e-05, "loss": 0.5611, "step": 211920 }, { "epoch": 2.35, "learning_rate": 1.0890210234151239e-05, "loss": 0.5827, "step": 211925 }, { "epoch": 2.35, "learning_rate": 1.0889287507012726e-05, "loss": 0.6129, "step": 211930 }, { "epoch": 2.35, "learning_rate": 1.0888364779874214e-05, "loss": 0.5746, "step": 211935 }, { "epoch": 2.35, "learning_rate": 1.0887442052735702e-05, "loss": 0.6003, "step": 211940 }, { "epoch": 2.35, "learning_rate": 1.088651932559719e-05, "loss": 0.5892, "step": 211945 }, { "epoch": 2.35, "learning_rate": 1.0885596598458678e-05, "loss": 0.6344, "step": 211950 }, { "epoch": 2.35, "learning_rate": 1.0884673871320164e-05, "loss": 0.6286, "step": 211955 }, { "epoch": 2.35, "learning_rate": 1.0883751144181652e-05, "loss": 0.5407, "step": 211960 }, { "epoch": 2.35, "learning_rate": 1.088282841704314e-05, "loss": 0.6611, "step": 211965 }, { "epoch": 2.35, "learning_rate": 1.0881905689904628e-05, "loss": 0.5639, "step": 211970 }, { "epoch": 2.35, "learning_rate": 1.0880982962766115e-05, "loss": 0.5527, "step": 211975 }, { "epoch": 2.35, "learning_rate": 1.0880060235627603e-05, "loss": 0.6056, "step": 211980 }, { "epoch": 2.35, "learning_rate": 1.087913750848909e-05, "loss": 0.5687, "step": 211985 }, { "epoch": 2.35, "learning_rate": 1.0878214781350577e-05, "loss": 0.5919, "step": 211990 }, { "epoch": 2.35, "learning_rate": 1.0877292054212065e-05, "loss": 0.5993, "step": 211995 }, { "epoch": 2.35, "learning_rate": 1.0876369327073553e-05, "loss": 0.6158, "step": 212000 }, { "epoch": 2.35, "eval_loss": 0.5825923085212708, "eval_runtime": 69.4044, "eval_samples_per_second": 28.817, "eval_steps_per_second": 14.408, "step": 212000 }, { "epoch": 2.35, "learning_rate": 1.087544659993504e-05, "loss": 0.5594, "step": 212005 }, { "epoch": 2.35, "learning_rate": 1.0874523872796529e-05, "loss": 0.591, "step": 212010 }, { "epoch": 2.35, "learning_rate": 1.0873601145658016e-05, "loss": 0.5621, "step": 212015 }, { "epoch": 2.35, "learning_rate": 1.0872678418519504e-05, "loss": 0.601, "step": 212020 }, { "epoch": 2.35, "learning_rate": 1.0871755691380992e-05, "loss": 0.6016, "step": 212025 }, { "epoch": 2.35, "learning_rate": 1.0870832964242478e-05, "loss": 0.5814, "step": 212030 }, { "epoch": 2.35, "learning_rate": 1.0869910237103966e-05, "loss": 0.5995, "step": 212035 }, { "epoch": 2.35, "learning_rate": 1.0868987509965452e-05, "loss": 0.5692, "step": 212040 }, { "epoch": 2.35, "learning_rate": 1.086806478282694e-05, "loss": 0.5801, "step": 212045 }, { "epoch": 2.35, "learning_rate": 1.0867142055688428e-05, "loss": 0.593, "step": 212050 }, { "epoch": 2.35, "learning_rate": 1.0866219328549916e-05, "loss": 0.5829, "step": 212055 }, { "epoch": 2.35, "learning_rate": 1.0865296601411404e-05, "loss": 0.523, "step": 212060 }, { "epoch": 2.35, "learning_rate": 1.0864373874272892e-05, "loss": 0.6299, "step": 212065 }, { "epoch": 2.35, "learning_rate": 1.086345114713438e-05, "loss": 0.6335, "step": 212070 }, { "epoch": 2.35, "learning_rate": 1.0862528419995867e-05, "loss": 0.5234, "step": 212075 }, { "epoch": 2.35, "learning_rate": 1.0861605692857355e-05, "loss": 0.6037, "step": 212080 }, { "epoch": 2.35, "learning_rate": 1.0860682965718843e-05, "loss": 0.5683, "step": 212085 }, { "epoch": 2.35, "learning_rate": 1.0859760238580329e-05, "loss": 0.5873, "step": 212090 }, { "epoch": 2.35, "learning_rate": 1.0858837511441817e-05, "loss": 0.5563, "step": 212095 }, { "epoch": 2.35, "learning_rate": 1.0857914784303305e-05, "loss": 0.5713, "step": 212100 }, { "epoch": 2.35, "learning_rate": 1.0856992057164793e-05, "loss": 0.6319, "step": 212105 }, { "epoch": 2.35, "learning_rate": 1.0856069330026279e-05, "loss": 0.5681, "step": 212110 }, { "epoch": 2.35, "learning_rate": 1.0855146602887767e-05, "loss": 0.5346, "step": 212115 }, { "epoch": 2.35, "learning_rate": 1.0854223875749255e-05, "loss": 0.5368, "step": 212120 }, { "epoch": 2.35, "learning_rate": 1.0853301148610742e-05, "loss": 0.5813, "step": 212125 }, { "epoch": 2.35, "learning_rate": 1.085237842147223e-05, "loss": 0.6178, "step": 212130 }, { "epoch": 2.35, "learning_rate": 1.0851455694333718e-05, "loss": 0.6271, "step": 212135 }, { "epoch": 2.35, "learning_rate": 1.0850532967195206e-05, "loss": 0.5916, "step": 212140 }, { "epoch": 2.35, "learning_rate": 1.0849610240056692e-05, "loss": 0.5813, "step": 212145 }, { "epoch": 2.35, "learning_rate": 1.084868751291818e-05, "loss": 0.5981, "step": 212150 }, { "epoch": 2.35, "learning_rate": 1.0847764785779668e-05, "loss": 0.6022, "step": 212155 }, { "epoch": 2.35, "learning_rate": 1.0846842058641156e-05, "loss": 0.6476, "step": 212160 }, { "epoch": 2.35, "learning_rate": 1.0845919331502643e-05, "loss": 0.5694, "step": 212165 }, { "epoch": 2.35, "learning_rate": 1.0844996604364131e-05, "loss": 0.6117, "step": 212170 }, { "epoch": 2.35, "learning_rate": 1.084407387722562e-05, "loss": 0.619, "step": 212175 }, { "epoch": 2.35, "learning_rate": 1.0843151150087107e-05, "loss": 0.6401, "step": 212180 }, { "epoch": 2.35, "learning_rate": 1.0842228422948593e-05, "loss": 0.5935, "step": 212185 }, { "epoch": 2.35, "learning_rate": 1.0841305695810081e-05, "loss": 0.5992, "step": 212190 }, { "epoch": 2.35, "learning_rate": 1.0840382968671569e-05, "loss": 0.6159, "step": 212195 }, { "epoch": 2.35, "learning_rate": 1.0839460241533057e-05, "loss": 0.6018, "step": 212200 }, { "epoch": 2.35, "learning_rate": 1.0838537514394543e-05, "loss": 0.6073, "step": 212205 }, { "epoch": 2.35, "learning_rate": 1.083761478725603e-05, "loss": 0.5842, "step": 212210 }, { "epoch": 2.35, "learning_rate": 1.0836692060117519e-05, "loss": 0.5925, "step": 212215 }, { "epoch": 2.35, "learning_rate": 1.0835769332979006e-05, "loss": 0.5804, "step": 212220 }, { "epoch": 2.35, "learning_rate": 1.0834846605840494e-05, "loss": 0.6314, "step": 212225 }, { "epoch": 2.35, "learning_rate": 1.0833923878701982e-05, "loss": 0.5581, "step": 212230 }, { "epoch": 2.35, "learning_rate": 1.083300115156347e-05, "loss": 0.6391, "step": 212235 }, { "epoch": 2.35, "learning_rate": 1.0832078424424958e-05, "loss": 0.5845, "step": 212240 }, { "epoch": 2.35, "learning_rate": 1.0831155697286446e-05, "loss": 0.6183, "step": 212245 }, { "epoch": 2.35, "learning_rate": 1.0830232970147933e-05, "loss": 0.5853, "step": 212250 }, { "epoch": 2.35, "learning_rate": 1.082931024300942e-05, "loss": 0.5608, "step": 212255 }, { "epoch": 2.35, "learning_rate": 1.0828387515870908e-05, "loss": 0.6054, "step": 212260 }, { "epoch": 2.35, "learning_rate": 1.0827464788732394e-05, "loss": 0.6188, "step": 212265 }, { "epoch": 2.35, "learning_rate": 1.0826542061593882e-05, "loss": 0.6035, "step": 212270 }, { "epoch": 2.35, "learning_rate": 1.082561933445537e-05, "loss": 0.6214, "step": 212275 }, { "epoch": 2.35, "learning_rate": 1.0824696607316857e-05, "loss": 0.6331, "step": 212280 }, { "epoch": 2.35, "learning_rate": 1.0823773880178345e-05, "loss": 0.6047, "step": 212285 }, { "epoch": 2.35, "learning_rate": 1.0822851153039833e-05, "loss": 0.5552, "step": 212290 }, { "epoch": 2.35, "learning_rate": 1.082192842590132e-05, "loss": 0.6102, "step": 212295 }, { "epoch": 2.35, "learning_rate": 1.0821005698762809e-05, "loss": 0.6451, "step": 212300 }, { "epoch": 2.35, "learning_rate": 1.0820082971624296e-05, "loss": 0.6013, "step": 212305 }, { "epoch": 2.35, "learning_rate": 1.0819160244485783e-05, "loss": 0.5385, "step": 212310 }, { "epoch": 2.35, "learning_rate": 1.081823751734727e-05, "loss": 0.5626, "step": 212315 }, { "epoch": 2.35, "learning_rate": 1.0817314790208758e-05, "loss": 0.6023, "step": 212320 }, { "epoch": 2.35, "learning_rate": 1.0816392063070246e-05, "loss": 0.6041, "step": 212325 }, { "epoch": 2.35, "learning_rate": 1.0815469335931734e-05, "loss": 0.5485, "step": 212330 }, { "epoch": 2.35, "learning_rate": 1.081454660879322e-05, "loss": 0.572, "step": 212335 }, { "epoch": 2.35, "learning_rate": 1.0813623881654708e-05, "loss": 0.5821, "step": 212340 }, { "epoch": 2.35, "learning_rate": 1.0812701154516196e-05, "loss": 0.5756, "step": 212345 }, { "epoch": 2.35, "learning_rate": 1.0811778427377684e-05, "loss": 0.6137, "step": 212350 }, { "epoch": 2.35, "learning_rate": 1.0810855700239172e-05, "loss": 0.6199, "step": 212355 }, { "epoch": 2.35, "learning_rate": 1.080993297310066e-05, "loss": 0.5884, "step": 212360 }, { "epoch": 2.35, "learning_rate": 1.0809010245962147e-05, "loss": 0.613, "step": 212365 }, { "epoch": 2.35, "learning_rate": 1.0808087518823633e-05, "loss": 0.5538, "step": 212370 }, { "epoch": 2.35, "learning_rate": 1.0807164791685121e-05, "loss": 0.6347, "step": 212375 }, { "epoch": 2.35, "learning_rate": 1.0806242064546609e-05, "loss": 0.6229, "step": 212380 }, { "epoch": 2.35, "learning_rate": 1.0805319337408097e-05, "loss": 0.6076, "step": 212385 }, { "epoch": 2.35, "learning_rate": 1.0804396610269585e-05, "loss": 0.5776, "step": 212390 }, { "epoch": 2.35, "learning_rate": 1.0803473883131073e-05, "loss": 0.5627, "step": 212395 }, { "epoch": 2.35, "learning_rate": 1.080255115599256e-05, "loss": 0.5918, "step": 212400 }, { "epoch": 2.35, "learning_rate": 1.0801628428854048e-05, "loss": 0.5965, "step": 212405 }, { "epoch": 2.35, "learning_rate": 1.0800705701715534e-05, "loss": 0.6256, "step": 212410 }, { "epoch": 2.35, "learning_rate": 1.0799782974577022e-05, "loss": 0.6173, "step": 212415 }, { "epoch": 2.35, "learning_rate": 1.079886024743851e-05, "loss": 0.6227, "step": 212420 }, { "epoch": 2.35, "learning_rate": 1.0797937520299996e-05, "loss": 0.5765, "step": 212425 }, { "epoch": 2.35, "learning_rate": 1.0797014793161484e-05, "loss": 0.5998, "step": 212430 }, { "epoch": 2.35, "learning_rate": 1.0796092066022972e-05, "loss": 0.6104, "step": 212435 }, { "epoch": 2.35, "learning_rate": 1.079516933888446e-05, "loss": 0.5859, "step": 212440 }, { "epoch": 2.35, "learning_rate": 1.0794246611745948e-05, "loss": 0.5897, "step": 212445 }, { "epoch": 2.35, "learning_rate": 1.0793323884607436e-05, "loss": 0.6103, "step": 212450 }, { "epoch": 2.35, "learning_rate": 1.0792401157468923e-05, "loss": 0.6043, "step": 212455 }, { "epoch": 2.35, "learning_rate": 1.0791478430330411e-05, "loss": 0.546, "step": 212460 }, { "epoch": 2.35, "learning_rate": 1.0790555703191899e-05, "loss": 0.5571, "step": 212465 }, { "epoch": 2.35, "learning_rate": 1.0789632976053387e-05, "loss": 0.5988, "step": 212470 }, { "epoch": 2.35, "learning_rate": 1.0788710248914873e-05, "loss": 0.6133, "step": 212475 }, { "epoch": 2.35, "learning_rate": 1.0787787521776361e-05, "loss": 0.5713, "step": 212480 }, { "epoch": 2.35, "learning_rate": 1.0786864794637847e-05, "loss": 0.6434, "step": 212485 }, { "epoch": 2.35, "learning_rate": 1.0785942067499335e-05, "loss": 0.575, "step": 212490 }, { "epoch": 2.35, "learning_rate": 1.0785019340360823e-05, "loss": 0.5468, "step": 212495 }, { "epoch": 2.35, "learning_rate": 1.078409661322231e-05, "loss": 0.5827, "step": 212500 }, { "epoch": 2.35, "learning_rate": 1.0783173886083799e-05, "loss": 0.6303, "step": 212505 }, { "epoch": 2.35, "learning_rate": 1.0782251158945286e-05, "loss": 0.5788, "step": 212510 }, { "epoch": 2.35, "learning_rate": 1.0781328431806774e-05, "loss": 0.6457, "step": 212515 }, { "epoch": 2.35, "learning_rate": 1.0780405704668262e-05, "loss": 0.5781, "step": 212520 }, { "epoch": 2.35, "learning_rate": 1.077948297752975e-05, "loss": 0.6036, "step": 212525 }, { "epoch": 2.35, "learning_rate": 1.0778560250391238e-05, "loss": 0.6045, "step": 212530 }, { "epoch": 2.35, "learning_rate": 1.0777637523252724e-05, "loss": 0.5765, "step": 212535 }, { "epoch": 2.35, "learning_rate": 1.0776714796114212e-05, "loss": 0.5824, "step": 212540 }, { "epoch": 2.35, "learning_rate": 1.07757920689757e-05, "loss": 0.5415, "step": 212545 }, { "epoch": 2.35, "learning_rate": 1.0774869341837187e-05, "loss": 0.6394, "step": 212550 }, { "epoch": 2.35, "learning_rate": 1.0773946614698675e-05, "loss": 0.5578, "step": 212555 }, { "epoch": 2.35, "learning_rate": 1.0773023887560163e-05, "loss": 0.5704, "step": 212560 }, { "epoch": 2.35, "learning_rate": 1.077210116042165e-05, "loss": 0.5881, "step": 212565 }, { "epoch": 2.35, "learning_rate": 1.0771178433283137e-05, "loss": 0.6172, "step": 212570 }, { "epoch": 2.35, "learning_rate": 1.0770255706144625e-05, "loss": 0.5706, "step": 212575 }, { "epoch": 2.35, "learning_rate": 1.0769332979006113e-05, "loss": 0.5871, "step": 212580 }, { "epoch": 2.35, "learning_rate": 1.07684102518676e-05, "loss": 0.6235, "step": 212585 }, { "epoch": 2.35, "learning_rate": 1.0767487524729087e-05, "loss": 0.584, "step": 212590 }, { "epoch": 2.35, "learning_rate": 1.0766564797590575e-05, "loss": 0.5727, "step": 212595 }, { "epoch": 2.35, "learning_rate": 1.0765642070452063e-05, "loss": 0.5596, "step": 212600 }, { "epoch": 2.35, "learning_rate": 1.076471934331355e-05, "loss": 0.5991, "step": 212605 }, { "epoch": 2.35, "learning_rate": 1.0763796616175038e-05, "loss": 0.5784, "step": 212610 }, { "epoch": 2.35, "learning_rate": 1.0762873889036526e-05, "loss": 0.6262, "step": 212615 }, { "epoch": 2.35, "learning_rate": 1.0761951161898014e-05, "loss": 0.5609, "step": 212620 }, { "epoch": 2.35, "learning_rate": 1.0761028434759502e-05, "loss": 0.6271, "step": 212625 }, { "epoch": 2.35, "learning_rate": 1.076010570762099e-05, "loss": 0.5493, "step": 212630 }, { "epoch": 2.35, "learning_rate": 1.0759182980482478e-05, "loss": 0.5795, "step": 212635 }, { "epoch": 2.35, "learning_rate": 1.0758260253343964e-05, "loss": 0.6234, "step": 212640 }, { "epoch": 2.35, "learning_rate": 1.0757337526205452e-05, "loss": 0.5762, "step": 212645 }, { "epoch": 2.35, "learning_rate": 1.0756414799066938e-05, "loss": 0.6217, "step": 212650 }, { "epoch": 2.35, "learning_rate": 1.0755492071928426e-05, "loss": 0.5786, "step": 212655 }, { "epoch": 2.35, "learning_rate": 1.0754569344789913e-05, "loss": 0.5966, "step": 212660 }, { "epoch": 2.35, "learning_rate": 1.0753646617651401e-05, "loss": 0.615, "step": 212665 }, { "epoch": 2.35, "learning_rate": 1.0752723890512889e-05, "loss": 0.597, "step": 212670 }, { "epoch": 2.35, "learning_rate": 1.0751801163374377e-05, "loss": 0.5861, "step": 212675 }, { "epoch": 2.35, "learning_rate": 1.0750878436235865e-05, "loss": 0.5784, "step": 212680 }, { "epoch": 2.36, "learning_rate": 1.0749955709097353e-05, "loss": 0.629, "step": 212685 }, { "epoch": 2.36, "learning_rate": 1.074903298195884e-05, "loss": 0.6242, "step": 212690 }, { "epoch": 2.36, "learning_rate": 1.0748110254820327e-05, "loss": 0.5432, "step": 212695 }, { "epoch": 2.36, "learning_rate": 1.0747187527681814e-05, "loss": 0.6543, "step": 212700 }, { "epoch": 2.36, "learning_rate": 1.0746264800543302e-05, "loss": 0.6037, "step": 212705 }, { "epoch": 2.36, "learning_rate": 1.074534207340479e-05, "loss": 0.5641, "step": 212710 }, { "epoch": 2.36, "learning_rate": 1.0744419346266276e-05, "loss": 0.6072, "step": 212715 }, { "epoch": 2.36, "learning_rate": 1.0743496619127764e-05, "loss": 0.6083, "step": 212720 }, { "epoch": 2.36, "learning_rate": 1.0742573891989252e-05, "loss": 0.5775, "step": 212725 }, { "epoch": 2.36, "learning_rate": 1.074165116485074e-05, "loss": 0.5748, "step": 212730 }, { "epoch": 2.36, "learning_rate": 1.0740728437712228e-05, "loss": 0.5728, "step": 212735 }, { "epoch": 2.36, "learning_rate": 1.0739805710573716e-05, "loss": 0.5807, "step": 212740 }, { "epoch": 2.36, "learning_rate": 1.0738882983435203e-05, "loss": 0.6253, "step": 212745 }, { "epoch": 2.36, "learning_rate": 1.0737960256296691e-05, "loss": 0.6164, "step": 212750 }, { "epoch": 2.36, "learning_rate": 1.0737037529158177e-05, "loss": 0.607, "step": 212755 }, { "epoch": 2.36, "learning_rate": 1.0736114802019665e-05, "loss": 0.6053, "step": 212760 }, { "epoch": 2.36, "learning_rate": 1.0735192074881153e-05, "loss": 0.5842, "step": 212765 }, { "epoch": 2.36, "learning_rate": 1.0734269347742641e-05, "loss": 0.6045, "step": 212770 }, { "epoch": 2.36, "learning_rate": 1.0733346620604129e-05, "loss": 0.5467, "step": 212775 }, { "epoch": 2.36, "learning_rate": 1.0732423893465617e-05, "loss": 0.6208, "step": 212780 }, { "epoch": 2.36, "learning_rate": 1.0731501166327105e-05, "loss": 0.5814, "step": 212785 }, { "epoch": 2.36, "learning_rate": 1.073057843918859e-05, "loss": 0.6012, "step": 212790 }, { "epoch": 2.36, "learning_rate": 1.0729655712050079e-05, "loss": 0.5827, "step": 212795 }, { "epoch": 2.36, "learning_rate": 1.0728732984911566e-05, "loss": 0.5501, "step": 212800 }, { "epoch": 2.36, "learning_rate": 1.0727810257773054e-05, "loss": 0.5966, "step": 212805 }, { "epoch": 2.36, "learning_rate": 1.0726887530634542e-05, "loss": 0.5983, "step": 212810 }, { "epoch": 2.36, "learning_rate": 1.0725964803496028e-05, "loss": 0.5856, "step": 212815 }, { "epoch": 2.36, "learning_rate": 1.0725042076357516e-05, "loss": 0.5584, "step": 212820 }, { "epoch": 2.36, "learning_rate": 1.0724119349219004e-05, "loss": 0.5912, "step": 212825 }, { "epoch": 2.36, "learning_rate": 1.0723196622080492e-05, "loss": 0.5695, "step": 212830 }, { "epoch": 2.36, "learning_rate": 1.072227389494198e-05, "loss": 0.61, "step": 212835 }, { "epoch": 2.36, "learning_rate": 1.0721351167803467e-05, "loss": 0.6494, "step": 212840 }, { "epoch": 2.36, "learning_rate": 1.0720428440664955e-05, "loss": 0.5441, "step": 212845 }, { "epoch": 2.36, "learning_rate": 1.0719505713526443e-05, "loss": 0.6607, "step": 212850 }, { "epoch": 2.36, "learning_rate": 1.0718582986387931e-05, "loss": 0.5998, "step": 212855 }, { "epoch": 2.36, "learning_rate": 1.0717660259249417e-05, "loss": 0.5976, "step": 212860 }, { "epoch": 2.36, "learning_rate": 1.0716737532110905e-05, "loss": 0.5922, "step": 212865 }, { "epoch": 2.36, "learning_rate": 1.0715814804972391e-05, "loss": 0.6296, "step": 212870 }, { "epoch": 2.36, "learning_rate": 1.0714892077833879e-05, "loss": 0.5651, "step": 212875 }, { "epoch": 2.36, "learning_rate": 1.0713969350695367e-05, "loss": 0.5491, "step": 212880 }, { "epoch": 2.36, "learning_rate": 1.0713046623556855e-05, "loss": 0.5468, "step": 212885 }, { "epoch": 2.36, "learning_rate": 1.0712123896418343e-05, "loss": 0.6122, "step": 212890 }, { "epoch": 2.36, "learning_rate": 1.071120116927983e-05, "loss": 0.5323, "step": 212895 }, { "epoch": 2.36, "learning_rate": 1.0710278442141318e-05, "loss": 0.5989, "step": 212900 }, { "epoch": 2.36, "learning_rate": 1.0709355715002806e-05, "loss": 0.5365, "step": 212905 }, { "epoch": 2.36, "learning_rate": 1.0708432987864294e-05, "loss": 0.5654, "step": 212910 }, { "epoch": 2.36, "learning_rate": 1.0707510260725782e-05, "loss": 0.563, "step": 212915 }, { "epoch": 2.36, "learning_rate": 1.0706587533587268e-05, "loss": 0.6347, "step": 212920 }, { "epoch": 2.36, "learning_rate": 1.0705664806448756e-05, "loss": 0.6145, "step": 212925 }, { "epoch": 2.36, "learning_rate": 1.0704742079310244e-05, "loss": 0.5689, "step": 212930 }, { "epoch": 2.36, "learning_rate": 1.0703819352171731e-05, "loss": 0.5425, "step": 212935 }, { "epoch": 2.36, "learning_rate": 1.070289662503322e-05, "loss": 0.5453, "step": 212940 }, { "epoch": 2.36, "learning_rate": 1.0701973897894706e-05, "loss": 0.604, "step": 212945 }, { "epoch": 2.36, "learning_rate": 1.0701051170756193e-05, "loss": 0.5753, "step": 212950 }, { "epoch": 2.36, "learning_rate": 1.0700128443617681e-05, "loss": 0.5705, "step": 212955 }, { "epoch": 2.36, "learning_rate": 1.0699205716479169e-05, "loss": 0.5866, "step": 212960 }, { "epoch": 2.36, "learning_rate": 1.0698282989340657e-05, "loss": 0.6273, "step": 212965 }, { "epoch": 2.36, "learning_rate": 1.0697360262202145e-05, "loss": 0.5884, "step": 212970 }, { "epoch": 2.36, "learning_rate": 1.0696437535063631e-05, "loss": 0.6151, "step": 212975 }, { "epoch": 2.36, "learning_rate": 1.0695514807925119e-05, "loss": 0.54, "step": 212980 }, { "epoch": 2.36, "learning_rate": 1.0694592080786607e-05, "loss": 0.5353, "step": 212985 }, { "epoch": 2.36, "learning_rate": 1.0693669353648094e-05, "loss": 0.6149, "step": 212990 }, { "epoch": 2.36, "learning_rate": 1.0692746626509582e-05, "loss": 0.5828, "step": 212995 }, { "epoch": 2.36, "learning_rate": 1.069182389937107e-05, "loss": 0.5574, "step": 213000 }, { "epoch": 2.36, "eval_loss": 0.5428625345230103, "eval_runtime": 69.4298, "eval_samples_per_second": 28.806, "eval_steps_per_second": 14.403, "step": 213000 }, { "epoch": 2.36, "learning_rate": 1.0690901172232558e-05, "loss": 0.5713, "step": 213005 }, { "epoch": 2.36, "learning_rate": 1.0689978445094046e-05, "loss": 0.585, "step": 213010 }, { "epoch": 2.36, "learning_rate": 1.0689055717955534e-05, "loss": 0.6232, "step": 213015 }, { "epoch": 2.36, "learning_rate": 1.068813299081702e-05, "loss": 0.6064, "step": 213020 }, { "epoch": 2.36, "learning_rate": 1.0687210263678508e-05, "loss": 0.6487, "step": 213025 }, { "epoch": 2.36, "learning_rate": 1.0686287536539996e-05, "loss": 0.5545, "step": 213030 }, { "epoch": 2.36, "learning_rate": 1.0685364809401482e-05, "loss": 0.5889, "step": 213035 }, { "epoch": 2.36, "learning_rate": 1.068444208226297e-05, "loss": 0.638, "step": 213040 }, { "epoch": 2.36, "learning_rate": 1.0683519355124457e-05, "loss": 0.5825, "step": 213045 }, { "epoch": 2.36, "learning_rate": 1.0682596627985945e-05, "loss": 0.5702, "step": 213050 }, { "epoch": 2.36, "learning_rate": 1.0681673900847433e-05, "loss": 0.6309, "step": 213055 }, { "epoch": 2.36, "learning_rate": 1.0680751173708921e-05, "loss": 0.5323, "step": 213060 }, { "epoch": 2.36, "learning_rate": 1.0679828446570409e-05, "loss": 0.5599, "step": 213065 }, { "epoch": 2.36, "learning_rate": 1.0678905719431897e-05, "loss": 0.5666, "step": 213070 }, { "epoch": 2.36, "learning_rate": 1.0677982992293384e-05, "loss": 0.5514, "step": 213075 }, { "epoch": 2.36, "learning_rate": 1.0677060265154872e-05, "loss": 0.5798, "step": 213080 }, { "epoch": 2.36, "learning_rate": 1.0676137538016358e-05, "loss": 0.5888, "step": 213085 }, { "epoch": 2.36, "learning_rate": 1.0675214810877846e-05, "loss": 0.5894, "step": 213090 }, { "epoch": 2.36, "learning_rate": 1.0674292083739332e-05, "loss": 0.6777, "step": 213095 }, { "epoch": 2.36, "learning_rate": 1.067336935660082e-05, "loss": 0.6037, "step": 213100 }, { "epoch": 2.36, "learning_rate": 1.0672446629462308e-05, "loss": 0.6313, "step": 213105 }, { "epoch": 2.36, "learning_rate": 1.0671523902323796e-05, "loss": 0.6379, "step": 213110 }, { "epoch": 2.36, "learning_rate": 1.0670601175185284e-05, "loss": 0.6131, "step": 213115 }, { "epoch": 2.36, "learning_rate": 1.0669678448046772e-05, "loss": 0.622, "step": 213120 }, { "epoch": 2.36, "learning_rate": 1.066875572090826e-05, "loss": 0.6034, "step": 213125 }, { "epoch": 2.36, "learning_rate": 1.0667832993769747e-05, "loss": 0.5717, "step": 213130 }, { "epoch": 2.36, "learning_rate": 1.0666910266631235e-05, "loss": 0.5782, "step": 213135 }, { "epoch": 2.36, "learning_rate": 1.0665987539492721e-05, "loss": 0.5919, "step": 213140 }, { "epoch": 2.36, "learning_rate": 1.066506481235421e-05, "loss": 0.5921, "step": 213145 }, { "epoch": 2.36, "learning_rate": 1.0664142085215697e-05, "loss": 0.6476, "step": 213150 }, { "epoch": 2.36, "learning_rate": 1.0663219358077185e-05, "loss": 0.5789, "step": 213155 }, { "epoch": 2.36, "learning_rate": 1.0662296630938673e-05, "loss": 0.6278, "step": 213160 }, { "epoch": 2.36, "learning_rate": 1.066137390380016e-05, "loss": 0.625, "step": 213165 }, { "epoch": 2.36, "learning_rate": 1.0660451176661647e-05, "loss": 0.5763, "step": 213170 }, { "epoch": 2.36, "learning_rate": 1.0659528449523135e-05, "loss": 0.5887, "step": 213175 }, { "epoch": 2.36, "learning_rate": 1.0658605722384623e-05, "loss": 0.5582, "step": 213180 }, { "epoch": 2.36, "learning_rate": 1.065768299524611e-05, "loss": 0.5844, "step": 213185 }, { "epoch": 2.36, "learning_rate": 1.0656760268107598e-05, "loss": 0.6117, "step": 213190 }, { "epoch": 2.36, "learning_rate": 1.0655837540969086e-05, "loss": 0.5604, "step": 213195 }, { "epoch": 2.36, "learning_rate": 1.0654914813830572e-05, "loss": 0.6223, "step": 213200 }, { "epoch": 2.36, "learning_rate": 1.065399208669206e-05, "loss": 0.6324, "step": 213205 }, { "epoch": 2.36, "learning_rate": 1.0653069359553548e-05, "loss": 0.5527, "step": 213210 }, { "epoch": 2.36, "learning_rate": 1.0652146632415036e-05, "loss": 0.5523, "step": 213215 }, { "epoch": 2.36, "learning_rate": 1.0651223905276524e-05, "loss": 0.6534, "step": 213220 }, { "epoch": 2.36, "learning_rate": 1.0650301178138011e-05, "loss": 0.6131, "step": 213225 }, { "epoch": 2.36, "learning_rate": 1.06493784509995e-05, "loss": 0.6267, "step": 213230 }, { "epoch": 2.36, "learning_rate": 1.0648455723860987e-05, "loss": 0.6078, "step": 213235 }, { "epoch": 2.36, "learning_rate": 1.0647532996722475e-05, "loss": 0.6324, "step": 213240 }, { "epoch": 2.36, "learning_rate": 1.0646610269583961e-05, "loss": 0.5969, "step": 213245 }, { "epoch": 2.36, "learning_rate": 1.0645687542445449e-05, "loss": 0.5995, "step": 213250 }, { "epoch": 2.36, "learning_rate": 1.0644764815306935e-05, "loss": 0.5616, "step": 213255 }, { "epoch": 2.36, "learning_rate": 1.0643842088168423e-05, "loss": 0.5872, "step": 213260 }, { "epoch": 2.36, "learning_rate": 1.0642919361029911e-05, "loss": 0.5611, "step": 213265 }, { "epoch": 2.36, "learning_rate": 1.0641996633891399e-05, "loss": 0.5954, "step": 213270 }, { "epoch": 2.36, "learning_rate": 1.0641073906752887e-05, "loss": 0.6009, "step": 213275 }, { "epoch": 2.36, "learning_rate": 1.0640151179614374e-05, "loss": 0.5743, "step": 213280 }, { "epoch": 2.36, "learning_rate": 1.0639228452475862e-05, "loss": 0.5581, "step": 213285 }, { "epoch": 2.36, "learning_rate": 1.063830572533735e-05, "loss": 0.5472, "step": 213290 }, { "epoch": 2.36, "learning_rate": 1.0637382998198838e-05, "loss": 0.663, "step": 213295 }, { "epoch": 2.36, "learning_rate": 1.0636460271060326e-05, "loss": 0.6553, "step": 213300 }, { "epoch": 2.36, "learning_rate": 1.0635537543921812e-05, "loss": 0.6043, "step": 213305 }, { "epoch": 2.36, "learning_rate": 1.06346148167833e-05, "loss": 0.5711, "step": 213310 }, { "epoch": 2.36, "learning_rate": 1.0633692089644788e-05, "loss": 0.6235, "step": 213315 }, { "epoch": 2.36, "learning_rate": 1.0632769362506274e-05, "loss": 0.5894, "step": 213320 }, { "epoch": 2.36, "learning_rate": 1.0631846635367762e-05, "loss": 0.6111, "step": 213325 }, { "epoch": 2.36, "learning_rate": 1.063092390822925e-05, "loss": 0.6069, "step": 213330 }, { "epoch": 2.36, "learning_rate": 1.0630001181090737e-05, "loss": 0.6304, "step": 213335 }, { "epoch": 2.36, "learning_rate": 1.0629078453952225e-05, "loss": 0.5951, "step": 213340 }, { "epoch": 2.36, "learning_rate": 1.0628155726813713e-05, "loss": 0.5615, "step": 213345 }, { "epoch": 2.36, "learning_rate": 1.0627232999675201e-05, "loss": 0.5828, "step": 213350 }, { "epoch": 2.36, "learning_rate": 1.0626310272536689e-05, "loss": 0.5768, "step": 213355 }, { "epoch": 2.36, "learning_rate": 1.0625387545398177e-05, "loss": 0.5674, "step": 213360 }, { "epoch": 2.36, "learning_rate": 1.0624464818259663e-05, "loss": 0.6131, "step": 213365 }, { "epoch": 2.36, "learning_rate": 1.062354209112115e-05, "loss": 0.5713, "step": 213370 }, { "epoch": 2.36, "learning_rate": 1.0622619363982638e-05, "loss": 0.5676, "step": 213375 }, { "epoch": 2.36, "learning_rate": 1.0621696636844126e-05, "loss": 0.5867, "step": 213380 }, { "epoch": 2.36, "learning_rate": 1.0620773909705614e-05, "loss": 0.5739, "step": 213385 }, { "epoch": 2.36, "learning_rate": 1.0619851182567102e-05, "loss": 0.6119, "step": 213390 }, { "epoch": 2.36, "learning_rate": 1.061892845542859e-05, "loss": 0.5998, "step": 213395 }, { "epoch": 2.36, "learning_rate": 1.0618005728290076e-05, "loss": 0.5722, "step": 213400 }, { "epoch": 2.36, "learning_rate": 1.0617083001151564e-05, "loss": 0.5327, "step": 213405 }, { "epoch": 2.36, "learning_rate": 1.0616160274013052e-05, "loss": 0.6239, "step": 213410 }, { "epoch": 2.36, "learning_rate": 1.061523754687454e-05, "loss": 0.6047, "step": 213415 }, { "epoch": 2.36, "learning_rate": 1.0614314819736026e-05, "loss": 0.5786, "step": 213420 }, { "epoch": 2.36, "learning_rate": 1.0613392092597514e-05, "loss": 0.5707, "step": 213425 }, { "epoch": 2.36, "learning_rate": 1.0612469365459001e-05, "loss": 0.5727, "step": 213430 }, { "epoch": 2.36, "learning_rate": 1.061154663832049e-05, "loss": 0.5721, "step": 213435 }, { "epoch": 2.36, "learning_rate": 1.0610623911181977e-05, "loss": 0.6243, "step": 213440 }, { "epoch": 2.36, "learning_rate": 1.0609701184043465e-05, "loss": 0.5775, "step": 213445 }, { "epoch": 2.36, "learning_rate": 1.0608778456904953e-05, "loss": 0.583, "step": 213450 }, { "epoch": 2.36, "learning_rate": 1.060785572976644e-05, "loss": 0.5463, "step": 213455 }, { "epoch": 2.36, "learning_rate": 1.0606933002627929e-05, "loss": 0.6301, "step": 213460 }, { "epoch": 2.36, "learning_rate": 1.0606010275489416e-05, "loss": 0.5574, "step": 213465 }, { "epoch": 2.36, "learning_rate": 1.0605087548350903e-05, "loss": 0.6014, "step": 213470 }, { "epoch": 2.36, "learning_rate": 1.060416482121239e-05, "loss": 0.5393, "step": 213475 }, { "epoch": 2.36, "learning_rate": 1.0603242094073877e-05, "loss": 0.5607, "step": 213480 }, { "epoch": 2.36, "learning_rate": 1.0602319366935364e-05, "loss": 0.5856, "step": 213485 }, { "epoch": 2.36, "learning_rate": 1.0601396639796852e-05, "loss": 0.6104, "step": 213490 }, { "epoch": 2.36, "learning_rate": 1.060047391265834e-05, "loss": 0.5293, "step": 213495 }, { "epoch": 2.36, "learning_rate": 1.0599551185519828e-05, "loss": 0.5303, "step": 213500 }, { "epoch": 2.36, "learning_rate": 1.0598628458381316e-05, "loss": 0.5712, "step": 213505 }, { "epoch": 2.36, "learning_rate": 1.0597705731242804e-05, "loss": 0.5784, "step": 213510 }, { "epoch": 2.36, "learning_rate": 1.0596783004104291e-05, "loss": 0.5763, "step": 213515 }, { "epoch": 2.36, "learning_rate": 1.059586027696578e-05, "loss": 0.5685, "step": 213520 }, { "epoch": 2.36, "learning_rate": 1.0594937549827265e-05, "loss": 0.5811, "step": 213525 }, { "epoch": 2.36, "learning_rate": 1.0594014822688753e-05, "loss": 0.6027, "step": 213530 }, { "epoch": 2.36, "learning_rate": 1.0593092095550241e-05, "loss": 0.5876, "step": 213535 }, { "epoch": 2.36, "learning_rate": 1.0592169368411729e-05, "loss": 0.6064, "step": 213540 }, { "epoch": 2.36, "learning_rate": 1.0591246641273217e-05, "loss": 0.628, "step": 213545 }, { "epoch": 2.36, "learning_rate": 1.0590323914134703e-05, "loss": 0.6179, "step": 213550 }, { "epoch": 2.36, "learning_rate": 1.0589401186996191e-05, "loss": 0.5943, "step": 213555 }, { "epoch": 2.36, "learning_rate": 1.0588478459857679e-05, "loss": 0.6009, "step": 213560 }, { "epoch": 2.36, "learning_rate": 1.0587555732719167e-05, "loss": 0.5685, "step": 213565 }, { "epoch": 2.36, "learning_rate": 1.0586633005580654e-05, "loss": 0.6187, "step": 213570 }, { "epoch": 2.36, "learning_rate": 1.0585710278442142e-05, "loss": 0.5339, "step": 213575 }, { "epoch": 2.36, "learning_rate": 1.058478755130363e-05, "loss": 0.5987, "step": 213580 }, { "epoch": 2.36, "learning_rate": 1.0583864824165116e-05, "loss": 0.5962, "step": 213585 }, { "epoch": 2.37, "learning_rate": 1.0582942097026604e-05, "loss": 0.577, "step": 213590 }, { "epoch": 2.37, "learning_rate": 1.0582019369888092e-05, "loss": 0.5991, "step": 213595 }, { "epoch": 2.37, "learning_rate": 1.058109664274958e-05, "loss": 0.568, "step": 213600 }, { "epoch": 2.37, "learning_rate": 1.0580173915611068e-05, "loss": 0.6154, "step": 213605 }, { "epoch": 2.37, "learning_rate": 1.0579251188472555e-05, "loss": 0.6268, "step": 213610 }, { "epoch": 2.37, "learning_rate": 1.0578328461334043e-05, "loss": 0.6401, "step": 213615 }, { "epoch": 2.37, "learning_rate": 1.0577405734195531e-05, "loss": 0.5902, "step": 213620 }, { "epoch": 2.37, "learning_rate": 1.0576483007057017e-05, "loss": 0.6045, "step": 213625 }, { "epoch": 2.37, "learning_rate": 1.0575560279918505e-05, "loss": 0.6298, "step": 213630 }, { "epoch": 2.37, "learning_rate": 1.0574637552779993e-05, "loss": 0.6447, "step": 213635 }, { "epoch": 2.37, "learning_rate": 1.057371482564148e-05, "loss": 0.6004, "step": 213640 }, { "epoch": 2.37, "learning_rate": 1.0572792098502967e-05, "loss": 0.5867, "step": 213645 }, { "epoch": 2.37, "learning_rate": 1.0571869371364455e-05, "loss": 0.5898, "step": 213650 }, { "epoch": 2.37, "learning_rate": 1.0570946644225943e-05, "loss": 0.5885, "step": 213655 }, { "epoch": 2.37, "learning_rate": 1.057002391708743e-05, "loss": 0.6132, "step": 213660 }, { "epoch": 2.37, "learning_rate": 1.0569101189948918e-05, "loss": 0.5792, "step": 213665 }, { "epoch": 2.37, "learning_rate": 1.0568178462810406e-05, "loss": 0.5282, "step": 213670 }, { "epoch": 2.37, "learning_rate": 1.0567255735671894e-05, "loss": 0.5768, "step": 213675 }, { "epoch": 2.37, "learning_rate": 1.0566333008533382e-05, "loss": 0.5901, "step": 213680 }, { "epoch": 2.37, "learning_rate": 1.056541028139487e-05, "loss": 0.6153, "step": 213685 }, { "epoch": 2.37, "learning_rate": 1.0564487554256356e-05, "loss": 0.6151, "step": 213690 }, { "epoch": 2.37, "learning_rate": 1.0563564827117844e-05, "loss": 0.5349, "step": 213695 }, { "epoch": 2.37, "learning_rate": 1.056264209997933e-05, "loss": 0.5816, "step": 213700 }, { "epoch": 2.37, "learning_rate": 1.0561719372840818e-05, "loss": 0.5792, "step": 213705 }, { "epoch": 2.37, "learning_rate": 1.0560796645702306e-05, "loss": 0.5596, "step": 213710 }, { "epoch": 2.37, "learning_rate": 1.0559873918563794e-05, "loss": 0.5864, "step": 213715 }, { "epoch": 2.37, "learning_rate": 1.0558951191425281e-05, "loss": 0.6217, "step": 213720 }, { "epoch": 2.37, "learning_rate": 1.055802846428677e-05, "loss": 0.6118, "step": 213725 }, { "epoch": 2.37, "learning_rate": 1.0557105737148257e-05, "loss": 0.5991, "step": 213730 }, { "epoch": 2.37, "learning_rate": 1.0556183010009745e-05, "loss": 0.628, "step": 213735 }, { "epoch": 2.37, "learning_rate": 1.0555260282871233e-05, "loss": 0.5379, "step": 213740 }, { "epoch": 2.37, "learning_rate": 1.055433755573272e-05, "loss": 0.5608, "step": 213745 }, { "epoch": 2.37, "learning_rate": 1.0553414828594207e-05, "loss": 0.5752, "step": 213750 }, { "epoch": 2.37, "learning_rate": 1.0552492101455695e-05, "loss": 0.6256, "step": 213755 }, { "epoch": 2.37, "learning_rate": 1.0551569374317182e-05, "loss": 0.5462, "step": 213760 }, { "epoch": 2.37, "learning_rate": 1.055064664717867e-05, "loss": 0.5839, "step": 213765 }, { "epoch": 2.37, "learning_rate": 1.0549723920040158e-05, "loss": 0.6583, "step": 213770 }, { "epoch": 2.37, "learning_rate": 1.0548801192901646e-05, "loss": 0.5597, "step": 213775 }, { "epoch": 2.37, "learning_rate": 1.0547878465763132e-05, "loss": 0.6182, "step": 213780 }, { "epoch": 2.37, "learning_rate": 1.054695573862462e-05, "loss": 0.5924, "step": 213785 }, { "epoch": 2.37, "learning_rate": 1.0546033011486108e-05, "loss": 0.5996, "step": 213790 }, { "epoch": 2.37, "learning_rate": 1.0545110284347596e-05, "loss": 0.593, "step": 213795 }, { "epoch": 2.37, "learning_rate": 1.0544187557209084e-05, "loss": 0.5729, "step": 213800 }, { "epoch": 2.37, "learning_rate": 1.054326483007057e-05, "loss": 0.5899, "step": 213805 }, { "epoch": 2.37, "learning_rate": 1.0542342102932058e-05, "loss": 0.6036, "step": 213810 }, { "epoch": 2.37, "learning_rate": 1.0541419375793545e-05, "loss": 0.5647, "step": 213815 }, { "epoch": 2.37, "learning_rate": 1.0540496648655033e-05, "loss": 0.5964, "step": 213820 }, { "epoch": 2.37, "learning_rate": 1.0539573921516521e-05, "loss": 0.5642, "step": 213825 }, { "epoch": 2.37, "learning_rate": 1.0538651194378009e-05, "loss": 0.6159, "step": 213830 }, { "epoch": 2.37, "learning_rate": 1.0537728467239497e-05, "loss": 0.6016, "step": 213835 }, { "epoch": 2.37, "learning_rate": 1.0536805740100985e-05, "loss": 0.6442, "step": 213840 }, { "epoch": 2.37, "learning_rate": 1.0535883012962473e-05, "loss": 0.5398, "step": 213845 }, { "epoch": 2.37, "learning_rate": 1.053496028582396e-05, "loss": 0.6233, "step": 213850 }, { "epoch": 2.37, "learning_rate": 1.0534037558685447e-05, "loss": 0.5395, "step": 213855 }, { "epoch": 2.37, "learning_rate": 1.0533114831546934e-05, "loss": 0.5638, "step": 213860 }, { "epoch": 2.37, "learning_rate": 1.053219210440842e-05, "loss": 0.5595, "step": 213865 }, { "epoch": 2.37, "learning_rate": 1.0531269377269908e-05, "loss": 0.6038, "step": 213870 }, { "epoch": 2.37, "learning_rate": 1.0530346650131396e-05, "loss": 0.6156, "step": 213875 }, { "epoch": 2.37, "learning_rate": 1.0529423922992884e-05, "loss": 0.647, "step": 213880 }, { "epoch": 2.37, "learning_rate": 1.0528501195854372e-05, "loss": 0.6201, "step": 213885 }, { "epoch": 2.37, "learning_rate": 1.052757846871586e-05, "loss": 0.557, "step": 213890 }, { "epoch": 2.37, "learning_rate": 1.0526655741577348e-05, "loss": 0.5958, "step": 213895 }, { "epoch": 2.37, "learning_rate": 1.0525733014438835e-05, "loss": 0.6392, "step": 213900 }, { "epoch": 2.37, "learning_rate": 1.0524810287300323e-05, "loss": 0.5704, "step": 213905 }, { "epoch": 2.37, "learning_rate": 1.0523887560161811e-05, "loss": 0.6455, "step": 213910 }, { "epoch": 2.37, "learning_rate": 1.0522964833023297e-05, "loss": 0.6098, "step": 213915 }, { "epoch": 2.37, "learning_rate": 1.0522042105884785e-05, "loss": 0.6409, "step": 213920 }, { "epoch": 2.37, "learning_rate": 1.0521119378746273e-05, "loss": 0.6465, "step": 213925 }, { "epoch": 2.37, "learning_rate": 1.052019665160776e-05, "loss": 0.5999, "step": 213930 }, { "epoch": 2.37, "learning_rate": 1.0519273924469247e-05, "loss": 0.6162, "step": 213935 }, { "epoch": 2.37, "learning_rate": 1.0518351197330735e-05, "loss": 0.6086, "step": 213940 }, { "epoch": 2.37, "learning_rate": 1.0517428470192223e-05, "loss": 0.6051, "step": 213945 }, { "epoch": 2.37, "learning_rate": 1.051650574305371e-05, "loss": 0.5362, "step": 213950 }, { "epoch": 2.37, "learning_rate": 1.0515583015915198e-05, "loss": 0.6146, "step": 213955 }, { "epoch": 2.37, "learning_rate": 1.0514660288776686e-05, "loss": 0.5719, "step": 213960 }, { "epoch": 2.37, "learning_rate": 1.0513737561638174e-05, "loss": 0.5659, "step": 213965 }, { "epoch": 2.37, "learning_rate": 1.051281483449966e-05, "loss": 0.5602, "step": 213970 }, { "epoch": 2.37, "learning_rate": 1.0511892107361148e-05, "loss": 0.6295, "step": 213975 }, { "epoch": 2.37, "learning_rate": 1.0510969380222636e-05, "loss": 0.5856, "step": 213980 }, { "epoch": 2.37, "learning_rate": 1.0510046653084124e-05, "loss": 0.6112, "step": 213985 }, { "epoch": 2.37, "learning_rate": 1.0509123925945612e-05, "loss": 0.5358, "step": 213990 }, { "epoch": 2.37, "learning_rate": 1.05082011988071e-05, "loss": 0.5931, "step": 213995 }, { "epoch": 2.37, "learning_rate": 1.0507278471668587e-05, "loss": 0.5748, "step": 214000 }, { "epoch": 2.37, "eval_loss": 0.573622465133667, "eval_runtime": 69.1241, "eval_samples_per_second": 28.933, "eval_steps_per_second": 14.467, "step": 214000 }, { "epoch": 2.37, "learning_rate": 1.0506355744530074e-05, "loss": 0.5752, "step": 214005 }, { "epoch": 2.37, "learning_rate": 1.0505433017391561e-05, "loss": 0.571, "step": 214010 }, { "epoch": 2.37, "learning_rate": 1.050451029025305e-05, "loss": 0.6275, "step": 214015 }, { "epoch": 2.37, "learning_rate": 1.0503587563114537e-05, "loss": 0.602, "step": 214020 }, { "epoch": 2.37, "learning_rate": 1.0502664835976025e-05, "loss": 0.5836, "step": 214025 }, { "epoch": 2.37, "learning_rate": 1.0501742108837511e-05, "loss": 0.5989, "step": 214030 }, { "epoch": 2.37, "learning_rate": 1.0500819381698999e-05, "loss": 0.5761, "step": 214035 }, { "epoch": 2.37, "learning_rate": 1.0499896654560487e-05, "loss": 0.6398, "step": 214040 }, { "epoch": 2.37, "learning_rate": 1.0498973927421975e-05, "loss": 0.592, "step": 214045 }, { "epoch": 2.37, "learning_rate": 1.0498051200283462e-05, "loss": 0.5638, "step": 214050 }, { "epoch": 2.37, "learning_rate": 1.049712847314495e-05, "loss": 0.5693, "step": 214055 }, { "epoch": 2.37, "learning_rate": 1.0496205746006438e-05, "loss": 0.5877, "step": 214060 }, { "epoch": 2.37, "learning_rate": 1.0495283018867926e-05, "loss": 0.5852, "step": 214065 }, { "epoch": 2.37, "learning_rate": 1.0494360291729414e-05, "loss": 0.5684, "step": 214070 }, { "epoch": 2.37, "learning_rate": 1.04934375645909e-05, "loss": 0.6231, "step": 214075 }, { "epoch": 2.37, "learning_rate": 1.0492514837452388e-05, "loss": 0.6338, "step": 214080 }, { "epoch": 2.37, "learning_rate": 1.0491592110313874e-05, "loss": 0.6136, "step": 214085 }, { "epoch": 2.37, "learning_rate": 1.0490669383175362e-05, "loss": 0.6401, "step": 214090 }, { "epoch": 2.37, "learning_rate": 1.048974665603685e-05, "loss": 0.6068, "step": 214095 }, { "epoch": 2.37, "learning_rate": 1.0488823928898338e-05, "loss": 0.5933, "step": 214100 }, { "epoch": 2.37, "learning_rate": 1.0487901201759825e-05, "loss": 0.5876, "step": 214105 }, { "epoch": 2.37, "learning_rate": 1.0486978474621313e-05, "loss": 0.6318, "step": 214110 }, { "epoch": 2.37, "learning_rate": 1.0486055747482801e-05, "loss": 0.5871, "step": 214115 }, { "epoch": 2.37, "learning_rate": 1.0485133020344289e-05, "loss": 0.5782, "step": 214120 }, { "epoch": 2.37, "learning_rate": 1.0484210293205777e-05, "loss": 0.6278, "step": 214125 }, { "epoch": 2.37, "learning_rate": 1.0483287566067265e-05, "loss": 0.5842, "step": 214130 }, { "epoch": 2.37, "learning_rate": 1.048236483892875e-05, "loss": 0.5515, "step": 214135 }, { "epoch": 2.37, "learning_rate": 1.0481442111790239e-05, "loss": 0.5801, "step": 214140 }, { "epoch": 2.37, "learning_rate": 1.0480519384651727e-05, "loss": 0.6289, "step": 214145 }, { "epoch": 2.37, "learning_rate": 1.0479596657513214e-05, "loss": 0.6086, "step": 214150 }, { "epoch": 2.37, "learning_rate": 1.04786739303747e-05, "loss": 0.6365, "step": 214155 }, { "epoch": 2.37, "learning_rate": 1.0477751203236188e-05, "loss": 0.6097, "step": 214160 }, { "epoch": 2.37, "learning_rate": 1.0476828476097676e-05, "loss": 0.6285, "step": 214165 }, { "epoch": 2.37, "learning_rate": 1.0475905748959164e-05, "loss": 0.5726, "step": 214170 }, { "epoch": 2.37, "learning_rate": 1.0474983021820652e-05, "loss": 0.5867, "step": 214175 }, { "epoch": 2.37, "learning_rate": 1.047406029468214e-05, "loss": 0.5661, "step": 214180 }, { "epoch": 2.37, "learning_rate": 1.0473137567543628e-05, "loss": 0.5984, "step": 214185 }, { "epoch": 2.37, "learning_rate": 1.0472214840405114e-05, "loss": 0.6485, "step": 214190 }, { "epoch": 2.37, "learning_rate": 1.0471292113266602e-05, "loss": 0.5558, "step": 214195 }, { "epoch": 2.37, "learning_rate": 1.047036938612809e-05, "loss": 0.6438, "step": 214200 }, { "epoch": 2.37, "learning_rate": 1.0469446658989577e-05, "loss": 0.6051, "step": 214205 }, { "epoch": 2.37, "learning_rate": 1.0468523931851065e-05, "loss": 0.6044, "step": 214210 }, { "epoch": 2.37, "learning_rate": 1.0467601204712553e-05, "loss": 0.6058, "step": 214215 }, { "epoch": 2.37, "learning_rate": 1.046667847757404e-05, "loss": 0.6252, "step": 214220 }, { "epoch": 2.37, "learning_rate": 1.0465755750435529e-05, "loss": 0.5469, "step": 214225 }, { "epoch": 2.37, "learning_rate": 1.0464833023297017e-05, "loss": 0.5965, "step": 214230 }, { "epoch": 2.37, "learning_rate": 1.0463910296158503e-05, "loss": 0.6037, "step": 214235 }, { "epoch": 2.37, "learning_rate": 1.046298756901999e-05, "loss": 0.5468, "step": 214240 }, { "epoch": 2.37, "learning_rate": 1.0462064841881478e-05, "loss": 0.6095, "step": 214245 }, { "epoch": 2.37, "learning_rate": 1.0461142114742965e-05, "loss": 0.5847, "step": 214250 }, { "epoch": 2.37, "learning_rate": 1.0460219387604452e-05, "loss": 0.5733, "step": 214255 }, { "epoch": 2.37, "learning_rate": 1.045929666046594e-05, "loss": 0.6006, "step": 214260 }, { "epoch": 2.37, "learning_rate": 1.0458373933327428e-05, "loss": 0.6037, "step": 214265 }, { "epoch": 2.37, "learning_rate": 1.0457451206188916e-05, "loss": 0.6195, "step": 214270 }, { "epoch": 2.37, "learning_rate": 1.0456528479050404e-05, "loss": 0.55, "step": 214275 }, { "epoch": 2.37, "learning_rate": 1.0455605751911892e-05, "loss": 0.5975, "step": 214280 }, { "epoch": 2.37, "learning_rate": 1.045468302477338e-05, "loss": 0.5458, "step": 214285 }, { "epoch": 2.37, "learning_rate": 1.0453760297634867e-05, "loss": 0.6233, "step": 214290 }, { "epoch": 2.37, "learning_rate": 1.0452837570496355e-05, "loss": 0.5836, "step": 214295 }, { "epoch": 2.37, "learning_rate": 1.0451914843357841e-05, "loss": 0.5722, "step": 214300 }, { "epoch": 2.37, "learning_rate": 1.045099211621933e-05, "loss": 0.5608, "step": 214305 }, { "epoch": 2.37, "learning_rate": 1.0450069389080815e-05, "loss": 0.6095, "step": 214310 }, { "epoch": 2.37, "learning_rate": 1.0449146661942303e-05, "loss": 0.5987, "step": 214315 }, { "epoch": 2.37, "learning_rate": 1.0448223934803791e-05, "loss": 0.5664, "step": 214320 }, { "epoch": 2.37, "learning_rate": 1.0447301207665279e-05, "loss": 0.5334, "step": 214325 }, { "epoch": 2.37, "learning_rate": 1.0446378480526767e-05, "loss": 0.6406, "step": 214330 }, { "epoch": 2.37, "learning_rate": 1.0445455753388255e-05, "loss": 0.5882, "step": 214335 }, { "epoch": 2.37, "learning_rate": 1.0444533026249742e-05, "loss": 0.6253, "step": 214340 }, { "epoch": 2.37, "learning_rate": 1.044361029911123e-05, "loss": 0.6152, "step": 214345 }, { "epoch": 2.37, "learning_rate": 1.0442687571972718e-05, "loss": 0.595, "step": 214350 }, { "epoch": 2.37, "learning_rate": 1.0441764844834204e-05, "loss": 0.5827, "step": 214355 }, { "epoch": 2.37, "learning_rate": 1.0440842117695692e-05, "loss": 0.6385, "step": 214360 }, { "epoch": 2.37, "learning_rate": 1.043991939055718e-05, "loss": 0.621, "step": 214365 }, { "epoch": 2.37, "learning_rate": 1.0438996663418668e-05, "loss": 0.5854, "step": 214370 }, { "epoch": 2.37, "learning_rate": 1.0438073936280156e-05, "loss": 0.5783, "step": 214375 }, { "epoch": 2.37, "learning_rate": 1.0437151209141644e-05, "loss": 0.5676, "step": 214380 }, { "epoch": 2.37, "learning_rate": 1.043622848200313e-05, "loss": 0.5928, "step": 214385 }, { "epoch": 2.37, "learning_rate": 1.0435305754864618e-05, "loss": 0.6207, "step": 214390 }, { "epoch": 2.37, "learning_rate": 1.0434383027726105e-05, "loss": 0.5772, "step": 214395 }, { "epoch": 2.37, "learning_rate": 1.0433460300587593e-05, "loss": 0.5834, "step": 214400 }, { "epoch": 2.37, "learning_rate": 1.0432537573449081e-05, "loss": 0.565, "step": 214405 }, { "epoch": 2.37, "learning_rate": 1.0431614846310569e-05, "loss": 0.592, "step": 214410 }, { "epoch": 2.37, "learning_rate": 1.0430692119172055e-05, "loss": 0.6068, "step": 214415 }, { "epoch": 2.37, "learning_rate": 1.0429769392033543e-05, "loss": 0.6327, "step": 214420 }, { "epoch": 2.37, "learning_rate": 1.042884666489503e-05, "loss": 0.5999, "step": 214425 }, { "epoch": 2.37, "learning_rate": 1.0427923937756519e-05, "loss": 0.5968, "step": 214430 }, { "epoch": 2.37, "learning_rate": 1.0427001210618006e-05, "loss": 0.6721, "step": 214435 }, { "epoch": 2.37, "learning_rate": 1.0426078483479494e-05, "loss": 0.5434, "step": 214440 }, { "epoch": 2.37, "learning_rate": 1.0425155756340982e-05, "loss": 0.6099, "step": 214445 }, { "epoch": 2.37, "learning_rate": 1.042423302920247e-05, "loss": 0.5695, "step": 214450 }, { "epoch": 2.37, "learning_rate": 1.0423310302063958e-05, "loss": 0.564, "step": 214455 }, { "epoch": 2.37, "learning_rate": 1.0422387574925444e-05, "loss": 0.6084, "step": 214460 }, { "epoch": 2.37, "learning_rate": 1.0421464847786932e-05, "loss": 0.5751, "step": 214465 }, { "epoch": 2.37, "learning_rate": 1.0420542120648418e-05, "loss": 0.5668, "step": 214470 }, { "epoch": 2.37, "learning_rate": 1.0419619393509906e-05, "loss": 0.5678, "step": 214475 }, { "epoch": 2.37, "learning_rate": 1.0418696666371394e-05, "loss": 0.5479, "step": 214480 }, { "epoch": 2.37, "learning_rate": 1.0417773939232882e-05, "loss": 0.5896, "step": 214485 }, { "epoch": 2.37, "learning_rate": 1.041685121209437e-05, "loss": 0.5803, "step": 214490 }, { "epoch": 2.38, "learning_rate": 1.0415928484955857e-05, "loss": 0.6078, "step": 214495 }, { "epoch": 2.38, "learning_rate": 1.0415005757817345e-05, "loss": 0.5523, "step": 214500 }, { "epoch": 2.38, "learning_rate": 1.0414083030678833e-05, "loss": 0.6354, "step": 214505 }, { "epoch": 2.38, "learning_rate": 1.041316030354032e-05, "loss": 0.5929, "step": 214510 }, { "epoch": 2.38, "learning_rate": 1.0412237576401809e-05, "loss": 0.585, "step": 214515 }, { "epoch": 2.38, "learning_rate": 1.0411314849263295e-05, "loss": 0.5915, "step": 214520 }, { "epoch": 2.38, "learning_rate": 1.0410392122124783e-05, "loss": 0.6401, "step": 214525 }, { "epoch": 2.38, "learning_rate": 1.040946939498627e-05, "loss": 0.6169, "step": 214530 }, { "epoch": 2.38, "learning_rate": 1.0408546667847757e-05, "loss": 0.5621, "step": 214535 }, { "epoch": 2.38, "learning_rate": 1.0407623940709245e-05, "loss": 0.6001, "step": 214540 }, { "epoch": 2.38, "learning_rate": 1.0406701213570732e-05, "loss": 0.6026, "step": 214545 }, { "epoch": 2.38, "learning_rate": 1.040577848643222e-05, "loss": 0.5602, "step": 214550 }, { "epoch": 2.38, "learning_rate": 1.0404855759293708e-05, "loss": 0.6101, "step": 214555 }, { "epoch": 2.38, "learning_rate": 1.0403933032155196e-05, "loss": 0.5692, "step": 214560 }, { "epoch": 2.38, "learning_rate": 1.0403010305016684e-05, "loss": 0.6204, "step": 214565 }, { "epoch": 2.38, "learning_rate": 1.0402087577878172e-05, "loss": 0.5669, "step": 214570 }, { "epoch": 2.38, "learning_rate": 1.040116485073966e-05, "loss": 0.6038, "step": 214575 }, { "epoch": 2.38, "learning_rate": 1.0400242123601146e-05, "loss": 0.6132, "step": 214580 }, { "epoch": 2.38, "learning_rate": 1.0399319396462633e-05, "loss": 0.5618, "step": 214585 }, { "epoch": 2.38, "learning_rate": 1.0398396669324121e-05, "loss": 0.6533, "step": 214590 }, { "epoch": 2.38, "learning_rate": 1.039747394218561e-05, "loss": 0.5646, "step": 214595 }, { "epoch": 2.38, "learning_rate": 1.0396551215047097e-05, "loss": 0.5724, "step": 214600 }, { "epoch": 2.38, "learning_rate": 1.0395628487908585e-05, "loss": 0.6021, "step": 214605 }, { "epoch": 2.38, "learning_rate": 1.0394705760770071e-05, "loss": 0.5627, "step": 214610 }, { "epoch": 2.38, "learning_rate": 1.0393783033631559e-05, "loss": 0.5859, "step": 214615 }, { "epoch": 2.38, "learning_rate": 1.0392860306493047e-05, "loss": 0.5775, "step": 214620 }, { "epoch": 2.38, "learning_rate": 1.0391937579354535e-05, "loss": 0.5707, "step": 214625 }, { "epoch": 2.38, "learning_rate": 1.0391014852216022e-05, "loss": 0.6044, "step": 214630 }, { "epoch": 2.38, "learning_rate": 1.0390092125077509e-05, "loss": 0.6029, "step": 214635 }, { "epoch": 2.38, "learning_rate": 1.0389169397938996e-05, "loss": 0.5974, "step": 214640 }, { "epoch": 2.38, "learning_rate": 1.0388246670800484e-05, "loss": 0.5796, "step": 214645 }, { "epoch": 2.38, "learning_rate": 1.0387323943661972e-05, "loss": 0.531, "step": 214650 }, { "epoch": 2.38, "learning_rate": 1.038640121652346e-05, "loss": 0.597, "step": 214655 }, { "epoch": 2.38, "learning_rate": 1.0385478489384948e-05, "loss": 0.5873, "step": 214660 }, { "epoch": 2.38, "learning_rate": 1.0384555762246436e-05, "loss": 0.5269, "step": 214665 }, { "epoch": 2.38, "learning_rate": 1.0383633035107924e-05, "loss": 0.6192, "step": 214670 }, { "epoch": 2.38, "learning_rate": 1.0382710307969411e-05, "loss": 0.5888, "step": 214675 }, { "epoch": 2.38, "learning_rate": 1.03817875808309e-05, "loss": 0.6001, "step": 214680 }, { "epoch": 2.38, "learning_rate": 1.0380864853692385e-05, "loss": 0.5458, "step": 214685 }, { "epoch": 2.38, "learning_rate": 1.0379942126553873e-05, "loss": 0.5906, "step": 214690 }, { "epoch": 2.38, "learning_rate": 1.037901939941536e-05, "loss": 0.5703, "step": 214695 }, { "epoch": 2.38, "learning_rate": 1.0378096672276847e-05, "loss": 0.5679, "step": 214700 }, { "epoch": 2.38, "learning_rate": 1.0377173945138335e-05, "loss": 0.5801, "step": 214705 }, { "epoch": 2.38, "learning_rate": 1.0376251217999823e-05, "loss": 0.5947, "step": 214710 }, { "epoch": 2.38, "learning_rate": 1.037532849086131e-05, "loss": 0.5893, "step": 214715 }, { "epoch": 2.38, "learning_rate": 1.0374405763722799e-05, "loss": 0.5932, "step": 214720 }, { "epoch": 2.38, "learning_rate": 1.0373483036584286e-05, "loss": 0.597, "step": 214725 }, { "epoch": 2.38, "learning_rate": 1.0372560309445774e-05, "loss": 0.5485, "step": 214730 }, { "epoch": 2.38, "learning_rate": 1.0371637582307262e-05, "loss": 0.5411, "step": 214735 }, { "epoch": 2.38, "learning_rate": 1.0370714855168748e-05, "loss": 0.6098, "step": 214740 }, { "epoch": 2.38, "learning_rate": 1.0369792128030236e-05, "loss": 0.5794, "step": 214745 }, { "epoch": 2.38, "learning_rate": 1.0368869400891724e-05, "loss": 0.5854, "step": 214750 }, { "epoch": 2.38, "learning_rate": 1.0367946673753212e-05, "loss": 0.5707, "step": 214755 }, { "epoch": 2.38, "learning_rate": 1.03670239466147e-05, "loss": 0.5922, "step": 214760 }, { "epoch": 2.38, "learning_rate": 1.0366101219476186e-05, "loss": 0.6202, "step": 214765 }, { "epoch": 2.38, "learning_rate": 1.0365178492337674e-05, "loss": 0.693, "step": 214770 }, { "epoch": 2.38, "learning_rate": 1.0364255765199162e-05, "loss": 0.5645, "step": 214775 }, { "epoch": 2.38, "learning_rate": 1.036333303806065e-05, "loss": 0.5926, "step": 214780 }, { "epoch": 2.38, "learning_rate": 1.0362410310922137e-05, "loss": 0.6218, "step": 214785 }, { "epoch": 2.38, "learning_rate": 1.0361487583783625e-05, "loss": 0.5612, "step": 214790 }, { "epoch": 2.38, "learning_rate": 1.0360564856645113e-05, "loss": 0.5021, "step": 214795 }, { "epoch": 2.38, "learning_rate": 1.0359642129506599e-05, "loss": 0.5986, "step": 214800 }, { "epoch": 2.38, "learning_rate": 1.0358719402368087e-05, "loss": 0.5701, "step": 214805 }, { "epoch": 2.38, "learning_rate": 1.0357796675229575e-05, "loss": 0.5846, "step": 214810 }, { "epoch": 2.38, "learning_rate": 1.0356873948091063e-05, "loss": 0.6276, "step": 214815 }, { "epoch": 2.38, "learning_rate": 1.035595122095255e-05, "loss": 0.5763, "step": 214820 }, { "epoch": 2.38, "learning_rate": 1.0355028493814038e-05, "loss": 0.6178, "step": 214825 }, { "epoch": 2.38, "learning_rate": 1.0354105766675526e-05, "loss": 0.5768, "step": 214830 }, { "epoch": 2.38, "learning_rate": 1.0353183039537014e-05, "loss": 0.6527, "step": 214835 }, { "epoch": 2.38, "learning_rate": 1.03522603123985e-05, "loss": 0.6129, "step": 214840 }, { "epoch": 2.38, "learning_rate": 1.0351337585259988e-05, "loss": 0.5936, "step": 214845 }, { "epoch": 2.38, "learning_rate": 1.0350414858121476e-05, "loss": 0.5146, "step": 214850 }, { "epoch": 2.38, "learning_rate": 1.0349492130982962e-05, "loss": 0.5983, "step": 214855 }, { "epoch": 2.38, "learning_rate": 1.034856940384445e-05, "loss": 0.5831, "step": 214860 }, { "epoch": 2.38, "learning_rate": 1.0347646676705938e-05, "loss": 0.6113, "step": 214865 }, { "epoch": 2.38, "learning_rate": 1.0346723949567426e-05, "loss": 0.5578, "step": 214870 }, { "epoch": 2.38, "learning_rate": 1.0345801222428913e-05, "loss": 0.5776, "step": 214875 }, { "epoch": 2.38, "learning_rate": 1.0344878495290401e-05, "loss": 0.5479, "step": 214880 }, { "epoch": 2.38, "learning_rate": 1.0343955768151889e-05, "loss": 0.5894, "step": 214885 }, { "epoch": 2.38, "learning_rate": 1.0343033041013377e-05, "loss": 0.5792, "step": 214890 }, { "epoch": 2.38, "learning_rate": 1.0342110313874865e-05, "loss": 0.5496, "step": 214895 }, { "epoch": 2.38, "learning_rate": 1.0341187586736353e-05, "loss": 0.6005, "step": 214900 }, { "epoch": 2.38, "learning_rate": 1.0340264859597839e-05, "loss": 0.5568, "step": 214905 }, { "epoch": 2.38, "learning_rate": 1.0339342132459327e-05, "loss": 0.5904, "step": 214910 }, { "epoch": 2.38, "learning_rate": 1.0338419405320813e-05, "loss": 0.632, "step": 214915 }, { "epoch": 2.38, "learning_rate": 1.03374966781823e-05, "loss": 0.5272, "step": 214920 }, { "epoch": 2.38, "learning_rate": 1.0336573951043789e-05, "loss": 0.581, "step": 214925 }, { "epoch": 2.38, "learning_rate": 1.0335651223905276e-05, "loss": 0.6032, "step": 214930 }, { "epoch": 2.38, "learning_rate": 1.0334728496766764e-05, "loss": 0.5584, "step": 214935 }, { "epoch": 2.38, "learning_rate": 1.0333805769628252e-05, "loss": 0.6114, "step": 214940 }, { "epoch": 2.38, "learning_rate": 1.033288304248974e-05, "loss": 0.5908, "step": 214945 }, { "epoch": 2.38, "learning_rate": 1.0331960315351228e-05, "loss": 0.647, "step": 214950 }, { "epoch": 2.38, "learning_rate": 1.0331037588212716e-05, "loss": 0.6062, "step": 214955 }, { "epoch": 2.38, "learning_rate": 1.0330114861074203e-05, "loss": 0.5736, "step": 214960 }, { "epoch": 2.38, "learning_rate": 1.032919213393569e-05, "loss": 0.6047, "step": 214965 }, { "epoch": 2.38, "learning_rate": 1.0328269406797177e-05, "loss": 0.5673, "step": 214970 }, { "epoch": 2.38, "learning_rate": 1.0327346679658665e-05, "loss": 0.57, "step": 214975 }, { "epoch": 2.38, "learning_rate": 1.0326423952520153e-05, "loss": 0.5797, "step": 214980 }, { "epoch": 2.38, "learning_rate": 1.0325501225381641e-05, "loss": 0.6267, "step": 214985 }, { "epoch": 2.38, "learning_rate": 1.0324578498243127e-05, "loss": 0.5767, "step": 214990 }, { "epoch": 2.38, "learning_rate": 1.0323655771104615e-05, "loss": 0.587, "step": 214995 }, { "epoch": 2.38, "learning_rate": 1.0322733043966103e-05, "loss": 0.5818, "step": 215000 }, { "epoch": 2.38, "eval_loss": 0.5599018931388855, "eval_runtime": 69.7047, "eval_samples_per_second": 28.692, "eval_steps_per_second": 14.346, "step": 215000 }, { "epoch": 2.38, "learning_rate": 1.032181031682759e-05, "loss": 0.6087, "step": 215005 }, { "epoch": 2.38, "learning_rate": 1.0320887589689079e-05, "loss": 0.5848, "step": 215010 }, { "epoch": 2.38, "learning_rate": 1.0319964862550566e-05, "loss": 0.6386, "step": 215015 }, { "epoch": 2.38, "learning_rate": 1.0319042135412053e-05, "loss": 0.5744, "step": 215020 }, { "epoch": 2.38, "learning_rate": 1.031811940827354e-05, "loss": 0.5831, "step": 215025 }, { "epoch": 2.38, "learning_rate": 1.0317196681135028e-05, "loss": 0.5865, "step": 215030 }, { "epoch": 2.38, "learning_rate": 1.0316273953996516e-05, "loss": 0.642, "step": 215035 }, { "epoch": 2.38, "learning_rate": 1.0315351226858004e-05, "loss": 0.5838, "step": 215040 }, { "epoch": 2.38, "learning_rate": 1.0314428499719492e-05, "loss": 0.5874, "step": 215045 }, { "epoch": 2.38, "learning_rate": 1.031350577258098e-05, "loss": 0.6271, "step": 215050 }, { "epoch": 2.38, "learning_rate": 1.0312583045442468e-05, "loss": 0.6178, "step": 215055 }, { "epoch": 2.38, "learning_rate": 1.0311660318303955e-05, "loss": 0.5692, "step": 215060 }, { "epoch": 2.38, "learning_rate": 1.0310737591165443e-05, "loss": 0.5885, "step": 215065 }, { "epoch": 2.38, "learning_rate": 1.030981486402693e-05, "loss": 0.6063, "step": 215070 }, { "epoch": 2.38, "learning_rate": 1.0308892136888417e-05, "loss": 0.5441, "step": 215075 }, { "epoch": 2.38, "learning_rate": 1.0307969409749903e-05, "loss": 0.5496, "step": 215080 }, { "epoch": 2.38, "learning_rate": 1.0307046682611391e-05, "loss": 0.5642, "step": 215085 }, { "epoch": 2.38, "learning_rate": 1.0306123955472879e-05, "loss": 0.6122, "step": 215090 }, { "epoch": 2.38, "learning_rate": 1.0305201228334367e-05, "loss": 0.6469, "step": 215095 }, { "epoch": 2.38, "learning_rate": 1.0304278501195855e-05, "loss": 0.5561, "step": 215100 }, { "epoch": 2.38, "learning_rate": 1.0303355774057343e-05, "loss": 0.6198, "step": 215105 }, { "epoch": 2.38, "learning_rate": 1.030243304691883e-05, "loss": 0.5743, "step": 215110 }, { "epoch": 2.38, "learning_rate": 1.0301510319780318e-05, "loss": 0.5633, "step": 215115 }, { "epoch": 2.38, "learning_rate": 1.0300587592641806e-05, "loss": 0.5327, "step": 215120 }, { "epoch": 2.38, "learning_rate": 1.0299664865503294e-05, "loss": 0.5284, "step": 215125 }, { "epoch": 2.38, "learning_rate": 1.029874213836478e-05, "loss": 0.6064, "step": 215130 }, { "epoch": 2.38, "learning_rate": 1.0297819411226268e-05, "loss": 0.6128, "step": 215135 }, { "epoch": 2.38, "learning_rate": 1.0296896684087756e-05, "loss": 0.591, "step": 215140 }, { "epoch": 2.38, "learning_rate": 1.0295973956949242e-05, "loss": 0.6085, "step": 215145 }, { "epoch": 2.38, "learning_rate": 1.029505122981073e-05, "loss": 0.6159, "step": 215150 }, { "epoch": 2.38, "learning_rate": 1.0294128502672218e-05, "loss": 0.6019, "step": 215155 }, { "epoch": 2.38, "learning_rate": 1.0293205775533706e-05, "loss": 0.6448, "step": 215160 }, { "epoch": 2.38, "learning_rate": 1.0292283048395193e-05, "loss": 0.5166, "step": 215165 }, { "epoch": 2.38, "learning_rate": 1.0291360321256681e-05, "loss": 0.5584, "step": 215170 }, { "epoch": 2.38, "learning_rate": 1.0290437594118169e-05, "loss": 0.5917, "step": 215175 }, { "epoch": 2.38, "learning_rate": 1.0289514866979657e-05, "loss": 0.5928, "step": 215180 }, { "epoch": 2.38, "learning_rate": 1.0288592139841143e-05, "loss": 0.6237, "step": 215185 }, { "epoch": 2.38, "learning_rate": 1.0287669412702631e-05, "loss": 0.5922, "step": 215190 }, { "epoch": 2.38, "learning_rate": 1.0286746685564119e-05, "loss": 0.6289, "step": 215195 }, { "epoch": 2.38, "learning_rate": 1.0285823958425607e-05, "loss": 0.571, "step": 215200 }, { "epoch": 2.38, "learning_rate": 1.0284901231287095e-05, "loss": 0.57, "step": 215205 }, { "epoch": 2.38, "learning_rate": 1.0283978504148582e-05, "loss": 0.5065, "step": 215210 }, { "epoch": 2.38, "learning_rate": 1.028305577701007e-05, "loss": 0.6236, "step": 215215 }, { "epoch": 2.38, "learning_rate": 1.0282133049871556e-05, "loss": 0.5861, "step": 215220 }, { "epoch": 2.38, "learning_rate": 1.0281210322733044e-05, "loss": 0.5702, "step": 215225 }, { "epoch": 2.38, "learning_rate": 1.0280287595594532e-05, "loss": 0.5151, "step": 215230 }, { "epoch": 2.38, "learning_rate": 1.027936486845602e-05, "loss": 0.5667, "step": 215235 }, { "epoch": 2.38, "learning_rate": 1.0278442141317508e-05, "loss": 0.5913, "step": 215240 }, { "epoch": 2.38, "learning_rate": 1.0277519414178994e-05, "loss": 0.5836, "step": 215245 }, { "epoch": 2.38, "learning_rate": 1.0276596687040482e-05, "loss": 0.5875, "step": 215250 }, { "epoch": 2.38, "learning_rate": 1.027567395990197e-05, "loss": 0.6066, "step": 215255 }, { "epoch": 2.38, "learning_rate": 1.0274751232763457e-05, "loss": 0.5664, "step": 215260 }, { "epoch": 2.38, "learning_rate": 1.0273828505624945e-05, "loss": 0.595, "step": 215265 }, { "epoch": 2.38, "learning_rate": 1.0272905778486433e-05, "loss": 0.5619, "step": 215270 }, { "epoch": 2.38, "learning_rate": 1.0271983051347921e-05, "loss": 0.5664, "step": 215275 }, { "epoch": 2.38, "learning_rate": 1.0271060324209409e-05, "loss": 0.5982, "step": 215280 }, { "epoch": 2.38, "learning_rate": 1.0270137597070897e-05, "loss": 0.5977, "step": 215285 }, { "epoch": 2.38, "learning_rate": 1.0269214869932383e-05, "loss": 0.5973, "step": 215290 }, { "epoch": 2.38, "learning_rate": 1.026829214279387e-05, "loss": 0.617, "step": 215295 }, { "epoch": 2.38, "learning_rate": 1.0267369415655357e-05, "loss": 0.6513, "step": 215300 }, { "epoch": 2.38, "learning_rate": 1.0266446688516845e-05, "loss": 0.5555, "step": 215305 }, { "epoch": 2.38, "learning_rate": 1.0265523961378333e-05, "loss": 0.5901, "step": 215310 }, { "epoch": 2.38, "learning_rate": 1.026460123423982e-05, "loss": 0.5825, "step": 215315 }, { "epoch": 2.38, "learning_rate": 1.0263678507101308e-05, "loss": 0.5682, "step": 215320 }, { "epoch": 2.38, "learning_rate": 1.0262755779962796e-05, "loss": 0.5662, "step": 215325 }, { "epoch": 2.38, "learning_rate": 1.0261833052824284e-05, "loss": 0.6033, "step": 215330 }, { "epoch": 2.38, "learning_rate": 1.0260910325685772e-05, "loss": 0.5909, "step": 215335 }, { "epoch": 2.38, "learning_rate": 1.025998759854726e-05, "loss": 0.6226, "step": 215340 }, { "epoch": 2.38, "learning_rate": 1.0259064871408748e-05, "loss": 0.5473, "step": 215345 }, { "epoch": 2.38, "learning_rate": 1.0258142144270234e-05, "loss": 0.5909, "step": 215350 }, { "epoch": 2.38, "learning_rate": 1.0257219417131722e-05, "loss": 0.5635, "step": 215355 }, { "epoch": 2.38, "learning_rate": 1.025629668999321e-05, "loss": 0.6102, "step": 215360 }, { "epoch": 2.38, "learning_rate": 1.0255373962854697e-05, "loss": 0.6681, "step": 215365 }, { "epoch": 2.38, "learning_rate": 1.0254451235716183e-05, "loss": 0.5754, "step": 215370 }, { "epoch": 2.38, "learning_rate": 1.0253528508577671e-05, "loss": 0.5417, "step": 215375 }, { "epoch": 2.38, "learning_rate": 1.0252605781439159e-05, "loss": 0.5521, "step": 215380 }, { "epoch": 2.38, "learning_rate": 1.0251683054300647e-05, "loss": 0.5967, "step": 215385 }, { "epoch": 2.38, "learning_rate": 1.0250760327162135e-05, "loss": 0.6363, "step": 215390 }, { "epoch": 2.39, "learning_rate": 1.0249837600023623e-05, "loss": 0.5863, "step": 215395 }, { "epoch": 2.39, "learning_rate": 1.024891487288511e-05, "loss": 0.6414, "step": 215400 }, { "epoch": 2.39, "learning_rate": 1.0247992145746597e-05, "loss": 0.5903, "step": 215405 }, { "epoch": 2.39, "learning_rate": 1.0247069418608084e-05, "loss": 0.5776, "step": 215410 }, { "epoch": 2.39, "learning_rate": 1.0246146691469572e-05, "loss": 0.5652, "step": 215415 }, { "epoch": 2.39, "learning_rate": 1.024522396433106e-05, "loss": 0.5709, "step": 215420 }, { "epoch": 2.39, "learning_rate": 1.0244301237192548e-05, "loss": 0.5711, "step": 215425 }, { "epoch": 2.39, "learning_rate": 1.0243378510054036e-05, "loss": 0.6577, "step": 215430 }, { "epoch": 2.39, "learning_rate": 1.0242455782915524e-05, "loss": 0.5325, "step": 215435 }, { "epoch": 2.39, "learning_rate": 1.0241533055777012e-05, "loss": 0.6315, "step": 215440 }, { "epoch": 2.39, "learning_rate": 1.0240610328638498e-05, "loss": 0.5926, "step": 215445 }, { "epoch": 2.39, "learning_rate": 1.0239687601499986e-05, "loss": 0.616, "step": 215450 }, { "epoch": 2.39, "learning_rate": 1.0238764874361473e-05, "loss": 0.596, "step": 215455 }, { "epoch": 2.39, "learning_rate": 1.0237842147222961e-05, "loss": 0.5681, "step": 215460 }, { "epoch": 2.39, "learning_rate": 1.0236919420084447e-05, "loss": 0.6205, "step": 215465 }, { "epoch": 2.39, "learning_rate": 1.0235996692945935e-05, "loss": 0.5878, "step": 215470 }, { "epoch": 2.39, "learning_rate": 1.0235073965807423e-05, "loss": 0.6063, "step": 215475 }, { "epoch": 2.39, "learning_rate": 1.0234151238668911e-05, "loss": 0.6089, "step": 215480 }, { "epoch": 2.39, "learning_rate": 1.0233228511530399e-05, "loss": 0.6116, "step": 215485 }, { "epoch": 2.39, "learning_rate": 1.0232305784391887e-05, "loss": 0.5516, "step": 215490 }, { "epoch": 2.39, "learning_rate": 1.0231383057253374e-05, "loss": 0.564, "step": 215495 }, { "epoch": 2.39, "learning_rate": 1.0230460330114862e-05, "loss": 0.6128, "step": 215500 }, { "epoch": 2.39, "learning_rate": 1.022953760297635e-05, "loss": 0.5842, "step": 215505 }, { "epoch": 2.39, "learning_rate": 1.0228614875837838e-05, "loss": 0.5632, "step": 215510 }, { "epoch": 2.39, "learning_rate": 1.0227692148699324e-05, "loss": 0.5673, "step": 215515 }, { "epoch": 2.39, "learning_rate": 1.0226769421560812e-05, "loss": 0.6272, "step": 215520 }, { "epoch": 2.39, "learning_rate": 1.0225846694422298e-05, "loss": 0.5759, "step": 215525 }, { "epoch": 2.39, "learning_rate": 1.0224923967283786e-05, "loss": 0.6097, "step": 215530 }, { "epoch": 2.39, "learning_rate": 1.0224001240145274e-05, "loss": 0.579, "step": 215535 }, { "epoch": 2.39, "learning_rate": 1.0223078513006762e-05, "loss": 0.6223, "step": 215540 }, { "epoch": 2.39, "learning_rate": 1.022215578586825e-05, "loss": 0.572, "step": 215545 }, { "epoch": 2.39, "learning_rate": 1.0221233058729737e-05, "loss": 0.6083, "step": 215550 }, { "epoch": 2.39, "learning_rate": 1.0220310331591225e-05, "loss": 0.624, "step": 215555 }, { "epoch": 2.39, "learning_rate": 1.0219387604452713e-05, "loss": 0.6286, "step": 215560 }, { "epoch": 2.39, "learning_rate": 1.0218464877314201e-05, "loss": 0.6012, "step": 215565 }, { "epoch": 2.39, "learning_rate": 1.0217542150175687e-05, "loss": 0.589, "step": 215570 }, { "epoch": 2.39, "learning_rate": 1.0216619423037175e-05, "loss": 0.6302, "step": 215575 }, { "epoch": 2.39, "learning_rate": 1.0215696695898663e-05, "loss": 0.5394, "step": 215580 }, { "epoch": 2.39, "learning_rate": 1.021477396876015e-05, "loss": 0.5857, "step": 215585 }, { "epoch": 2.39, "learning_rate": 1.0213851241621639e-05, "loss": 0.5629, "step": 215590 }, { "epoch": 2.39, "learning_rate": 1.0212928514483126e-05, "loss": 0.5796, "step": 215595 }, { "epoch": 2.39, "learning_rate": 1.0212005787344613e-05, "loss": 0.5608, "step": 215600 }, { "epoch": 2.39, "learning_rate": 1.02110830602061e-05, "loss": 0.5514, "step": 215605 }, { "epoch": 2.39, "learning_rate": 1.0210160333067588e-05, "loss": 0.5408, "step": 215610 }, { "epoch": 2.39, "learning_rate": 1.0209237605929076e-05, "loss": 0.6279, "step": 215615 }, { "epoch": 2.39, "learning_rate": 1.0208314878790564e-05, "loss": 0.5779, "step": 215620 }, { "epoch": 2.39, "learning_rate": 1.0207392151652052e-05, "loss": 0.5823, "step": 215625 }, { "epoch": 2.39, "learning_rate": 1.0206469424513538e-05, "loss": 0.5517, "step": 215630 }, { "epoch": 2.39, "learning_rate": 1.0205546697375026e-05, "loss": 0.6032, "step": 215635 }, { "epoch": 2.39, "learning_rate": 1.0204623970236514e-05, "loss": 0.598, "step": 215640 }, { "epoch": 2.39, "learning_rate": 1.0203701243098001e-05, "loss": 0.5664, "step": 215645 }, { "epoch": 2.39, "learning_rate": 1.020277851595949e-05, "loss": 0.6043, "step": 215650 }, { "epoch": 2.39, "learning_rate": 1.0201855788820977e-05, "loss": 0.5861, "step": 215655 }, { "epoch": 2.39, "learning_rate": 1.0200933061682465e-05, "loss": 0.5919, "step": 215660 }, { "epoch": 2.39, "learning_rate": 1.0200010334543953e-05, "loss": 0.605, "step": 215665 }, { "epoch": 2.39, "learning_rate": 1.019908760740544e-05, "loss": 0.5955, "step": 215670 }, { "epoch": 2.39, "learning_rate": 1.0198164880266927e-05, "loss": 0.5539, "step": 215675 }, { "epoch": 2.39, "learning_rate": 1.0197242153128415e-05, "loss": 0.5982, "step": 215680 }, { "epoch": 2.39, "learning_rate": 1.0196319425989901e-05, "loss": 0.6247, "step": 215685 }, { "epoch": 2.39, "learning_rate": 1.0195396698851389e-05, "loss": 0.5672, "step": 215690 }, { "epoch": 2.39, "learning_rate": 1.0194473971712877e-05, "loss": 0.6166, "step": 215695 }, { "epoch": 2.39, "learning_rate": 1.0193551244574364e-05, "loss": 0.5504, "step": 215700 }, { "epoch": 2.39, "learning_rate": 1.0192628517435852e-05, "loss": 0.5707, "step": 215705 }, { "epoch": 2.39, "learning_rate": 1.019170579029734e-05, "loss": 0.5819, "step": 215710 }, { "epoch": 2.39, "learning_rate": 1.0190783063158828e-05, "loss": 0.5966, "step": 215715 }, { "epoch": 2.39, "learning_rate": 1.0189860336020316e-05, "loss": 0.5867, "step": 215720 }, { "epoch": 2.39, "learning_rate": 1.0188937608881804e-05, "loss": 0.5706, "step": 215725 }, { "epoch": 2.39, "learning_rate": 1.0188014881743292e-05, "loss": 0.6203, "step": 215730 }, { "epoch": 2.39, "learning_rate": 1.0187092154604778e-05, "loss": 0.6314, "step": 215735 }, { "epoch": 2.39, "learning_rate": 1.0186169427466266e-05, "loss": 0.562, "step": 215740 }, { "epoch": 2.39, "learning_rate": 1.0185246700327753e-05, "loss": 0.6635, "step": 215745 }, { "epoch": 2.39, "learning_rate": 1.018432397318924e-05, "loss": 0.6548, "step": 215750 }, { "epoch": 2.39, "learning_rate": 1.0183401246050727e-05, "loss": 0.5673, "step": 215755 }, { "epoch": 2.39, "learning_rate": 1.0182478518912215e-05, "loss": 0.5972, "step": 215760 }, { "epoch": 2.39, "learning_rate": 1.0181555791773703e-05, "loss": 0.5802, "step": 215765 }, { "epoch": 2.39, "learning_rate": 1.0180633064635191e-05, "loss": 0.5484, "step": 215770 }, { "epoch": 2.39, "learning_rate": 1.0179710337496679e-05, "loss": 0.5878, "step": 215775 }, { "epoch": 2.39, "learning_rate": 1.0178787610358167e-05, "loss": 0.6663, "step": 215780 }, { "epoch": 2.39, "learning_rate": 1.0177864883219654e-05, "loss": 0.5557, "step": 215785 }, { "epoch": 2.39, "learning_rate": 1.0176942156081142e-05, "loss": 0.5819, "step": 215790 }, { "epoch": 2.39, "learning_rate": 1.0176019428942628e-05, "loss": 0.5934, "step": 215795 }, { "epoch": 2.39, "learning_rate": 1.0175096701804116e-05, "loss": 0.606, "step": 215800 }, { "epoch": 2.39, "learning_rate": 1.0174173974665604e-05, "loss": 0.6295, "step": 215805 }, { "epoch": 2.39, "learning_rate": 1.0173251247527092e-05, "loss": 0.5883, "step": 215810 }, { "epoch": 2.39, "learning_rate": 1.017232852038858e-05, "loss": 0.5864, "step": 215815 }, { "epoch": 2.39, "learning_rate": 1.0171405793250068e-05, "loss": 0.5483, "step": 215820 }, { "epoch": 2.39, "learning_rate": 1.0170483066111554e-05, "loss": 0.6204, "step": 215825 }, { "epoch": 2.39, "learning_rate": 1.0169560338973042e-05, "loss": 0.5795, "step": 215830 }, { "epoch": 2.39, "learning_rate": 1.016863761183453e-05, "loss": 0.5757, "step": 215835 }, { "epoch": 2.39, "learning_rate": 1.0167714884696017e-05, "loss": 0.5996, "step": 215840 }, { "epoch": 2.39, "learning_rate": 1.0166792157557505e-05, "loss": 0.576, "step": 215845 }, { "epoch": 2.39, "learning_rate": 1.0165869430418991e-05, "loss": 0.597, "step": 215850 }, { "epoch": 2.39, "learning_rate": 1.016494670328048e-05, "loss": 0.5865, "step": 215855 }, { "epoch": 2.39, "learning_rate": 1.0164023976141967e-05, "loss": 0.636, "step": 215860 }, { "epoch": 2.39, "learning_rate": 1.0163101249003455e-05, "loss": 0.5706, "step": 215865 }, { "epoch": 2.39, "learning_rate": 1.0162178521864943e-05, "loss": 0.6323, "step": 215870 }, { "epoch": 2.39, "learning_rate": 1.016125579472643e-05, "loss": 0.531, "step": 215875 }, { "epoch": 2.39, "learning_rate": 1.0160333067587919e-05, "loss": 0.5785, "step": 215880 }, { "epoch": 2.39, "learning_rate": 1.0159410340449406e-05, "loss": 0.5873, "step": 215885 }, { "epoch": 2.39, "learning_rate": 1.0158487613310894e-05, "loss": 0.5842, "step": 215890 }, { "epoch": 2.39, "learning_rate": 1.0157564886172382e-05, "loss": 0.6203, "step": 215895 }, { "epoch": 2.39, "learning_rate": 1.0156642159033868e-05, "loss": 0.5797, "step": 215900 }, { "epoch": 2.39, "learning_rate": 1.0155719431895356e-05, "loss": 0.6076, "step": 215905 }, { "epoch": 2.39, "learning_rate": 1.0154796704756842e-05, "loss": 0.6045, "step": 215910 }, { "epoch": 2.39, "learning_rate": 1.015387397761833e-05, "loss": 0.5846, "step": 215915 }, { "epoch": 2.39, "learning_rate": 1.0152951250479818e-05, "loss": 0.6259, "step": 215920 }, { "epoch": 2.39, "learning_rate": 1.0152028523341306e-05, "loss": 0.5577, "step": 215925 }, { "epoch": 2.39, "learning_rate": 1.0151105796202794e-05, "loss": 0.5562, "step": 215930 }, { "epoch": 2.39, "learning_rate": 1.0150183069064281e-05, "loss": 0.6145, "step": 215935 }, { "epoch": 2.39, "learning_rate": 1.014926034192577e-05, "loss": 0.6084, "step": 215940 }, { "epoch": 2.39, "learning_rate": 1.0148337614787257e-05, "loss": 0.524, "step": 215945 }, { "epoch": 2.39, "learning_rate": 1.0147414887648745e-05, "loss": 0.5955, "step": 215950 }, { "epoch": 2.39, "learning_rate": 1.0146492160510231e-05, "loss": 0.5951, "step": 215955 }, { "epoch": 2.39, "learning_rate": 1.0145569433371719e-05, "loss": 0.6156, "step": 215960 }, { "epoch": 2.39, "learning_rate": 1.0144646706233207e-05, "loss": 0.5538, "step": 215965 }, { "epoch": 2.39, "learning_rate": 1.0143723979094695e-05, "loss": 0.5998, "step": 215970 }, { "epoch": 2.39, "learning_rate": 1.0142801251956183e-05, "loss": 0.5398, "step": 215975 }, { "epoch": 2.39, "learning_rate": 1.0141878524817669e-05, "loss": 0.5985, "step": 215980 }, { "epoch": 2.39, "learning_rate": 1.0140955797679157e-05, "loss": 0.5941, "step": 215985 }, { "epoch": 2.39, "learning_rate": 1.0140033070540644e-05, "loss": 0.6385, "step": 215990 }, { "epoch": 2.39, "learning_rate": 1.0139110343402132e-05, "loss": 0.6014, "step": 215995 }, { "epoch": 2.39, "learning_rate": 1.013818761626362e-05, "loss": 0.6226, "step": 216000 }, { "epoch": 2.39, "eval_loss": 0.5407325625419617, "eval_runtime": 69.2145, "eval_samples_per_second": 28.896, "eval_steps_per_second": 14.448, "step": 216000 }, { "epoch": 2.39, "learning_rate": 1.0137264889125108e-05, "loss": 0.5593, "step": 216005 }, { "epoch": 2.39, "learning_rate": 1.0136342161986596e-05, "loss": 0.5987, "step": 216010 }, { "epoch": 2.39, "learning_rate": 1.0135419434848082e-05, "loss": 0.5665, "step": 216015 }, { "epoch": 2.39, "learning_rate": 1.013449670770957e-05, "loss": 0.5735, "step": 216020 }, { "epoch": 2.39, "learning_rate": 1.0133573980571058e-05, "loss": 0.6032, "step": 216025 }, { "epoch": 2.39, "learning_rate": 1.0132651253432546e-05, "loss": 0.6491, "step": 216030 }, { "epoch": 2.39, "learning_rate": 1.0131728526294033e-05, "loss": 0.5448, "step": 216035 }, { "epoch": 2.39, "learning_rate": 1.0130805799155521e-05, "loss": 0.5687, "step": 216040 }, { "epoch": 2.39, "learning_rate": 1.0129883072017009e-05, "loss": 0.5296, "step": 216045 }, { "epoch": 2.39, "learning_rate": 1.0128960344878497e-05, "loss": 0.5598, "step": 216050 }, { "epoch": 2.39, "learning_rate": 1.0128037617739983e-05, "loss": 0.5795, "step": 216055 }, { "epoch": 2.39, "learning_rate": 1.0127114890601471e-05, "loss": 0.6259, "step": 216060 }, { "epoch": 2.39, "learning_rate": 1.0126192163462959e-05, "loss": 0.5856, "step": 216065 }, { "epoch": 2.39, "learning_rate": 1.0125269436324445e-05, "loss": 0.6112, "step": 216070 }, { "epoch": 2.39, "learning_rate": 1.0124346709185933e-05, "loss": 0.5914, "step": 216075 }, { "epoch": 2.39, "learning_rate": 1.012342398204742e-05, "loss": 0.5914, "step": 216080 }, { "epoch": 2.39, "learning_rate": 1.0122501254908908e-05, "loss": 0.6058, "step": 216085 }, { "epoch": 2.39, "learning_rate": 1.0121578527770396e-05, "loss": 0.6265, "step": 216090 }, { "epoch": 2.39, "learning_rate": 1.0120655800631884e-05, "loss": 0.6098, "step": 216095 }, { "epoch": 2.39, "learning_rate": 1.0119733073493372e-05, "loss": 0.6142, "step": 216100 }, { "epoch": 2.39, "learning_rate": 1.011881034635486e-05, "loss": 0.5424, "step": 216105 }, { "epoch": 2.39, "learning_rate": 1.0117887619216348e-05, "loss": 0.6049, "step": 216110 }, { "epoch": 2.39, "learning_rate": 1.0116964892077836e-05, "loss": 0.5497, "step": 216115 }, { "epoch": 2.39, "learning_rate": 1.0116042164939322e-05, "loss": 0.6531, "step": 216120 }, { "epoch": 2.39, "learning_rate": 1.011511943780081e-05, "loss": 0.5805, "step": 216125 }, { "epoch": 2.39, "learning_rate": 1.0114196710662296e-05, "loss": 0.5618, "step": 216130 }, { "epoch": 2.39, "learning_rate": 1.0113273983523784e-05, "loss": 0.607, "step": 216135 }, { "epoch": 2.39, "learning_rate": 1.0112351256385271e-05, "loss": 0.5983, "step": 216140 }, { "epoch": 2.39, "learning_rate": 1.011142852924676e-05, "loss": 0.6127, "step": 216145 }, { "epoch": 2.39, "learning_rate": 1.0110505802108247e-05, "loss": 0.6417, "step": 216150 }, { "epoch": 2.39, "learning_rate": 1.0109583074969735e-05, "loss": 0.5364, "step": 216155 }, { "epoch": 2.39, "learning_rate": 1.0108660347831223e-05, "loss": 0.5891, "step": 216160 }, { "epoch": 2.39, "learning_rate": 1.010773762069271e-05, "loss": 0.6067, "step": 216165 }, { "epoch": 2.39, "learning_rate": 1.0106814893554198e-05, "loss": 0.5836, "step": 216170 }, { "epoch": 2.39, "learning_rate": 1.0105892166415686e-05, "loss": 0.5598, "step": 216175 }, { "epoch": 2.39, "learning_rate": 1.0104969439277172e-05, "loss": 0.5899, "step": 216180 }, { "epoch": 2.39, "learning_rate": 1.010404671213866e-05, "loss": 0.53, "step": 216185 }, { "epoch": 2.39, "learning_rate": 1.0103123985000148e-05, "loss": 0.6096, "step": 216190 }, { "epoch": 2.39, "learning_rate": 1.0102201257861636e-05, "loss": 0.6398, "step": 216195 }, { "epoch": 2.39, "learning_rate": 1.0101278530723124e-05, "loss": 0.6019, "step": 216200 }, { "epoch": 2.39, "learning_rate": 1.010035580358461e-05, "loss": 0.5766, "step": 216205 }, { "epoch": 2.39, "learning_rate": 1.0099433076446098e-05, "loss": 0.5762, "step": 216210 }, { "epoch": 2.39, "learning_rate": 1.0098510349307586e-05, "loss": 0.5661, "step": 216215 }, { "epoch": 2.39, "learning_rate": 1.0097587622169074e-05, "loss": 0.595, "step": 216220 }, { "epoch": 2.39, "learning_rate": 1.0096664895030561e-05, "loss": 0.5522, "step": 216225 }, { "epoch": 2.39, "learning_rate": 1.009574216789205e-05, "loss": 0.6344, "step": 216230 }, { "epoch": 2.39, "learning_rate": 1.0094819440753535e-05, "loss": 0.6168, "step": 216235 }, { "epoch": 2.39, "learning_rate": 1.0093896713615023e-05, "loss": 0.5975, "step": 216240 }, { "epoch": 2.39, "learning_rate": 1.0092973986476511e-05, "loss": 0.6278, "step": 216245 }, { "epoch": 2.39, "learning_rate": 1.0092051259337999e-05, "loss": 0.6048, "step": 216250 }, { "epoch": 2.39, "learning_rate": 1.0091128532199487e-05, "loss": 0.5869, "step": 216255 }, { "epoch": 2.39, "learning_rate": 1.0090205805060975e-05, "loss": 0.5795, "step": 216260 }, { "epoch": 2.39, "learning_rate": 1.0089283077922463e-05, "loss": 0.6066, "step": 216265 }, { "epoch": 2.39, "learning_rate": 1.008836035078395e-05, "loss": 0.6361, "step": 216270 }, { "epoch": 2.39, "learning_rate": 1.0087437623645438e-05, "loss": 0.5921, "step": 216275 }, { "epoch": 2.39, "learning_rate": 1.0086514896506924e-05, "loss": 0.5441, "step": 216280 }, { "epoch": 2.39, "learning_rate": 1.0085592169368412e-05, "loss": 0.6592, "step": 216285 }, { "epoch": 2.39, "learning_rate": 1.00846694422299e-05, "loss": 0.6499, "step": 216290 }, { "epoch": 2.39, "learning_rate": 1.0083746715091386e-05, "loss": 0.5661, "step": 216295 }, { "epoch": 2.4, "learning_rate": 1.0082823987952874e-05, "loss": 0.6304, "step": 216300 }, { "epoch": 2.4, "learning_rate": 1.0081901260814362e-05, "loss": 0.5663, "step": 216305 }, { "epoch": 2.4, "learning_rate": 1.008097853367585e-05, "loss": 0.5908, "step": 216310 }, { "epoch": 2.4, "learning_rate": 1.0080055806537338e-05, "loss": 0.5508, "step": 216315 }, { "epoch": 2.4, "learning_rate": 1.0079133079398825e-05, "loss": 0.5784, "step": 216320 }, { "epoch": 2.4, "learning_rate": 1.0078210352260313e-05, "loss": 0.6167, "step": 216325 }, { "epoch": 2.4, "learning_rate": 1.0077287625121801e-05, "loss": 0.6336, "step": 216330 }, { "epoch": 2.4, "learning_rate": 1.0076364897983289e-05, "loss": 0.5755, "step": 216335 }, { "epoch": 2.4, "learning_rate": 1.0075442170844777e-05, "loss": 0.6141, "step": 216340 }, { "epoch": 2.4, "learning_rate": 1.0074519443706263e-05, "loss": 0.5798, "step": 216345 }, { "epoch": 2.4, "learning_rate": 1.0073596716567751e-05, "loss": 0.5339, "step": 216350 }, { "epoch": 2.4, "learning_rate": 1.0072673989429239e-05, "loss": 0.6137, "step": 216355 }, { "epoch": 2.4, "learning_rate": 1.0071751262290725e-05, "loss": 0.6248, "step": 216360 }, { "epoch": 2.4, "learning_rate": 1.0070828535152213e-05, "loss": 0.5942, "step": 216365 }, { "epoch": 2.4, "learning_rate": 1.00699058080137e-05, "loss": 0.6252, "step": 216370 }, { "epoch": 2.4, "learning_rate": 1.0068983080875188e-05, "loss": 0.5949, "step": 216375 }, { "epoch": 2.4, "learning_rate": 1.0068060353736676e-05, "loss": 0.5839, "step": 216380 }, { "epoch": 2.4, "learning_rate": 1.0067137626598164e-05, "loss": 0.5819, "step": 216385 }, { "epoch": 2.4, "learning_rate": 1.0066214899459652e-05, "loss": 0.5987, "step": 216390 }, { "epoch": 2.4, "learning_rate": 1.006529217232114e-05, "loss": 0.5897, "step": 216395 }, { "epoch": 2.4, "learning_rate": 1.0064369445182626e-05, "loss": 0.5832, "step": 216400 }, { "epoch": 2.4, "learning_rate": 1.0063446718044114e-05, "loss": 0.5598, "step": 216405 }, { "epoch": 2.4, "learning_rate": 1.0062523990905602e-05, "loss": 0.566, "step": 216410 }, { "epoch": 2.4, "learning_rate": 1.006160126376709e-05, "loss": 0.5442, "step": 216415 }, { "epoch": 2.4, "learning_rate": 1.0060678536628577e-05, "loss": 0.5992, "step": 216420 }, { "epoch": 2.4, "learning_rate": 1.0059755809490065e-05, "loss": 0.6069, "step": 216425 }, { "epoch": 2.4, "learning_rate": 1.0058833082351553e-05, "loss": 0.5704, "step": 216430 }, { "epoch": 2.4, "learning_rate": 1.005791035521304e-05, "loss": 0.6271, "step": 216435 }, { "epoch": 2.4, "learning_rate": 1.0056987628074527e-05, "loss": 0.5214, "step": 216440 }, { "epoch": 2.4, "learning_rate": 1.0056064900936015e-05, "loss": 0.5676, "step": 216445 }, { "epoch": 2.4, "learning_rate": 1.0055142173797503e-05, "loss": 0.6181, "step": 216450 }, { "epoch": 2.4, "learning_rate": 1.005421944665899e-05, "loss": 0.5864, "step": 216455 }, { "epoch": 2.4, "learning_rate": 1.0053296719520477e-05, "loss": 0.5178, "step": 216460 }, { "epoch": 2.4, "learning_rate": 1.0052373992381965e-05, "loss": 0.6204, "step": 216465 }, { "epoch": 2.4, "learning_rate": 1.0051451265243452e-05, "loss": 0.5673, "step": 216470 }, { "epoch": 2.4, "learning_rate": 1.005052853810494e-05, "loss": 0.5688, "step": 216475 }, { "epoch": 2.4, "learning_rate": 1.0049605810966428e-05, "loss": 0.5683, "step": 216480 }, { "epoch": 2.4, "learning_rate": 1.0048683083827916e-05, "loss": 0.6073, "step": 216485 }, { "epoch": 2.4, "learning_rate": 1.0047760356689404e-05, "loss": 0.5896, "step": 216490 }, { "epoch": 2.4, "learning_rate": 1.0046837629550892e-05, "loss": 0.5744, "step": 216495 }, { "epoch": 2.4, "learning_rate": 1.004591490241238e-05, "loss": 0.5942, "step": 216500 }, { "epoch": 2.4, "learning_rate": 1.0044992175273866e-05, "loss": 0.5955, "step": 216505 }, { "epoch": 2.4, "learning_rate": 1.0044069448135354e-05, "loss": 0.5619, "step": 216510 }, { "epoch": 2.4, "learning_rate": 1.004314672099684e-05, "loss": 0.5851, "step": 216515 }, { "epoch": 2.4, "learning_rate": 1.0042223993858328e-05, "loss": 0.6002, "step": 216520 }, { "epoch": 2.4, "learning_rate": 1.0041301266719815e-05, "loss": 0.5632, "step": 216525 }, { "epoch": 2.4, "learning_rate": 1.0040378539581303e-05, "loss": 0.6108, "step": 216530 }, { "epoch": 2.4, "learning_rate": 1.0039455812442791e-05, "loss": 0.5697, "step": 216535 }, { "epoch": 2.4, "learning_rate": 1.0038533085304279e-05, "loss": 0.5672, "step": 216540 }, { "epoch": 2.4, "learning_rate": 1.0037610358165767e-05, "loss": 0.6089, "step": 216545 }, { "epoch": 2.4, "learning_rate": 1.0036687631027255e-05, "loss": 0.5714, "step": 216550 }, { "epoch": 2.4, "learning_rate": 1.0035764903888743e-05, "loss": 0.6026, "step": 216555 }, { "epoch": 2.4, "learning_rate": 1.003484217675023e-05, "loss": 0.656, "step": 216560 }, { "epoch": 2.4, "learning_rate": 1.0033919449611717e-05, "loss": 0.5421, "step": 216565 }, { "epoch": 2.4, "learning_rate": 1.0032996722473204e-05, "loss": 0.5922, "step": 216570 }, { "epoch": 2.4, "learning_rate": 1.0032073995334692e-05, "loss": 0.5767, "step": 216575 }, { "epoch": 2.4, "learning_rate": 1.003115126819618e-05, "loss": 0.5858, "step": 216580 }, { "epoch": 2.4, "learning_rate": 1.0030228541057666e-05, "loss": 0.6273, "step": 216585 }, { "epoch": 2.4, "learning_rate": 1.0029305813919154e-05, "loss": 0.5764, "step": 216590 }, { "epoch": 2.4, "learning_rate": 1.0028383086780642e-05, "loss": 0.5503, "step": 216595 }, { "epoch": 2.4, "learning_rate": 1.002746035964213e-05, "loss": 0.5726, "step": 216600 }, { "epoch": 2.4, "learning_rate": 1.0026537632503618e-05, "loss": 0.5915, "step": 216605 }, { "epoch": 2.4, "learning_rate": 1.0025614905365105e-05, "loss": 0.586, "step": 216610 }, { "epoch": 2.4, "learning_rate": 1.0024692178226593e-05, "loss": 0.5531, "step": 216615 }, { "epoch": 2.4, "learning_rate": 1.002376945108808e-05, "loss": 0.6256, "step": 216620 }, { "epoch": 2.4, "learning_rate": 1.0022846723949567e-05, "loss": 0.5905, "step": 216625 }, { "epoch": 2.4, "learning_rate": 1.0021923996811055e-05, "loss": 0.5859, "step": 216630 }, { "epoch": 2.4, "learning_rate": 1.0021001269672543e-05, "loss": 0.5851, "step": 216635 }, { "epoch": 2.4, "learning_rate": 1.0020078542534031e-05, "loss": 0.5542, "step": 216640 }, { "epoch": 2.4, "learning_rate": 1.0019155815395519e-05, "loss": 0.5601, "step": 216645 }, { "epoch": 2.4, "learning_rate": 1.0018233088257007e-05, "loss": 0.5964, "step": 216650 }, { "epoch": 2.4, "learning_rate": 1.0017310361118494e-05, "loss": 0.6384, "step": 216655 }, { "epoch": 2.4, "learning_rate": 1.001638763397998e-05, "loss": 0.5973, "step": 216660 }, { "epoch": 2.4, "learning_rate": 1.0015464906841468e-05, "loss": 0.5981, "step": 216665 }, { "epoch": 2.4, "learning_rate": 1.0014542179702956e-05, "loss": 0.5821, "step": 216670 }, { "epoch": 2.4, "learning_rate": 1.0013619452564444e-05, "loss": 0.6327, "step": 216675 }, { "epoch": 2.4, "learning_rate": 1.001269672542593e-05, "loss": 0.5588, "step": 216680 }, { "epoch": 2.4, "learning_rate": 1.0011773998287418e-05, "loss": 0.5905, "step": 216685 }, { "epoch": 2.4, "learning_rate": 1.0010851271148906e-05, "loss": 0.5731, "step": 216690 }, { "epoch": 2.4, "learning_rate": 1.0009928544010394e-05, "loss": 0.5969, "step": 216695 }, { "epoch": 2.4, "learning_rate": 1.0009005816871882e-05, "loss": 0.5551, "step": 216700 }, { "epoch": 2.4, "learning_rate": 1.000808308973337e-05, "loss": 0.5756, "step": 216705 }, { "epoch": 2.4, "learning_rate": 1.0007160362594857e-05, "loss": 0.5868, "step": 216710 }, { "epoch": 2.4, "learning_rate": 1.0006237635456345e-05, "loss": 0.6143, "step": 216715 }, { "epoch": 2.4, "learning_rate": 1.0005314908317833e-05, "loss": 0.5086, "step": 216720 }, { "epoch": 2.4, "learning_rate": 1.0004392181179321e-05, "loss": 0.5613, "step": 216725 }, { "epoch": 2.4, "learning_rate": 1.0003469454040807e-05, "loss": 0.6218, "step": 216730 }, { "epoch": 2.4, "learning_rate": 1.0002546726902295e-05, "loss": 0.5833, "step": 216735 }, { "epoch": 2.4, "learning_rate": 1.0001623999763781e-05, "loss": 0.6048, "step": 216740 }, { "epoch": 2.4, "learning_rate": 1.0000701272625269e-05, "loss": 0.5559, "step": 216745 }, { "epoch": 2.4, "learning_rate": 9.999778545486757e-06, "loss": 0.5525, "step": 216750 }, { "epoch": 2.4, "learning_rate": 9.998855818348245e-06, "loss": 0.5834, "step": 216755 }, { "epoch": 2.4, "learning_rate": 9.997933091209732e-06, "loss": 0.5757, "step": 216760 }, { "epoch": 2.4, "learning_rate": 9.99701036407122e-06, "loss": 0.6074, "step": 216765 }, { "epoch": 2.4, "learning_rate": 9.996087636932708e-06, "loss": 0.5892, "step": 216770 }, { "epoch": 2.4, "learning_rate": 9.995164909794196e-06, "loss": 0.6377, "step": 216775 }, { "epoch": 2.4, "learning_rate": 9.994242182655684e-06, "loss": 0.5494, "step": 216780 }, { "epoch": 2.4, "learning_rate": 9.99331945551717e-06, "loss": 0.6098, "step": 216785 }, { "epoch": 2.4, "learning_rate": 9.992396728378658e-06, "loss": 0.5831, "step": 216790 }, { "epoch": 2.4, "learning_rate": 9.991474001240146e-06, "loss": 0.6055, "step": 216795 }, { "epoch": 2.4, "learning_rate": 9.990551274101634e-06, "loss": 0.6475, "step": 216800 }, { "epoch": 2.4, "learning_rate": 9.989628546963121e-06, "loss": 0.5705, "step": 216805 }, { "epoch": 2.4, "learning_rate": 9.98870581982461e-06, "loss": 0.5273, "step": 216810 }, { "epoch": 2.4, "learning_rate": 9.987783092686095e-06, "loss": 0.6204, "step": 216815 }, { "epoch": 2.4, "learning_rate": 9.986860365547583e-06, "loss": 0.6408, "step": 216820 }, { "epoch": 2.4, "learning_rate": 9.985937638409071e-06, "loss": 0.6642, "step": 216825 }, { "epoch": 2.4, "learning_rate": 9.985014911270559e-06, "loss": 0.6056, "step": 216830 }, { "epoch": 2.4, "learning_rate": 9.984092184132047e-06, "loss": 0.5702, "step": 216835 }, { "epoch": 2.4, "learning_rate": 9.983169456993535e-06, "loss": 0.5994, "step": 216840 }, { "epoch": 2.4, "learning_rate": 9.98224672985502e-06, "loss": 0.6247, "step": 216845 }, { "epoch": 2.4, "learning_rate": 9.981324002716509e-06, "loss": 0.559, "step": 216850 }, { "epoch": 2.4, "learning_rate": 9.980401275577996e-06, "loss": 0.6504, "step": 216855 }, { "epoch": 2.4, "learning_rate": 9.979478548439484e-06, "loss": 0.6023, "step": 216860 }, { "epoch": 2.4, "learning_rate": 9.978555821300972e-06, "loss": 0.5789, "step": 216865 }, { "epoch": 2.4, "learning_rate": 9.97763309416246e-06, "loss": 0.5539, "step": 216870 }, { "epoch": 2.4, "learning_rate": 9.976710367023948e-06, "loss": 0.5715, "step": 216875 }, { "epoch": 2.4, "learning_rate": 9.975787639885436e-06, "loss": 0.5764, "step": 216880 }, { "epoch": 2.4, "learning_rate": 9.974864912746924e-06, "loss": 0.6078, "step": 216885 }, { "epoch": 2.4, "learning_rate": 9.97394218560841e-06, "loss": 0.6482, "step": 216890 }, { "epoch": 2.4, "learning_rate": 9.973019458469898e-06, "loss": 0.6212, "step": 216895 }, { "epoch": 2.4, "learning_rate": 9.972096731331384e-06, "loss": 0.5331, "step": 216900 }, { "epoch": 2.4, "learning_rate": 9.971174004192872e-06, "loss": 0.6194, "step": 216905 }, { "epoch": 2.4, "learning_rate": 9.97025127705436e-06, "loss": 0.5675, "step": 216910 }, { "epoch": 2.4, "learning_rate": 9.969328549915847e-06, "loss": 0.6011, "step": 216915 }, { "epoch": 2.4, "learning_rate": 9.968405822777335e-06, "loss": 0.588, "step": 216920 }, { "epoch": 2.4, "learning_rate": 9.967483095638823e-06, "loss": 0.5379, "step": 216925 }, { "epoch": 2.4, "learning_rate": 9.96656036850031e-06, "loss": 0.6063, "step": 216930 }, { "epoch": 2.4, "learning_rate": 9.965637641361799e-06, "loss": 0.6046, "step": 216935 }, { "epoch": 2.4, "learning_rate": 9.964714914223287e-06, "loss": 0.5753, "step": 216940 }, { "epoch": 2.4, "learning_rate": 9.963792187084774e-06, "loss": 0.5747, "step": 216945 }, { "epoch": 2.4, "learning_rate": 9.96286945994626e-06, "loss": 0.5803, "step": 216950 }, { "epoch": 2.4, "learning_rate": 9.961946732807748e-06, "loss": 0.5165, "step": 216955 }, { "epoch": 2.4, "learning_rate": 9.961024005669236e-06, "loss": 0.6043, "step": 216960 }, { "epoch": 2.4, "learning_rate": 9.960101278530722e-06, "loss": 0.5987, "step": 216965 }, { "epoch": 2.4, "learning_rate": 9.95917855139221e-06, "loss": 0.5969, "step": 216970 }, { "epoch": 2.4, "learning_rate": 9.958255824253698e-06, "loss": 0.5372, "step": 216975 }, { "epoch": 2.4, "learning_rate": 9.957333097115186e-06, "loss": 0.6657, "step": 216980 }, { "epoch": 2.4, "learning_rate": 9.956410369976674e-06, "loss": 0.6102, "step": 216985 }, { "epoch": 2.4, "learning_rate": 9.955487642838162e-06, "loss": 0.5716, "step": 216990 }, { "epoch": 2.4, "learning_rate": 9.95456491569965e-06, "loss": 0.6153, "step": 216995 }, { "epoch": 2.4, "learning_rate": 9.953642188561137e-06, "loss": 0.5733, "step": 217000 }, { "epoch": 2.4, "eval_loss": 0.5758907794952393, "eval_runtime": 69.2853, "eval_samples_per_second": 28.866, "eval_steps_per_second": 14.433, "step": 217000 }, { "epoch": 2.4, "learning_rate": 9.952719461422625e-06, "loss": 0.5827, "step": 217005 }, { "epoch": 2.4, "learning_rate": 9.951796734284111e-06, "loss": 0.5919, "step": 217010 }, { "epoch": 2.4, "learning_rate": 9.9508740071456e-06, "loss": 0.6267, "step": 217015 }, { "epoch": 2.4, "learning_rate": 9.949951280007087e-06, "loss": 0.5842, "step": 217020 }, { "epoch": 2.4, "learning_rate": 9.949028552868575e-06, "loss": 0.5464, "step": 217025 }, { "epoch": 2.4, "learning_rate": 9.948105825730063e-06, "loss": 0.5913, "step": 217030 }, { "epoch": 2.4, "learning_rate": 9.94718309859155e-06, "loss": 0.5246, "step": 217035 }, { "epoch": 2.4, "learning_rate": 9.946260371453037e-06, "loss": 0.6155, "step": 217040 }, { "epoch": 2.4, "learning_rate": 9.945337644314525e-06, "loss": 0.5937, "step": 217045 }, { "epoch": 2.4, "learning_rate": 9.944414917176012e-06, "loss": 0.5972, "step": 217050 }, { "epoch": 2.4, "learning_rate": 9.9434921900375e-06, "loss": 0.5746, "step": 217055 }, { "epoch": 2.4, "learning_rate": 9.942569462898988e-06, "loss": 0.68, "step": 217060 }, { "epoch": 2.4, "learning_rate": 9.941646735760474e-06, "loss": 0.6159, "step": 217065 }, { "epoch": 2.4, "learning_rate": 9.940724008621962e-06, "loss": 0.5615, "step": 217070 }, { "epoch": 2.4, "learning_rate": 9.93980128148345e-06, "loss": 0.6222, "step": 217075 }, { "epoch": 2.4, "learning_rate": 9.938878554344938e-06, "loss": 0.5935, "step": 217080 }, { "epoch": 2.4, "learning_rate": 9.937955827206426e-06, "loss": 0.5881, "step": 217085 }, { "epoch": 2.4, "learning_rate": 9.937033100067914e-06, "loss": 0.5925, "step": 217090 }, { "epoch": 2.4, "learning_rate": 9.936110372929401e-06, "loss": 0.5606, "step": 217095 }, { "epoch": 2.4, "learning_rate": 9.93518764579089e-06, "loss": 0.6328, "step": 217100 }, { "epoch": 2.4, "learning_rate": 9.934264918652377e-06, "loss": 0.5824, "step": 217105 }, { "epoch": 2.4, "learning_rate": 9.933342191513865e-06, "loss": 0.5706, "step": 217110 }, { "epoch": 2.4, "learning_rate": 9.932419464375351e-06, "loss": 0.632, "step": 217115 }, { "epoch": 2.4, "learning_rate": 9.931496737236839e-06, "loss": 0.6214, "step": 217120 }, { "epoch": 2.4, "learning_rate": 9.930574010098325e-06, "loss": 0.6155, "step": 217125 }, { "epoch": 2.4, "learning_rate": 9.929651282959813e-06, "loss": 0.6109, "step": 217130 }, { "epoch": 2.4, "learning_rate": 9.9287285558213e-06, "loss": 0.6501, "step": 217135 }, { "epoch": 2.4, "learning_rate": 9.927805828682789e-06, "loss": 0.6029, "step": 217140 }, { "epoch": 2.4, "learning_rate": 9.926883101544276e-06, "loss": 0.5391, "step": 217145 }, { "epoch": 2.4, "learning_rate": 9.925960374405764e-06, "loss": 0.6313, "step": 217150 }, { "epoch": 2.4, "learning_rate": 9.925037647267252e-06, "loss": 0.5828, "step": 217155 }, { "epoch": 2.4, "learning_rate": 9.92411492012874e-06, "loss": 0.6312, "step": 217160 }, { "epoch": 2.4, "learning_rate": 9.923192192990228e-06, "loss": 0.5791, "step": 217165 }, { "epoch": 2.4, "learning_rate": 9.922269465851714e-06, "loss": 0.6159, "step": 217170 }, { "epoch": 2.4, "learning_rate": 9.921346738713202e-06, "loss": 0.5393, "step": 217175 }, { "epoch": 2.4, "learning_rate": 9.92042401157469e-06, "loss": 0.5231, "step": 217180 }, { "epoch": 2.4, "learning_rate": 9.919501284436178e-06, "loss": 0.564, "step": 217185 }, { "epoch": 2.4, "learning_rate": 9.918578557297665e-06, "loss": 0.5894, "step": 217190 }, { "epoch": 2.4, "learning_rate": 9.917655830159152e-06, "loss": 0.5957, "step": 217195 }, { "epoch": 2.4, "learning_rate": 9.91673310302064e-06, "loss": 0.6034, "step": 217200 }, { "epoch": 2.41, "learning_rate": 9.915810375882127e-06, "loss": 0.5584, "step": 217205 }, { "epoch": 2.41, "learning_rate": 9.914887648743615e-06, "loss": 0.6004, "step": 217210 }, { "epoch": 2.41, "learning_rate": 9.913964921605103e-06, "loss": 0.5929, "step": 217215 }, { "epoch": 2.41, "learning_rate": 9.91304219446659e-06, "loss": 0.6305, "step": 217220 }, { "epoch": 2.41, "learning_rate": 9.912119467328079e-06, "loss": 0.5494, "step": 217225 }, { "epoch": 2.41, "learning_rate": 9.911196740189565e-06, "loss": 0.5938, "step": 217230 }, { "epoch": 2.41, "learning_rate": 9.910274013051053e-06, "loss": 0.5876, "step": 217235 }, { "epoch": 2.41, "learning_rate": 9.90935128591254e-06, "loss": 0.5821, "step": 217240 }, { "epoch": 2.41, "learning_rate": 9.908428558774028e-06, "loss": 0.5825, "step": 217245 }, { "epoch": 2.41, "learning_rate": 9.907505831635516e-06, "loss": 0.5867, "step": 217250 }, { "epoch": 2.41, "learning_rate": 9.906583104497004e-06, "loss": 0.5941, "step": 217255 }, { "epoch": 2.41, "learning_rate": 9.905660377358492e-06, "loss": 0.5492, "step": 217260 }, { "epoch": 2.41, "learning_rate": 9.90473765021998e-06, "loss": 0.5807, "step": 217265 }, { "epoch": 2.41, "learning_rate": 9.903814923081466e-06, "loss": 0.5523, "step": 217270 }, { "epoch": 2.41, "learning_rate": 9.902892195942954e-06, "loss": 0.538, "step": 217275 }, { "epoch": 2.41, "learning_rate": 9.901969468804442e-06, "loss": 0.5851, "step": 217280 }, { "epoch": 2.41, "learning_rate": 9.901046741665928e-06, "loss": 0.5588, "step": 217285 }, { "epoch": 2.41, "learning_rate": 9.900124014527416e-06, "loss": 0.5931, "step": 217290 }, { "epoch": 2.41, "learning_rate": 9.899201287388903e-06, "loss": 0.5838, "step": 217295 }, { "epoch": 2.41, "learning_rate": 9.898278560250391e-06, "loss": 0.5956, "step": 217300 }, { "epoch": 2.41, "learning_rate": 9.89735583311188e-06, "loss": 0.6669, "step": 217305 }, { "epoch": 2.41, "learning_rate": 9.896433105973367e-06, "loss": 0.5401, "step": 217310 }, { "epoch": 2.41, "learning_rate": 9.895510378834855e-06, "loss": 0.635, "step": 217315 }, { "epoch": 2.41, "learning_rate": 9.894587651696343e-06, "loss": 0.6027, "step": 217320 }, { "epoch": 2.41, "learning_rate": 9.89366492455783e-06, "loss": 0.6243, "step": 217325 }, { "epoch": 2.41, "learning_rate": 9.892742197419318e-06, "loss": 0.6019, "step": 217330 }, { "epoch": 2.41, "learning_rate": 9.891819470280805e-06, "loss": 0.636, "step": 217335 }, { "epoch": 2.41, "learning_rate": 9.890896743142292e-06, "loss": 0.5657, "step": 217340 }, { "epoch": 2.41, "learning_rate": 9.889974016003779e-06, "loss": 0.6302, "step": 217345 }, { "epoch": 2.41, "learning_rate": 9.889051288865266e-06, "loss": 0.6279, "step": 217350 }, { "epoch": 2.41, "learning_rate": 9.888128561726754e-06, "loss": 0.6076, "step": 217355 }, { "epoch": 2.41, "learning_rate": 9.887205834588242e-06, "loss": 0.5984, "step": 217360 }, { "epoch": 2.41, "learning_rate": 9.88628310744973e-06, "loss": 0.5885, "step": 217365 }, { "epoch": 2.41, "learning_rate": 9.885360380311218e-06, "loss": 0.5892, "step": 217370 }, { "epoch": 2.41, "learning_rate": 9.884437653172706e-06, "loss": 0.5896, "step": 217375 }, { "epoch": 2.41, "learning_rate": 9.883514926034193e-06, "loss": 0.612, "step": 217380 }, { "epoch": 2.41, "learning_rate": 9.882592198895681e-06, "loss": 0.5846, "step": 217385 }, { "epoch": 2.41, "learning_rate": 9.88166947175717e-06, "loss": 0.603, "step": 217390 }, { "epoch": 2.41, "learning_rate": 9.880746744618655e-06, "loss": 0.5599, "step": 217395 }, { "epoch": 2.41, "learning_rate": 9.879824017480143e-06, "loss": 0.6237, "step": 217400 }, { "epoch": 2.41, "learning_rate": 9.878901290341631e-06, "loss": 0.5796, "step": 217405 }, { "epoch": 2.41, "learning_rate": 9.877978563203119e-06, "loss": 0.5827, "step": 217410 }, { "epoch": 2.41, "learning_rate": 9.877055836064607e-06, "loss": 0.6193, "step": 217415 }, { "epoch": 2.41, "learning_rate": 9.876133108926093e-06, "loss": 0.5776, "step": 217420 }, { "epoch": 2.41, "learning_rate": 9.87521038178758e-06, "loss": 0.5621, "step": 217425 }, { "epoch": 2.41, "learning_rate": 9.874287654649069e-06, "loss": 0.5939, "step": 217430 }, { "epoch": 2.41, "learning_rate": 9.873364927510556e-06, "loss": 0.607, "step": 217435 }, { "epoch": 2.41, "learning_rate": 9.872442200372044e-06, "loss": 0.5729, "step": 217440 }, { "epoch": 2.41, "learning_rate": 9.871519473233532e-06, "loss": 0.5752, "step": 217445 }, { "epoch": 2.41, "learning_rate": 9.870596746095018e-06, "loss": 0.6115, "step": 217450 }, { "epoch": 2.41, "learning_rate": 9.869674018956506e-06, "loss": 0.6193, "step": 217455 }, { "epoch": 2.41, "learning_rate": 9.868751291817994e-06, "loss": 0.6374, "step": 217460 }, { "epoch": 2.41, "learning_rate": 9.867828564679482e-06, "loss": 0.5346, "step": 217465 }, { "epoch": 2.41, "learning_rate": 9.86690583754097e-06, "loss": 0.6203, "step": 217470 }, { "epoch": 2.41, "learning_rate": 9.865983110402458e-06, "loss": 0.6142, "step": 217475 }, { "epoch": 2.41, "learning_rate": 9.865060383263945e-06, "loss": 0.6008, "step": 217480 }, { "epoch": 2.41, "learning_rate": 9.864137656125433e-06, "loss": 0.5185, "step": 217485 }, { "epoch": 2.41, "learning_rate": 9.863214928986921e-06, "loss": 0.6, "step": 217490 }, { "epoch": 2.41, "learning_rate": 9.862292201848407e-06, "loss": 0.5753, "step": 217495 }, { "epoch": 2.41, "learning_rate": 9.861369474709895e-06, "loss": 0.6257, "step": 217500 }, { "epoch": 2.41, "learning_rate": 9.860446747571383e-06, "loss": 0.612, "step": 217505 }, { "epoch": 2.41, "learning_rate": 9.859524020432869e-06, "loss": 0.5834, "step": 217510 }, { "epoch": 2.41, "learning_rate": 9.858601293294357e-06, "loss": 0.6209, "step": 217515 }, { "epoch": 2.41, "learning_rate": 9.857678566155845e-06, "loss": 0.5746, "step": 217520 }, { "epoch": 2.41, "learning_rate": 9.856755839017333e-06, "loss": 0.5772, "step": 217525 }, { "epoch": 2.41, "learning_rate": 9.85583311187882e-06, "loss": 0.5807, "step": 217530 }, { "epoch": 2.41, "learning_rate": 9.854910384740308e-06, "loss": 0.5755, "step": 217535 }, { "epoch": 2.41, "learning_rate": 9.853987657601796e-06, "loss": 0.5779, "step": 217540 }, { "epoch": 2.41, "learning_rate": 9.853064930463284e-06, "loss": 0.5342, "step": 217545 }, { "epoch": 2.41, "learning_rate": 9.852142203324772e-06, "loss": 0.6173, "step": 217550 }, { "epoch": 2.41, "learning_rate": 9.85121947618626e-06, "loss": 0.6376, "step": 217555 }, { "epoch": 2.41, "learning_rate": 9.850296749047746e-06, "loss": 0.5617, "step": 217560 }, { "epoch": 2.41, "learning_rate": 9.849374021909234e-06, "loss": 0.6084, "step": 217565 }, { "epoch": 2.41, "learning_rate": 9.848451294770722e-06, "loss": 0.5742, "step": 217570 }, { "epoch": 2.41, "learning_rate": 9.847528567632208e-06, "loss": 0.6001, "step": 217575 }, { "epoch": 2.41, "learning_rate": 9.846605840493696e-06, "loss": 0.5842, "step": 217580 }, { "epoch": 2.41, "learning_rate": 9.845683113355183e-06, "loss": 0.5473, "step": 217585 }, { "epoch": 2.41, "learning_rate": 9.844760386216671e-06, "loss": 0.5782, "step": 217590 }, { "epoch": 2.41, "learning_rate": 9.843837659078159e-06, "loss": 0.6192, "step": 217595 }, { "epoch": 2.41, "learning_rate": 9.842914931939647e-06, "loss": 0.5929, "step": 217600 }, { "epoch": 2.41, "learning_rate": 9.841992204801135e-06, "loss": 0.6202, "step": 217605 }, { "epoch": 2.41, "learning_rate": 9.841069477662623e-06, "loss": 0.5753, "step": 217610 }, { "epoch": 2.41, "learning_rate": 9.840146750524109e-06, "loss": 0.6235, "step": 217615 }, { "epoch": 2.41, "learning_rate": 9.839224023385597e-06, "loss": 0.6713, "step": 217620 }, { "epoch": 2.41, "learning_rate": 9.838301296247085e-06, "loss": 0.6494, "step": 217625 }, { "epoch": 2.41, "learning_rate": 9.837378569108572e-06, "loss": 0.5791, "step": 217630 }, { "epoch": 2.41, "learning_rate": 9.83645584197006e-06, "loss": 0.578, "step": 217635 }, { "epoch": 2.41, "learning_rate": 9.835533114831548e-06, "loss": 0.6147, "step": 217640 }, { "epoch": 2.41, "learning_rate": 9.834610387693036e-06, "loss": 0.5762, "step": 217645 }, { "epoch": 2.41, "learning_rate": 9.833687660554522e-06, "loss": 0.5794, "step": 217650 }, { "epoch": 2.41, "learning_rate": 9.83276493341601e-06, "loss": 0.5745, "step": 217655 }, { "epoch": 2.41, "learning_rate": 9.831842206277498e-06, "loss": 0.5594, "step": 217660 }, { "epoch": 2.41, "learning_rate": 9.830919479138986e-06, "loss": 0.6029, "step": 217665 }, { "epoch": 2.41, "learning_rate": 9.829996752000473e-06, "loss": 0.5569, "step": 217670 }, { "epoch": 2.41, "learning_rate": 9.82907402486196e-06, "loss": 0.5499, "step": 217675 }, { "epoch": 2.41, "learning_rate": 9.828151297723447e-06, "loss": 0.6359, "step": 217680 }, { "epoch": 2.41, "learning_rate": 9.827228570584935e-06, "loss": 0.5665, "step": 217685 }, { "epoch": 2.41, "learning_rate": 9.826305843446423e-06, "loss": 0.5881, "step": 217690 }, { "epoch": 2.41, "learning_rate": 9.825383116307911e-06, "loss": 0.584, "step": 217695 }, { "epoch": 2.41, "learning_rate": 9.824460389169399e-06, "loss": 0.5747, "step": 217700 }, { "epoch": 2.41, "learning_rate": 9.823537662030887e-06, "loss": 0.6572, "step": 217705 }, { "epoch": 2.41, "learning_rate": 9.822614934892375e-06, "loss": 0.6034, "step": 217710 }, { "epoch": 2.41, "learning_rate": 9.821692207753862e-06, "loss": 0.5665, "step": 217715 }, { "epoch": 2.41, "learning_rate": 9.820769480615349e-06, "loss": 0.6293, "step": 217720 }, { "epoch": 2.41, "learning_rate": 9.819846753476836e-06, "loss": 0.5805, "step": 217725 }, { "epoch": 2.41, "learning_rate": 9.818924026338323e-06, "loss": 0.6005, "step": 217730 }, { "epoch": 2.41, "learning_rate": 9.81800129919981e-06, "loss": 0.5978, "step": 217735 }, { "epoch": 2.41, "learning_rate": 9.817078572061298e-06, "loss": 0.5703, "step": 217740 }, { "epoch": 2.41, "learning_rate": 9.816155844922786e-06, "loss": 0.6145, "step": 217745 }, { "epoch": 2.41, "learning_rate": 9.815233117784274e-06, "loss": 0.5897, "step": 217750 }, { "epoch": 2.41, "learning_rate": 9.814310390645762e-06, "loss": 0.6059, "step": 217755 }, { "epoch": 2.41, "learning_rate": 9.81338766350725e-06, "loss": 0.6, "step": 217760 }, { "epoch": 2.41, "learning_rate": 9.812464936368738e-06, "loss": 0.5551, "step": 217765 }, { "epoch": 2.41, "learning_rate": 9.811542209230225e-06, "loss": 0.6116, "step": 217770 }, { "epoch": 2.41, "learning_rate": 9.810619482091713e-06, "loss": 0.5833, "step": 217775 }, { "epoch": 2.41, "learning_rate": 9.8096967549532e-06, "loss": 0.6155, "step": 217780 }, { "epoch": 2.41, "learning_rate": 9.808774027814687e-06, "loss": 0.6027, "step": 217785 }, { "epoch": 2.41, "learning_rate": 9.807851300676175e-06, "loss": 0.6185, "step": 217790 }, { "epoch": 2.41, "learning_rate": 9.806928573537663e-06, "loss": 0.5742, "step": 217795 }, { "epoch": 2.41, "learning_rate": 9.806005846399149e-06, "loss": 0.5527, "step": 217800 }, { "epoch": 2.41, "learning_rate": 9.805083119260637e-06, "loss": 0.6616, "step": 217805 }, { "epoch": 2.41, "learning_rate": 9.804160392122125e-06, "loss": 0.5983, "step": 217810 }, { "epoch": 2.41, "learning_rate": 9.803237664983613e-06, "loss": 0.5988, "step": 217815 }, { "epoch": 2.41, "learning_rate": 9.8023149378451e-06, "loss": 0.6011, "step": 217820 }, { "epoch": 2.41, "learning_rate": 9.801392210706588e-06, "loss": 0.5523, "step": 217825 }, { "epoch": 2.41, "learning_rate": 9.800469483568076e-06, "loss": 0.5625, "step": 217830 }, { "epoch": 2.41, "learning_rate": 9.799546756429562e-06, "loss": 0.5342, "step": 217835 }, { "epoch": 2.41, "learning_rate": 9.79862402929105e-06, "loss": 0.5959, "step": 217840 }, { "epoch": 2.41, "learning_rate": 9.797701302152538e-06, "loss": 0.6001, "step": 217845 }, { "epoch": 2.41, "learning_rate": 9.796778575014026e-06, "loss": 0.6084, "step": 217850 }, { "epoch": 2.41, "learning_rate": 9.795855847875514e-06, "loss": 0.5731, "step": 217855 }, { "epoch": 2.41, "learning_rate": 9.794933120737002e-06, "loss": 0.6386, "step": 217860 }, { "epoch": 2.41, "learning_rate": 9.79401039359849e-06, "loss": 0.549, "step": 217865 }, { "epoch": 2.41, "learning_rate": 9.793087666459977e-06, "loss": 0.5657, "step": 217870 }, { "epoch": 2.41, "learning_rate": 9.792164939321463e-06, "loss": 0.6061, "step": 217875 }, { "epoch": 2.41, "learning_rate": 9.791242212182951e-06, "loss": 0.6198, "step": 217880 }, { "epoch": 2.41, "learning_rate": 9.790319485044439e-06, "loss": 0.6068, "step": 217885 }, { "epoch": 2.41, "learning_rate": 9.789396757905927e-06, "loss": 0.5615, "step": 217890 }, { "epoch": 2.41, "learning_rate": 9.788474030767413e-06, "loss": 0.6498, "step": 217895 }, { "epoch": 2.41, "learning_rate": 9.787551303628901e-06, "loss": 0.5575, "step": 217900 }, { "epoch": 2.41, "learning_rate": 9.786628576490389e-06, "loss": 0.6264, "step": 217905 }, { "epoch": 2.41, "learning_rate": 9.785705849351877e-06, "loss": 0.5619, "step": 217910 }, { "epoch": 2.41, "learning_rate": 9.784783122213365e-06, "loss": 0.597, "step": 217915 }, { "epoch": 2.41, "learning_rate": 9.783860395074852e-06, "loss": 0.6039, "step": 217920 }, { "epoch": 2.41, "learning_rate": 9.78293766793634e-06, "loss": 0.6276, "step": 217925 }, { "epoch": 2.41, "learning_rate": 9.782014940797828e-06, "loss": 0.5945, "step": 217930 }, { "epoch": 2.41, "learning_rate": 9.781092213659316e-06, "loss": 0.5571, "step": 217935 }, { "epoch": 2.41, "learning_rate": 9.780169486520804e-06, "loss": 0.5832, "step": 217940 }, { "epoch": 2.41, "learning_rate": 9.77924675938229e-06, "loss": 0.6125, "step": 217945 }, { "epoch": 2.41, "learning_rate": 9.778324032243778e-06, "loss": 0.6148, "step": 217950 }, { "epoch": 2.41, "learning_rate": 9.777401305105264e-06, "loss": 0.5236, "step": 217955 }, { "epoch": 2.41, "learning_rate": 9.776478577966752e-06, "loss": 0.6343, "step": 217960 }, { "epoch": 2.41, "learning_rate": 9.77555585082824e-06, "loss": 0.6032, "step": 217965 }, { "epoch": 2.41, "learning_rate": 9.774633123689727e-06, "loss": 0.6031, "step": 217970 }, { "epoch": 2.41, "learning_rate": 9.773710396551215e-06, "loss": 0.5746, "step": 217975 }, { "epoch": 2.41, "learning_rate": 9.772787669412703e-06, "loss": 0.6085, "step": 217980 }, { "epoch": 2.41, "learning_rate": 9.771864942274191e-06, "loss": 0.5298, "step": 217985 }, { "epoch": 2.41, "learning_rate": 9.770942215135679e-06, "loss": 0.5774, "step": 217990 }, { "epoch": 2.41, "learning_rate": 9.770019487997167e-06, "loss": 0.6305, "step": 217995 }, { "epoch": 2.41, "learning_rate": 9.769096760858653e-06, "loss": 0.6268, "step": 218000 }, { "epoch": 2.41, "eval_loss": 0.5725258588790894, "eval_runtime": 70.9102, "eval_samples_per_second": 28.205, "eval_steps_per_second": 14.102, "step": 218000 }, { "epoch": 2.41, "learning_rate": 9.76817403372014e-06, "loss": 0.6084, "step": 218005 }, { "epoch": 2.41, "learning_rate": 9.767251306581629e-06, "loss": 0.4981, "step": 218010 }, { "epoch": 2.41, "learning_rate": 9.766328579443116e-06, "loss": 0.5903, "step": 218015 }, { "epoch": 2.41, "learning_rate": 9.765405852304604e-06, "loss": 0.5733, "step": 218020 }, { "epoch": 2.41, "learning_rate": 9.764483125166092e-06, "loss": 0.5887, "step": 218025 }, { "epoch": 2.41, "learning_rate": 9.763560398027578e-06, "loss": 0.5589, "step": 218030 }, { "epoch": 2.41, "learning_rate": 9.762637670889066e-06, "loss": 0.5855, "step": 218035 }, { "epoch": 2.41, "learning_rate": 9.761714943750554e-06, "loss": 0.5589, "step": 218040 }, { "epoch": 2.41, "learning_rate": 9.760792216612042e-06, "loss": 0.6128, "step": 218045 }, { "epoch": 2.41, "learning_rate": 9.75986948947353e-06, "loss": 0.5843, "step": 218050 }, { "epoch": 2.41, "learning_rate": 9.758946762335017e-06, "loss": 0.5709, "step": 218055 }, { "epoch": 2.41, "learning_rate": 9.758024035196504e-06, "loss": 0.5978, "step": 218060 }, { "epoch": 2.41, "learning_rate": 9.757101308057991e-06, "loss": 0.5761, "step": 218065 }, { "epoch": 2.41, "learning_rate": 9.75617858091948e-06, "loss": 0.6069, "step": 218070 }, { "epoch": 2.41, "learning_rate": 9.755255853780967e-06, "loss": 0.6057, "step": 218075 }, { "epoch": 2.41, "learning_rate": 9.754333126642455e-06, "loss": 0.6096, "step": 218080 }, { "epoch": 2.41, "learning_rate": 9.753410399503943e-06, "loss": 0.6434, "step": 218085 }, { "epoch": 2.41, "learning_rate": 9.75248767236543e-06, "loss": 0.6056, "step": 218090 }, { "epoch": 2.41, "learning_rate": 9.751564945226919e-06, "loss": 0.5938, "step": 218095 }, { "epoch": 2.41, "learning_rate": 9.750642218088406e-06, "loss": 0.5941, "step": 218100 }, { "epoch": 2.42, "learning_rate": 9.749719490949893e-06, "loss": 0.5292, "step": 218105 }, { "epoch": 2.42, "learning_rate": 9.74879676381138e-06, "loss": 0.6327, "step": 218110 }, { "epoch": 2.42, "learning_rate": 9.747874036672867e-06, "loss": 0.5746, "step": 218115 }, { "epoch": 2.42, "learning_rate": 9.746951309534354e-06, "loss": 0.65, "step": 218120 }, { "epoch": 2.42, "learning_rate": 9.746028582395842e-06, "loss": 0.6012, "step": 218125 }, { "epoch": 2.42, "learning_rate": 9.74510585525733e-06, "loss": 0.613, "step": 218130 }, { "epoch": 2.42, "learning_rate": 9.744183128118818e-06, "loss": 0.5884, "step": 218135 }, { "epoch": 2.42, "learning_rate": 9.743260400980306e-06, "loss": 0.5629, "step": 218140 }, { "epoch": 2.42, "learning_rate": 9.742337673841794e-06, "loss": 0.5789, "step": 218145 }, { "epoch": 2.42, "learning_rate": 9.741414946703282e-06, "loss": 0.62, "step": 218150 }, { "epoch": 2.42, "learning_rate": 9.74049221956477e-06, "loss": 0.5797, "step": 218155 }, { "epoch": 2.42, "learning_rate": 9.739569492426257e-06, "loss": 0.5726, "step": 218160 }, { "epoch": 2.42, "learning_rate": 9.738646765287743e-06, "loss": 0.629, "step": 218165 }, { "epoch": 2.42, "learning_rate": 9.737724038149231e-06, "loss": 0.608, "step": 218170 }, { "epoch": 2.42, "learning_rate": 9.736801311010719e-06, "loss": 0.658, "step": 218175 }, { "epoch": 2.42, "learning_rate": 9.735878583872205e-06, "loss": 0.59, "step": 218180 }, { "epoch": 2.42, "learning_rate": 9.734955856733693e-06, "loss": 0.5995, "step": 218185 }, { "epoch": 2.42, "learning_rate": 9.734033129595181e-06, "loss": 0.626, "step": 218190 }, { "epoch": 2.42, "learning_rate": 9.733110402456669e-06, "loss": 0.6124, "step": 218195 }, { "epoch": 2.42, "learning_rate": 9.732187675318157e-06, "loss": 0.6311, "step": 218200 }, { "epoch": 2.42, "learning_rate": 9.731264948179644e-06, "loss": 0.608, "step": 218205 }, { "epoch": 2.42, "learning_rate": 9.730342221041132e-06, "loss": 0.5719, "step": 218210 }, { "epoch": 2.42, "learning_rate": 9.72941949390262e-06, "loss": 0.651, "step": 218215 }, { "epoch": 2.42, "learning_rate": 9.728496766764108e-06, "loss": 0.5899, "step": 218220 }, { "epoch": 2.42, "learning_rate": 9.727574039625594e-06, "loss": 0.6161, "step": 218225 }, { "epoch": 2.42, "learning_rate": 9.726651312487082e-06, "loss": 0.5944, "step": 218230 }, { "epoch": 2.42, "learning_rate": 9.72572858534857e-06, "loss": 0.6026, "step": 218235 }, { "epoch": 2.42, "learning_rate": 9.724805858210058e-06, "loss": 0.6217, "step": 218240 }, { "epoch": 2.42, "learning_rate": 9.723883131071546e-06, "loss": 0.5812, "step": 218245 }, { "epoch": 2.42, "learning_rate": 9.722960403933033e-06, "loss": 0.6086, "step": 218250 }, { "epoch": 2.42, "learning_rate": 9.72203767679452e-06, "loss": 0.617, "step": 218255 }, { "epoch": 2.42, "learning_rate": 9.721114949656007e-06, "loss": 0.5633, "step": 218260 }, { "epoch": 2.42, "learning_rate": 9.720192222517495e-06, "loss": 0.5336, "step": 218265 }, { "epoch": 2.42, "learning_rate": 9.719269495378983e-06, "loss": 0.6637, "step": 218270 }, { "epoch": 2.42, "learning_rate": 9.718346768240471e-06, "loss": 0.559, "step": 218275 }, { "epoch": 2.42, "learning_rate": 9.717424041101957e-06, "loss": 0.6256, "step": 218280 }, { "epoch": 2.42, "learning_rate": 9.716501313963445e-06, "loss": 0.6124, "step": 218285 }, { "epoch": 2.42, "learning_rate": 9.715578586824933e-06, "loss": 0.5918, "step": 218290 }, { "epoch": 2.42, "learning_rate": 9.71465585968642e-06, "loss": 0.6102, "step": 218295 }, { "epoch": 2.42, "learning_rate": 9.713733132547909e-06, "loss": 0.5764, "step": 218300 }, { "epoch": 2.42, "learning_rate": 9.712810405409396e-06, "loss": 0.5756, "step": 218305 }, { "epoch": 2.42, "learning_rate": 9.711887678270884e-06, "loss": 0.5788, "step": 218310 }, { "epoch": 2.42, "learning_rate": 9.710964951132372e-06, "loss": 0.6475, "step": 218315 }, { "epoch": 2.42, "learning_rate": 9.71004222399386e-06, "loss": 0.5433, "step": 218320 }, { "epoch": 2.42, "learning_rate": 9.709119496855348e-06, "loss": 0.5909, "step": 218325 }, { "epoch": 2.42, "learning_rate": 9.708196769716834e-06, "loss": 0.5489, "step": 218330 }, { "epoch": 2.42, "learning_rate": 9.707274042578322e-06, "loss": 0.6471, "step": 218335 }, { "epoch": 2.42, "learning_rate": 9.706351315439808e-06, "loss": 0.6316, "step": 218340 }, { "epoch": 2.42, "learning_rate": 9.705428588301296e-06, "loss": 0.6632, "step": 218345 }, { "epoch": 2.42, "learning_rate": 9.704505861162784e-06, "loss": 0.5482, "step": 218350 }, { "epoch": 2.42, "learning_rate": 9.703583134024271e-06, "loss": 0.6011, "step": 218355 }, { "epoch": 2.42, "learning_rate": 9.70266040688576e-06, "loss": 0.578, "step": 218360 }, { "epoch": 2.42, "learning_rate": 9.701737679747247e-06, "loss": 0.576, "step": 218365 }, { "epoch": 2.42, "learning_rate": 9.700814952608735e-06, "loss": 0.5513, "step": 218370 }, { "epoch": 2.42, "learning_rate": 9.699892225470223e-06, "loss": 0.5875, "step": 218375 }, { "epoch": 2.42, "learning_rate": 9.69896949833171e-06, "loss": 0.5937, "step": 218380 }, { "epoch": 2.42, "learning_rate": 9.698046771193197e-06, "loss": 0.6304, "step": 218385 }, { "epoch": 2.42, "learning_rate": 9.697124044054685e-06, "loss": 0.603, "step": 218390 }, { "epoch": 2.42, "learning_rate": 9.696201316916173e-06, "loss": 0.5414, "step": 218395 }, { "epoch": 2.42, "learning_rate": 9.69527858977766e-06, "loss": 0.5635, "step": 218400 }, { "epoch": 2.42, "learning_rate": 9.694355862639147e-06, "loss": 0.5367, "step": 218405 }, { "epoch": 2.42, "learning_rate": 9.693433135500634e-06, "loss": 0.5885, "step": 218410 }, { "epoch": 2.42, "learning_rate": 9.692510408362122e-06, "loss": 0.6333, "step": 218415 }, { "epoch": 2.42, "learning_rate": 9.69158768122361e-06, "loss": 0.5862, "step": 218420 }, { "epoch": 2.42, "learning_rate": 9.690664954085098e-06, "loss": 0.5888, "step": 218425 }, { "epoch": 2.42, "learning_rate": 9.689742226946586e-06, "loss": 0.5583, "step": 218430 }, { "epoch": 2.42, "learning_rate": 9.688819499808074e-06, "loss": 0.5675, "step": 218435 }, { "epoch": 2.42, "learning_rate": 9.687896772669562e-06, "loss": 0.6071, "step": 218440 }, { "epoch": 2.42, "learning_rate": 9.686974045531048e-06, "loss": 0.5683, "step": 218445 }, { "epoch": 2.42, "learning_rate": 9.686051318392536e-06, "loss": 0.5784, "step": 218450 }, { "epoch": 2.42, "learning_rate": 9.685128591254023e-06, "loss": 0.6091, "step": 218455 }, { "epoch": 2.42, "learning_rate": 9.684205864115511e-06, "loss": 0.6281, "step": 218460 }, { "epoch": 2.42, "learning_rate": 9.683283136976999e-06, "loss": 0.6198, "step": 218465 }, { "epoch": 2.42, "learning_rate": 9.682360409838487e-06, "loss": 0.6125, "step": 218470 }, { "epoch": 2.42, "learning_rate": 9.681437682699975e-06, "loss": 0.5823, "step": 218475 }, { "epoch": 2.42, "learning_rate": 9.680514955561463e-06, "loss": 0.5621, "step": 218480 }, { "epoch": 2.42, "learning_rate": 9.679592228422949e-06, "loss": 0.5812, "step": 218485 }, { "epoch": 2.42, "learning_rate": 9.678669501284437e-06, "loss": 0.565, "step": 218490 }, { "epoch": 2.42, "learning_rate": 9.677746774145924e-06, "loss": 0.6008, "step": 218495 }, { "epoch": 2.42, "learning_rate": 9.676824047007412e-06, "loss": 0.5995, "step": 218500 }, { "epoch": 2.42, "learning_rate": 9.675901319868898e-06, "loss": 0.6214, "step": 218505 }, { "epoch": 2.42, "learning_rate": 9.674978592730386e-06, "loss": 0.6172, "step": 218510 }, { "epoch": 2.42, "learning_rate": 9.674055865591874e-06, "loss": 0.5479, "step": 218515 }, { "epoch": 2.42, "learning_rate": 9.673133138453362e-06, "loss": 0.5844, "step": 218520 }, { "epoch": 2.42, "learning_rate": 9.67221041131485e-06, "loss": 0.5655, "step": 218525 }, { "epoch": 2.42, "learning_rate": 9.671287684176338e-06, "loss": 0.5351, "step": 218530 }, { "epoch": 2.42, "learning_rate": 9.670364957037826e-06, "loss": 0.531, "step": 218535 }, { "epoch": 2.42, "learning_rate": 9.669442229899313e-06, "loss": 0.6185, "step": 218540 }, { "epoch": 2.42, "learning_rate": 9.668519502760801e-06, "loss": 0.6039, "step": 218545 }, { "epoch": 2.42, "learning_rate": 9.667596775622287e-06, "loss": 0.5403, "step": 218550 }, { "epoch": 2.42, "learning_rate": 9.666674048483775e-06, "loss": 0.5594, "step": 218555 }, { "epoch": 2.42, "learning_rate": 9.665751321345261e-06, "loss": 0.6197, "step": 218560 }, { "epoch": 2.42, "learning_rate": 9.66482859420675e-06, "loss": 0.5708, "step": 218565 }, { "epoch": 2.42, "learning_rate": 9.663905867068237e-06, "loss": 0.571, "step": 218570 }, { "epoch": 2.42, "learning_rate": 9.662983139929725e-06, "loss": 0.5771, "step": 218575 }, { "epoch": 2.42, "learning_rate": 9.662060412791213e-06, "loss": 0.587, "step": 218580 }, { "epoch": 2.42, "learning_rate": 9.6611376856527e-06, "loss": 0.5936, "step": 218585 }, { "epoch": 2.42, "learning_rate": 9.660214958514189e-06, "loss": 0.5944, "step": 218590 }, { "epoch": 2.42, "learning_rate": 9.659292231375676e-06, "loss": 0.5131, "step": 218595 }, { "epoch": 2.42, "learning_rate": 9.658369504237164e-06, "loss": 0.6019, "step": 218600 }, { "epoch": 2.42, "learning_rate": 9.657446777098652e-06, "loss": 0.5854, "step": 218605 }, { "epoch": 2.42, "learning_rate": 9.656524049960138e-06, "loss": 0.5907, "step": 218610 }, { "epoch": 2.42, "learning_rate": 9.655601322821626e-06, "loss": 0.5562, "step": 218615 }, { "epoch": 2.42, "learning_rate": 9.654678595683114e-06, "loss": 0.6076, "step": 218620 }, { "epoch": 2.42, "learning_rate": 9.653755868544602e-06, "loss": 0.5903, "step": 218625 }, { "epoch": 2.42, "learning_rate": 9.65283314140609e-06, "loss": 0.6258, "step": 218630 }, { "epoch": 2.42, "learning_rate": 9.651910414267576e-06, "loss": 0.5879, "step": 218635 }, { "epoch": 2.42, "learning_rate": 9.650987687129064e-06, "loss": 0.5592, "step": 218640 }, { "epoch": 2.42, "learning_rate": 9.650064959990551e-06, "loss": 0.5769, "step": 218645 }, { "epoch": 2.42, "learning_rate": 9.64914223285204e-06, "loss": 0.6111, "step": 218650 }, { "epoch": 2.42, "learning_rate": 9.648219505713527e-06, "loss": 0.588, "step": 218655 }, { "epoch": 2.42, "learning_rate": 9.647296778575015e-06, "loss": 0.5618, "step": 218660 }, { "epoch": 2.42, "learning_rate": 9.646374051436501e-06, "loss": 0.5745, "step": 218665 }, { "epoch": 2.42, "learning_rate": 9.645451324297989e-06, "loss": 0.5832, "step": 218670 }, { "epoch": 2.42, "learning_rate": 9.644528597159477e-06, "loss": 0.6107, "step": 218675 }, { "epoch": 2.42, "learning_rate": 9.643605870020965e-06, "loss": 0.6214, "step": 218680 }, { "epoch": 2.42, "learning_rate": 9.642683142882453e-06, "loss": 0.6513, "step": 218685 }, { "epoch": 2.42, "learning_rate": 9.64176041574394e-06, "loss": 0.5477, "step": 218690 }, { "epoch": 2.42, "learning_rate": 9.640837688605428e-06, "loss": 0.6002, "step": 218695 }, { "epoch": 2.42, "learning_rate": 9.639914961466916e-06, "loss": 0.6326, "step": 218700 }, { "epoch": 2.42, "learning_rate": 9.638992234328404e-06, "loss": 0.63, "step": 218705 }, { "epoch": 2.42, "learning_rate": 9.63806950718989e-06, "loss": 0.6069, "step": 218710 }, { "epoch": 2.42, "learning_rate": 9.637146780051378e-06, "loss": 0.6021, "step": 218715 }, { "epoch": 2.42, "learning_rate": 9.636224052912866e-06, "loss": 0.6357, "step": 218720 }, { "epoch": 2.42, "learning_rate": 9.635301325774352e-06, "loss": 0.5985, "step": 218725 }, { "epoch": 2.42, "learning_rate": 9.63437859863584e-06, "loss": 0.559, "step": 218730 }, { "epoch": 2.42, "learning_rate": 9.633455871497328e-06, "loss": 0.5992, "step": 218735 }, { "epoch": 2.42, "learning_rate": 9.632533144358815e-06, "loss": 0.5681, "step": 218740 }, { "epoch": 2.42, "learning_rate": 9.631610417220303e-06, "loss": 0.5955, "step": 218745 }, { "epoch": 2.42, "learning_rate": 9.630687690081791e-06, "loss": 0.5641, "step": 218750 }, { "epoch": 2.42, "learning_rate": 9.629764962943279e-06, "loss": 0.5852, "step": 218755 }, { "epoch": 2.42, "learning_rate": 9.628842235804767e-06, "loss": 0.5768, "step": 218760 }, { "epoch": 2.42, "learning_rate": 9.627919508666255e-06, "loss": 0.573, "step": 218765 }, { "epoch": 2.42, "learning_rate": 9.626996781527743e-06, "loss": 0.5751, "step": 218770 }, { "epoch": 2.42, "learning_rate": 9.626074054389229e-06, "loss": 0.5718, "step": 218775 }, { "epoch": 2.42, "learning_rate": 9.625151327250717e-06, "loss": 0.5698, "step": 218780 }, { "epoch": 2.42, "learning_rate": 9.624228600112203e-06, "loss": 0.5625, "step": 218785 }, { "epoch": 2.42, "learning_rate": 9.62330587297369e-06, "loss": 0.6277, "step": 218790 }, { "epoch": 2.42, "learning_rate": 9.622383145835178e-06, "loss": 0.6718, "step": 218795 }, { "epoch": 2.42, "learning_rate": 9.621460418696666e-06, "loss": 0.5611, "step": 218800 }, { "epoch": 2.42, "learning_rate": 9.620537691558154e-06, "loss": 0.6073, "step": 218805 }, { "epoch": 2.42, "learning_rate": 9.619614964419642e-06, "loss": 0.5723, "step": 218810 }, { "epoch": 2.42, "learning_rate": 9.61869223728113e-06, "loss": 0.6024, "step": 218815 }, { "epoch": 2.42, "learning_rate": 9.617769510142618e-06, "loss": 0.5977, "step": 218820 }, { "epoch": 2.42, "learning_rate": 9.616846783004106e-06, "loss": 0.5959, "step": 218825 }, { "epoch": 2.42, "learning_rate": 9.615924055865592e-06, "loss": 0.5649, "step": 218830 }, { "epoch": 2.42, "learning_rate": 9.61500132872708e-06, "loss": 0.5929, "step": 218835 }, { "epoch": 2.42, "learning_rate": 9.614078601588567e-06, "loss": 0.614, "step": 218840 }, { "epoch": 2.42, "learning_rate": 9.613155874450055e-06, "loss": 0.624, "step": 218845 }, { "epoch": 2.42, "learning_rate": 9.612233147311543e-06, "loss": 0.5679, "step": 218850 }, { "epoch": 2.42, "learning_rate": 9.611310420173031e-06, "loss": 0.5955, "step": 218855 }, { "epoch": 2.42, "learning_rate": 9.610387693034519e-06, "loss": 0.6402, "step": 218860 }, { "epoch": 2.42, "learning_rate": 9.609464965896005e-06, "loss": 0.5546, "step": 218865 }, { "epoch": 2.42, "learning_rate": 9.608542238757493e-06, "loss": 0.6303, "step": 218870 }, { "epoch": 2.42, "learning_rate": 9.60761951161898e-06, "loss": 0.5953, "step": 218875 }, { "epoch": 2.42, "learning_rate": 9.606696784480468e-06, "loss": 0.6127, "step": 218880 }, { "epoch": 2.42, "learning_rate": 9.605774057341956e-06, "loss": 0.5582, "step": 218885 }, { "epoch": 2.42, "learning_rate": 9.604851330203442e-06, "loss": 0.5519, "step": 218890 }, { "epoch": 2.42, "learning_rate": 9.60392860306493e-06, "loss": 0.5624, "step": 218895 }, { "epoch": 2.42, "learning_rate": 9.603005875926418e-06, "loss": 0.6267, "step": 218900 }, { "epoch": 2.42, "learning_rate": 9.602083148787906e-06, "loss": 0.6049, "step": 218905 }, { "epoch": 2.42, "learning_rate": 9.601160421649394e-06, "loss": 0.5884, "step": 218910 }, { "epoch": 2.42, "learning_rate": 9.600237694510882e-06, "loss": 0.6297, "step": 218915 }, { "epoch": 2.42, "learning_rate": 9.59931496737237e-06, "loss": 0.5741, "step": 218920 }, { "epoch": 2.42, "learning_rate": 9.598392240233857e-06, "loss": 0.5662, "step": 218925 }, { "epoch": 2.42, "learning_rate": 9.597469513095345e-06, "loss": 0.5772, "step": 218930 }, { "epoch": 2.42, "learning_rate": 9.596546785956831e-06, "loss": 0.5398, "step": 218935 }, { "epoch": 2.42, "learning_rate": 9.59562405881832e-06, "loss": 0.5952, "step": 218940 }, { "epoch": 2.42, "learning_rate": 9.594701331679805e-06, "loss": 0.577, "step": 218945 }, { "epoch": 2.42, "learning_rate": 9.593778604541293e-06, "loss": 0.5621, "step": 218950 }, { "epoch": 2.42, "learning_rate": 9.592855877402781e-06, "loss": 0.5539, "step": 218955 }, { "epoch": 2.42, "learning_rate": 9.591933150264269e-06, "loss": 0.5818, "step": 218960 }, { "epoch": 2.42, "learning_rate": 9.591010423125757e-06, "loss": 0.5677, "step": 218965 }, { "epoch": 2.42, "learning_rate": 9.590087695987245e-06, "loss": 0.5641, "step": 218970 }, { "epoch": 2.42, "learning_rate": 9.589164968848733e-06, "loss": 0.6146, "step": 218975 }, { "epoch": 2.42, "learning_rate": 9.58824224171022e-06, "loss": 0.6027, "step": 218980 }, { "epoch": 2.42, "learning_rate": 9.587319514571708e-06, "loss": 0.5802, "step": 218985 }, { "epoch": 2.42, "learning_rate": 9.586396787433196e-06, "loss": 0.585, "step": 218990 }, { "epoch": 2.42, "learning_rate": 9.585474060294682e-06, "loss": 0.5978, "step": 218995 }, { "epoch": 2.42, "learning_rate": 9.58455133315617e-06, "loss": 0.5885, "step": 219000 }, { "epoch": 2.42, "eval_loss": 0.5771087408065796, "eval_runtime": 69.3553, "eval_samples_per_second": 28.837, "eval_steps_per_second": 14.419, "step": 219000 }, { "epoch": 2.42, "learning_rate": 9.583628606017658e-06, "loss": 0.5636, "step": 219005 }, { "epoch": 2.43, "learning_rate": 9.582705878879146e-06, "loss": 0.5491, "step": 219010 }, { "epoch": 2.43, "learning_rate": 9.581783151740632e-06, "loss": 0.5808, "step": 219015 }, { "epoch": 2.43, "learning_rate": 9.58086042460212e-06, "loss": 0.5652, "step": 219020 }, { "epoch": 2.43, "learning_rate": 9.579937697463608e-06, "loss": 0.6045, "step": 219025 }, { "epoch": 2.43, "learning_rate": 9.579014970325095e-06, "loss": 0.5678, "step": 219030 }, { "epoch": 2.43, "learning_rate": 9.578092243186583e-06, "loss": 0.568, "step": 219035 }, { "epoch": 2.43, "learning_rate": 9.577169516048071e-06, "loss": 0.5808, "step": 219040 }, { "epoch": 2.43, "learning_rate": 9.576246788909559e-06, "loss": 0.6027, "step": 219045 }, { "epoch": 2.43, "learning_rate": 9.575324061771047e-06, "loss": 0.572, "step": 219050 }, { "epoch": 2.43, "learning_rate": 9.574401334632533e-06, "loss": 0.576, "step": 219055 }, { "epoch": 2.43, "learning_rate": 9.573478607494021e-06, "loss": 0.6, "step": 219060 }, { "epoch": 2.43, "learning_rate": 9.572555880355509e-06, "loss": 0.5894, "step": 219065 }, { "epoch": 2.43, "learning_rate": 9.571633153216997e-06, "loss": 0.579, "step": 219070 }, { "epoch": 2.43, "learning_rate": 9.570710426078484e-06, "loss": 0.5736, "step": 219075 }, { "epoch": 2.43, "learning_rate": 9.569787698939972e-06, "loss": 0.638, "step": 219080 }, { "epoch": 2.43, "learning_rate": 9.56886497180146e-06, "loss": 0.5788, "step": 219085 }, { "epoch": 2.43, "learning_rate": 9.567942244662946e-06, "loss": 0.6092, "step": 219090 }, { "epoch": 2.43, "learning_rate": 9.567019517524434e-06, "loss": 0.6108, "step": 219095 }, { "epoch": 2.43, "learning_rate": 9.566096790385922e-06, "loss": 0.5853, "step": 219100 }, { "epoch": 2.43, "learning_rate": 9.56517406324741e-06, "loss": 0.5705, "step": 219105 }, { "epoch": 2.43, "learning_rate": 9.564251336108896e-06, "loss": 0.5377, "step": 219110 }, { "epoch": 2.43, "learning_rate": 9.563328608970384e-06, "loss": 0.5599, "step": 219115 }, { "epoch": 2.43, "learning_rate": 9.562405881831872e-06, "loss": 0.6554, "step": 219120 }, { "epoch": 2.43, "learning_rate": 9.56148315469336e-06, "loss": 0.6116, "step": 219125 }, { "epoch": 2.43, "learning_rate": 9.560560427554847e-06, "loss": 0.6068, "step": 219130 }, { "epoch": 2.43, "learning_rate": 9.559637700416335e-06, "loss": 0.5752, "step": 219135 }, { "epoch": 2.43, "learning_rate": 9.558714973277823e-06, "loss": 0.5332, "step": 219140 }, { "epoch": 2.43, "learning_rate": 9.557792246139311e-06, "loss": 0.5683, "step": 219145 }, { "epoch": 2.43, "learning_rate": 9.556869519000799e-06, "loss": 0.6308, "step": 219150 }, { "epoch": 2.43, "learning_rate": 9.555946791862287e-06, "loss": 0.6434, "step": 219155 }, { "epoch": 2.43, "learning_rate": 9.555024064723773e-06, "loss": 0.5718, "step": 219160 }, { "epoch": 2.43, "learning_rate": 9.55410133758526e-06, "loss": 0.6493, "step": 219165 }, { "epoch": 2.43, "learning_rate": 9.553178610446747e-06, "loss": 0.5976, "step": 219170 }, { "epoch": 2.43, "learning_rate": 9.552255883308235e-06, "loss": 0.5653, "step": 219175 }, { "epoch": 2.43, "learning_rate": 9.551333156169722e-06, "loss": 0.6063, "step": 219180 }, { "epoch": 2.43, "learning_rate": 9.55041042903121e-06, "loss": 0.5859, "step": 219185 }, { "epoch": 2.43, "learning_rate": 9.549487701892698e-06, "loss": 0.5573, "step": 219190 }, { "epoch": 2.43, "learning_rate": 9.548564974754186e-06, "loss": 0.6008, "step": 219195 }, { "epoch": 2.43, "learning_rate": 9.547642247615674e-06, "loss": 0.5722, "step": 219200 }, { "epoch": 2.43, "learning_rate": 9.546719520477162e-06, "loss": 0.5566, "step": 219205 }, { "epoch": 2.43, "learning_rate": 9.54579679333865e-06, "loss": 0.6057, "step": 219210 }, { "epoch": 2.43, "learning_rate": 9.544874066200136e-06, "loss": 0.6159, "step": 219215 }, { "epoch": 2.43, "learning_rate": 9.543951339061624e-06, "loss": 0.5824, "step": 219220 }, { "epoch": 2.43, "learning_rate": 9.543028611923111e-06, "loss": 0.574, "step": 219225 }, { "epoch": 2.43, "learning_rate": 9.5421058847846e-06, "loss": 0.6051, "step": 219230 }, { "epoch": 2.43, "learning_rate": 9.541183157646087e-06, "loss": 0.6052, "step": 219235 }, { "epoch": 2.43, "learning_rate": 9.540260430507573e-06, "loss": 0.6172, "step": 219240 }, { "epoch": 2.43, "learning_rate": 9.539337703369061e-06, "loss": 0.6015, "step": 219245 }, { "epoch": 2.43, "learning_rate": 9.538414976230549e-06, "loss": 0.5579, "step": 219250 }, { "epoch": 2.43, "learning_rate": 9.537492249092037e-06, "loss": 0.5842, "step": 219255 }, { "epoch": 2.43, "learning_rate": 9.536569521953525e-06, "loss": 0.611, "step": 219260 }, { "epoch": 2.43, "learning_rate": 9.535646794815012e-06, "loss": 0.6041, "step": 219265 }, { "epoch": 2.43, "learning_rate": 9.5347240676765e-06, "loss": 0.5778, "step": 219270 }, { "epoch": 2.43, "learning_rate": 9.533801340537987e-06, "loss": 0.6262, "step": 219275 }, { "epoch": 2.43, "learning_rate": 9.532878613399474e-06, "loss": 0.5903, "step": 219280 }, { "epoch": 2.43, "learning_rate": 9.531955886260962e-06, "loss": 0.5937, "step": 219285 }, { "epoch": 2.43, "learning_rate": 9.53103315912245e-06, "loss": 0.5796, "step": 219290 }, { "epoch": 2.43, "learning_rate": 9.530110431983938e-06, "loss": 0.513, "step": 219295 }, { "epoch": 2.43, "learning_rate": 9.529187704845426e-06, "loss": 0.5805, "step": 219300 }, { "epoch": 2.43, "learning_rate": 9.528264977706914e-06, "loss": 0.5706, "step": 219305 }, { "epoch": 2.43, "learning_rate": 9.527342250568401e-06, "loss": 0.6211, "step": 219310 }, { "epoch": 2.43, "learning_rate": 9.52641952342989e-06, "loss": 0.6018, "step": 219315 }, { "epoch": 2.43, "learning_rate": 9.525496796291375e-06, "loss": 0.641, "step": 219320 }, { "epoch": 2.43, "learning_rate": 9.524574069152863e-06, "loss": 0.587, "step": 219325 }, { "epoch": 2.43, "learning_rate": 9.52365134201435e-06, "loss": 0.5896, "step": 219330 }, { "epoch": 2.43, "learning_rate": 9.522728614875837e-06, "loss": 0.5741, "step": 219335 }, { "epoch": 2.43, "learning_rate": 9.521805887737325e-06, "loss": 0.6208, "step": 219340 }, { "epoch": 2.43, "learning_rate": 9.520883160598813e-06, "loss": 0.5996, "step": 219345 }, { "epoch": 2.43, "learning_rate": 9.5199604334603e-06, "loss": 0.5974, "step": 219350 }, { "epoch": 2.43, "learning_rate": 9.519037706321789e-06, "loss": 0.5926, "step": 219355 }, { "epoch": 2.43, "learning_rate": 9.518114979183277e-06, "loss": 0.6266, "step": 219360 }, { "epoch": 2.43, "learning_rate": 9.517192252044764e-06, "loss": 0.5095, "step": 219365 }, { "epoch": 2.43, "learning_rate": 9.516269524906252e-06, "loss": 0.5533, "step": 219370 }, { "epoch": 2.43, "learning_rate": 9.51534679776774e-06, "loss": 0.5715, "step": 219375 }, { "epoch": 2.43, "learning_rate": 9.514424070629226e-06, "loss": 0.6055, "step": 219380 }, { "epoch": 2.43, "learning_rate": 9.513501343490714e-06, "loss": 0.5657, "step": 219385 }, { "epoch": 2.43, "learning_rate": 9.512578616352202e-06, "loss": 0.6125, "step": 219390 }, { "epoch": 2.43, "learning_rate": 9.511655889213688e-06, "loss": 0.6122, "step": 219395 }, { "epoch": 2.43, "learning_rate": 9.510733162075176e-06, "loss": 0.6361, "step": 219400 }, { "epoch": 2.43, "learning_rate": 9.509810434936664e-06, "loss": 0.6175, "step": 219405 }, { "epoch": 2.43, "learning_rate": 9.508887707798152e-06, "loss": 0.5683, "step": 219410 }, { "epoch": 2.43, "learning_rate": 9.50796498065964e-06, "loss": 0.6169, "step": 219415 }, { "epoch": 2.43, "learning_rate": 9.507042253521127e-06, "loss": 0.6035, "step": 219420 }, { "epoch": 2.43, "learning_rate": 9.506119526382615e-06, "loss": 0.5609, "step": 219425 }, { "epoch": 2.43, "learning_rate": 9.505196799244103e-06, "loss": 0.6738, "step": 219430 }, { "epoch": 2.43, "learning_rate": 9.504274072105591e-06, "loss": 0.5805, "step": 219435 }, { "epoch": 2.43, "learning_rate": 9.503351344967077e-06, "loss": 0.5549, "step": 219440 }, { "epoch": 2.43, "learning_rate": 9.502428617828565e-06, "loss": 0.6337, "step": 219445 }, { "epoch": 2.43, "learning_rate": 9.501505890690053e-06, "loss": 0.5883, "step": 219450 }, { "epoch": 2.43, "learning_rate": 9.50058316355154e-06, "loss": 0.6088, "step": 219455 }, { "epoch": 2.43, "learning_rate": 9.499660436413028e-06, "loss": 0.574, "step": 219460 }, { "epoch": 2.43, "learning_rate": 9.498737709274516e-06, "loss": 0.606, "step": 219465 }, { "epoch": 2.43, "learning_rate": 9.497814982136002e-06, "loss": 0.6085, "step": 219470 }, { "epoch": 2.43, "learning_rate": 9.49689225499749e-06, "loss": 0.6077, "step": 219475 }, { "epoch": 2.43, "learning_rate": 9.495969527858978e-06, "loss": 0.6298, "step": 219480 }, { "epoch": 2.43, "learning_rate": 9.495046800720466e-06, "loss": 0.5608, "step": 219485 }, { "epoch": 2.43, "learning_rate": 9.494124073581954e-06, "loss": 0.5794, "step": 219490 }, { "epoch": 2.43, "learning_rate": 9.49320134644344e-06, "loss": 0.5627, "step": 219495 }, { "epoch": 2.43, "learning_rate": 9.492278619304928e-06, "loss": 0.6341, "step": 219500 }, { "epoch": 2.43, "learning_rate": 9.491355892166416e-06, "loss": 0.5953, "step": 219505 }, { "epoch": 2.43, "learning_rate": 9.490433165027904e-06, "loss": 0.6573, "step": 219510 }, { "epoch": 2.43, "learning_rate": 9.489510437889391e-06, "loss": 0.5546, "step": 219515 }, { "epoch": 2.43, "learning_rate": 9.48858771075088e-06, "loss": 0.5685, "step": 219520 }, { "epoch": 2.43, "learning_rate": 9.487664983612367e-06, "loss": 0.6073, "step": 219525 }, { "epoch": 2.43, "learning_rate": 9.486742256473855e-06, "loss": 0.58, "step": 219530 }, { "epoch": 2.43, "learning_rate": 9.485819529335343e-06, "loss": 0.5513, "step": 219535 }, { "epoch": 2.43, "learning_rate": 9.48489680219683e-06, "loss": 0.5733, "step": 219540 }, { "epoch": 2.43, "learning_rate": 9.483974075058317e-06, "loss": 0.5963, "step": 219545 }, { "epoch": 2.43, "learning_rate": 9.483051347919805e-06, "loss": 0.58, "step": 219550 }, { "epoch": 2.43, "learning_rate": 9.48212862078129e-06, "loss": 0.606, "step": 219555 }, { "epoch": 2.43, "learning_rate": 9.481205893642779e-06, "loss": 0.6044, "step": 219560 }, { "epoch": 2.43, "learning_rate": 9.480283166504266e-06, "loss": 0.5941, "step": 219565 }, { "epoch": 2.43, "learning_rate": 9.479360439365754e-06, "loss": 0.5452, "step": 219570 }, { "epoch": 2.43, "learning_rate": 9.478437712227242e-06, "loss": 0.5478, "step": 219575 }, { "epoch": 2.43, "learning_rate": 9.47751498508873e-06, "loss": 0.6255, "step": 219580 }, { "epoch": 2.43, "learning_rate": 9.476592257950218e-06, "loss": 0.5865, "step": 219585 }, { "epoch": 2.43, "learning_rate": 9.475669530811706e-06, "loss": 0.5669, "step": 219590 }, { "epoch": 2.43, "learning_rate": 9.474746803673194e-06, "loss": 0.5857, "step": 219595 }, { "epoch": 2.43, "learning_rate": 9.47382407653468e-06, "loss": 0.5669, "step": 219600 }, { "epoch": 2.43, "learning_rate": 9.472901349396168e-06, "loss": 0.5146, "step": 219605 }, { "epoch": 2.43, "learning_rate": 9.471978622257655e-06, "loss": 0.5989, "step": 219610 }, { "epoch": 2.43, "learning_rate": 9.471055895119143e-06, "loss": 0.5489, "step": 219615 }, { "epoch": 2.43, "learning_rate": 9.47013316798063e-06, "loss": 0.5839, "step": 219620 }, { "epoch": 2.43, "learning_rate": 9.469210440842117e-06, "loss": 0.6186, "step": 219625 }, { "epoch": 2.43, "learning_rate": 9.468287713703605e-06, "loss": 0.5808, "step": 219630 }, { "epoch": 2.43, "learning_rate": 9.467364986565093e-06, "loss": 0.5863, "step": 219635 }, { "epoch": 2.43, "learning_rate": 9.46644225942658e-06, "loss": 0.6289, "step": 219640 }, { "epoch": 2.43, "learning_rate": 9.465519532288069e-06, "loss": 0.601, "step": 219645 }, { "epoch": 2.43, "learning_rate": 9.464596805149557e-06, "loss": 0.5402, "step": 219650 }, { "epoch": 2.43, "learning_rate": 9.463674078011044e-06, "loss": 0.5731, "step": 219655 }, { "epoch": 2.43, "learning_rate": 9.46275135087253e-06, "loss": 0.6723, "step": 219660 }, { "epoch": 2.43, "learning_rate": 9.461828623734018e-06, "loss": 0.5784, "step": 219665 }, { "epoch": 2.43, "learning_rate": 9.460905896595506e-06, "loss": 0.5838, "step": 219670 }, { "epoch": 2.43, "learning_rate": 9.459983169456994e-06, "loss": 0.5749, "step": 219675 }, { "epoch": 2.43, "learning_rate": 9.459060442318482e-06, "loss": 0.6528, "step": 219680 }, { "epoch": 2.43, "learning_rate": 9.45813771517997e-06, "loss": 0.582, "step": 219685 }, { "epoch": 2.43, "learning_rate": 9.457214988041458e-06, "loss": 0.5881, "step": 219690 }, { "epoch": 2.43, "learning_rate": 9.456292260902945e-06, "loss": 0.5303, "step": 219695 }, { "epoch": 2.43, "learning_rate": 9.455369533764432e-06, "loss": 0.5808, "step": 219700 }, { "epoch": 2.43, "learning_rate": 9.45444680662592e-06, "loss": 0.5854, "step": 219705 }, { "epoch": 2.43, "learning_rate": 9.453524079487407e-06, "loss": 0.6226, "step": 219710 }, { "epoch": 2.43, "learning_rate": 9.452601352348895e-06, "loss": 0.5751, "step": 219715 }, { "epoch": 2.43, "learning_rate": 9.451678625210381e-06, "loss": 0.6358, "step": 219720 }, { "epoch": 2.43, "learning_rate": 9.45075589807187e-06, "loss": 0.5412, "step": 219725 }, { "epoch": 2.43, "learning_rate": 9.449833170933357e-06, "loss": 0.6154, "step": 219730 }, { "epoch": 2.43, "learning_rate": 9.448910443794845e-06, "loss": 0.6111, "step": 219735 }, { "epoch": 2.43, "learning_rate": 9.447987716656333e-06, "loss": 0.5828, "step": 219740 }, { "epoch": 2.43, "learning_rate": 9.44706498951782e-06, "loss": 0.5707, "step": 219745 }, { "epoch": 2.43, "learning_rate": 9.446142262379308e-06, "loss": 0.5683, "step": 219750 }, { "epoch": 2.43, "learning_rate": 9.445219535240796e-06, "loss": 0.5877, "step": 219755 }, { "epoch": 2.43, "learning_rate": 9.444296808102284e-06, "loss": 0.6022, "step": 219760 }, { "epoch": 2.43, "learning_rate": 9.44337408096377e-06, "loss": 0.6123, "step": 219765 }, { "epoch": 2.43, "learning_rate": 9.442451353825258e-06, "loss": 0.5652, "step": 219770 }, { "epoch": 2.43, "learning_rate": 9.441528626686744e-06, "loss": 0.6058, "step": 219775 }, { "epoch": 2.43, "learning_rate": 9.440605899548232e-06, "loss": 0.5643, "step": 219780 }, { "epoch": 2.43, "learning_rate": 9.43968317240972e-06, "loss": 0.565, "step": 219785 }, { "epoch": 2.43, "learning_rate": 9.438760445271208e-06, "loss": 0.5961, "step": 219790 }, { "epoch": 2.43, "learning_rate": 9.437837718132696e-06, "loss": 0.5644, "step": 219795 }, { "epoch": 2.43, "learning_rate": 9.436914990994184e-06, "loss": 0.5558, "step": 219800 }, { "epoch": 2.43, "learning_rate": 9.435992263855671e-06, "loss": 0.598, "step": 219805 }, { "epoch": 2.43, "learning_rate": 9.43506953671716e-06, "loss": 0.6062, "step": 219810 }, { "epoch": 2.43, "learning_rate": 9.434146809578647e-06, "loss": 0.6094, "step": 219815 }, { "epoch": 2.43, "learning_rate": 9.433224082440135e-06, "loss": 0.6077, "step": 219820 }, { "epoch": 2.43, "learning_rate": 9.432301355301621e-06, "loss": 0.5815, "step": 219825 }, { "epoch": 2.43, "learning_rate": 9.431378628163109e-06, "loss": 0.5816, "step": 219830 }, { "epoch": 2.43, "learning_rate": 9.430455901024597e-06, "loss": 0.5663, "step": 219835 }, { "epoch": 2.43, "learning_rate": 9.429533173886085e-06, "loss": 0.5874, "step": 219840 }, { "epoch": 2.43, "learning_rate": 9.428610446747572e-06, "loss": 0.6267, "step": 219845 }, { "epoch": 2.43, "learning_rate": 9.427687719609059e-06, "loss": 0.6028, "step": 219850 }, { "epoch": 2.43, "learning_rate": 9.426764992470546e-06, "loss": 0.5567, "step": 219855 }, { "epoch": 2.43, "learning_rate": 9.425842265332034e-06, "loss": 0.6265, "step": 219860 }, { "epoch": 2.43, "learning_rate": 9.424919538193522e-06, "loss": 0.567, "step": 219865 }, { "epoch": 2.43, "learning_rate": 9.42399681105501e-06, "loss": 0.5703, "step": 219870 }, { "epoch": 2.43, "learning_rate": 9.423074083916498e-06, "loss": 0.5739, "step": 219875 }, { "epoch": 2.43, "learning_rate": 9.422151356777984e-06, "loss": 0.545, "step": 219880 }, { "epoch": 2.43, "learning_rate": 9.421228629639472e-06, "loss": 0.6148, "step": 219885 }, { "epoch": 2.43, "learning_rate": 9.42030590250096e-06, "loss": 0.6228, "step": 219890 }, { "epoch": 2.43, "learning_rate": 9.419383175362448e-06, "loss": 0.6012, "step": 219895 }, { "epoch": 2.43, "learning_rate": 9.418460448223935e-06, "loss": 0.6305, "step": 219900 }, { "epoch": 2.43, "learning_rate": 9.417537721085423e-06, "loss": 0.579, "step": 219905 }, { "epoch": 2.44, "learning_rate": 9.416614993946911e-06, "loss": 0.6009, "step": 219910 }, { "epoch": 2.44, "learning_rate": 9.415692266808399e-06, "loss": 0.5422, "step": 219915 }, { "epoch": 2.44, "learning_rate": 9.414769539669887e-06, "loss": 0.6026, "step": 219920 }, { "epoch": 2.44, "learning_rate": 9.413846812531373e-06, "loss": 0.5699, "step": 219925 }, { "epoch": 2.44, "learning_rate": 9.41292408539286e-06, "loss": 0.6486, "step": 219930 }, { "epoch": 2.44, "learning_rate": 9.412001358254349e-06, "loss": 0.5888, "step": 219935 }, { "epoch": 2.44, "learning_rate": 9.411078631115835e-06, "loss": 0.6199, "step": 219940 }, { "epoch": 2.44, "learning_rate": 9.410155903977323e-06, "loss": 0.6433, "step": 219945 }, { "epoch": 2.44, "learning_rate": 9.40923317683881e-06, "loss": 0.5834, "step": 219950 }, { "epoch": 2.44, "learning_rate": 9.408310449700298e-06, "loss": 0.5849, "step": 219955 }, { "epoch": 2.44, "learning_rate": 9.407387722561786e-06, "loss": 0.6052, "step": 219960 }, { "epoch": 2.44, "learning_rate": 9.406464995423274e-06, "loss": 0.5692, "step": 219965 }, { "epoch": 2.44, "learning_rate": 9.405542268284762e-06, "loss": 0.6014, "step": 219970 }, { "epoch": 2.44, "learning_rate": 9.40461954114625e-06, "loss": 0.6142, "step": 219975 }, { "epoch": 2.44, "learning_rate": 9.403696814007738e-06, "loss": 0.5921, "step": 219980 }, { "epoch": 2.44, "learning_rate": 9.402774086869225e-06, "loss": 0.5536, "step": 219985 }, { "epoch": 2.44, "learning_rate": 9.401851359730712e-06, "loss": 0.6177, "step": 219990 }, { "epoch": 2.44, "learning_rate": 9.4009286325922e-06, "loss": 0.6159, "step": 219995 }, { "epoch": 2.44, "learning_rate": 9.400005905453686e-06, "loss": 0.5708, "step": 220000 }, { "epoch": 2.44, "eval_loss": 0.5653793811798096, "eval_runtime": 69.514, "eval_samples_per_second": 28.771, "eval_steps_per_second": 14.386, "step": 220000 }, { "epoch": 2.44, "learning_rate": 9.399083178315173e-06, "loss": 0.5939, "step": 220005 }, { "epoch": 2.44, "learning_rate": 9.398160451176661e-06, "loss": 0.5651, "step": 220010 }, { "epoch": 2.44, "learning_rate": 9.397237724038149e-06, "loss": 0.5982, "step": 220015 }, { "epoch": 2.44, "learning_rate": 9.396314996899637e-06, "loss": 0.5658, "step": 220020 }, { "epoch": 2.44, "learning_rate": 9.395392269761125e-06, "loss": 0.5918, "step": 220025 }, { "epoch": 2.44, "learning_rate": 9.394469542622613e-06, "loss": 0.6341, "step": 220030 }, { "epoch": 2.44, "learning_rate": 9.3935468154841e-06, "loss": 0.569, "step": 220035 }, { "epoch": 2.44, "learning_rate": 9.392624088345588e-06, "loss": 0.577, "step": 220040 }, { "epoch": 2.44, "learning_rate": 9.391701361207075e-06, "loss": 0.557, "step": 220045 }, { "epoch": 2.44, "learning_rate": 9.390778634068562e-06, "loss": 0.5857, "step": 220050 }, { "epoch": 2.44, "learning_rate": 9.38985590693005e-06, "loss": 0.6036, "step": 220055 }, { "epoch": 2.44, "learning_rate": 9.388933179791538e-06, "loss": 0.577, "step": 220060 }, { "epoch": 2.44, "learning_rate": 9.388010452653026e-06, "loss": 0.6099, "step": 220065 }, { "epoch": 2.44, "learning_rate": 9.387087725514514e-06, "loss": 0.5915, "step": 220070 }, { "epoch": 2.44, "learning_rate": 9.386164998376e-06, "loss": 0.567, "step": 220075 }, { "epoch": 2.44, "learning_rate": 9.385242271237488e-06, "loss": 0.5767, "step": 220080 }, { "epoch": 2.44, "learning_rate": 9.384319544098976e-06, "loss": 0.5736, "step": 220085 }, { "epoch": 2.44, "learning_rate": 9.383396816960463e-06, "loss": 0.5618, "step": 220090 }, { "epoch": 2.44, "learning_rate": 9.382474089821951e-06, "loss": 0.5803, "step": 220095 }, { "epoch": 2.44, "learning_rate": 9.38155136268344e-06, "loss": 0.5765, "step": 220100 }, { "epoch": 2.44, "learning_rate": 9.380628635544925e-06, "loss": 0.6149, "step": 220105 }, { "epoch": 2.44, "learning_rate": 9.379705908406413e-06, "loss": 0.5691, "step": 220110 }, { "epoch": 2.44, "learning_rate": 9.378783181267901e-06, "loss": 0.6146, "step": 220115 }, { "epoch": 2.44, "learning_rate": 9.377860454129389e-06, "loss": 0.5598, "step": 220120 }, { "epoch": 2.44, "learning_rate": 9.376937726990877e-06, "loss": 0.6256, "step": 220125 }, { "epoch": 2.44, "learning_rate": 9.376014999852365e-06, "loss": 0.599, "step": 220130 }, { "epoch": 2.44, "learning_rate": 9.375092272713852e-06, "loss": 0.5509, "step": 220135 }, { "epoch": 2.44, "learning_rate": 9.37416954557534e-06, "loss": 0.5434, "step": 220140 }, { "epoch": 2.44, "learning_rate": 9.373246818436828e-06, "loss": 0.6169, "step": 220145 }, { "epoch": 2.44, "learning_rate": 9.372324091298314e-06, "loss": 0.5333, "step": 220150 }, { "epoch": 2.44, "learning_rate": 9.371401364159802e-06, "loss": 0.5784, "step": 220155 }, { "epoch": 2.44, "learning_rate": 9.370478637021288e-06, "loss": 0.5983, "step": 220160 }, { "epoch": 2.44, "learning_rate": 9.369555909882776e-06, "loss": 0.5687, "step": 220165 }, { "epoch": 2.44, "learning_rate": 9.368633182744264e-06, "loss": 0.6139, "step": 220170 }, { "epoch": 2.44, "learning_rate": 9.367710455605752e-06, "loss": 0.5918, "step": 220175 }, { "epoch": 2.44, "learning_rate": 9.36678772846724e-06, "loss": 0.5756, "step": 220180 }, { "epoch": 2.44, "learning_rate": 9.365865001328728e-06, "loss": 0.587, "step": 220185 }, { "epoch": 2.44, "learning_rate": 9.364942274190215e-06, "loss": 0.5709, "step": 220190 }, { "epoch": 2.44, "learning_rate": 9.364019547051703e-06, "loss": 0.5408, "step": 220195 }, { "epoch": 2.44, "learning_rate": 9.363096819913191e-06, "loss": 0.5844, "step": 220200 }, { "epoch": 2.44, "learning_rate": 9.362174092774679e-06, "loss": 0.5594, "step": 220205 }, { "epoch": 2.44, "learning_rate": 9.361251365636165e-06, "loss": 0.6198, "step": 220210 }, { "epoch": 2.44, "learning_rate": 9.360328638497653e-06, "loss": 0.586, "step": 220215 }, { "epoch": 2.44, "learning_rate": 9.35940591135914e-06, "loss": 0.5808, "step": 220220 }, { "epoch": 2.44, "learning_rate": 9.358483184220629e-06, "loss": 0.5719, "step": 220225 }, { "epoch": 2.44, "learning_rate": 9.357560457082115e-06, "loss": 0.6106, "step": 220230 }, { "epoch": 2.44, "learning_rate": 9.356637729943603e-06, "loss": 0.6233, "step": 220235 }, { "epoch": 2.44, "learning_rate": 9.35571500280509e-06, "loss": 0.6014, "step": 220240 }, { "epoch": 2.44, "learning_rate": 9.354792275666578e-06, "loss": 0.6206, "step": 220245 }, { "epoch": 2.44, "learning_rate": 9.353869548528066e-06, "loss": 0.6086, "step": 220250 }, { "epoch": 2.44, "learning_rate": 9.352946821389554e-06, "loss": 0.6511, "step": 220255 }, { "epoch": 2.44, "learning_rate": 9.352024094251042e-06, "loss": 0.5954, "step": 220260 }, { "epoch": 2.44, "learning_rate": 9.35110136711253e-06, "loss": 0.5966, "step": 220265 }, { "epoch": 2.44, "learning_rate": 9.350178639974016e-06, "loss": 0.5883, "step": 220270 }, { "epoch": 2.44, "learning_rate": 9.349255912835504e-06, "loss": 0.6558, "step": 220275 }, { "epoch": 2.44, "learning_rate": 9.348333185696992e-06, "loss": 0.6035, "step": 220280 }, { "epoch": 2.44, "learning_rate": 9.34741045855848e-06, "loss": 0.5888, "step": 220285 }, { "epoch": 2.44, "learning_rate": 9.346487731419967e-06, "loss": 0.6064, "step": 220290 }, { "epoch": 2.44, "learning_rate": 9.345565004281455e-06, "loss": 0.6427, "step": 220295 }, { "epoch": 2.44, "learning_rate": 9.344642277142943e-06, "loss": 0.592, "step": 220300 }, { "epoch": 2.44, "learning_rate": 9.343719550004429e-06, "loss": 0.5318, "step": 220305 }, { "epoch": 2.44, "learning_rate": 9.342796822865917e-06, "loss": 0.5424, "step": 220310 }, { "epoch": 2.44, "learning_rate": 9.341874095727405e-06, "loss": 0.5567, "step": 220315 }, { "epoch": 2.44, "learning_rate": 9.340951368588893e-06, "loss": 0.6055, "step": 220320 }, { "epoch": 2.44, "learning_rate": 9.340028641450379e-06, "loss": 0.571, "step": 220325 }, { "epoch": 2.44, "learning_rate": 9.339105914311867e-06, "loss": 0.5611, "step": 220330 }, { "epoch": 2.44, "learning_rate": 9.338183187173355e-06, "loss": 0.573, "step": 220335 }, { "epoch": 2.44, "learning_rate": 9.337260460034842e-06, "loss": 0.5545, "step": 220340 }, { "epoch": 2.44, "learning_rate": 9.33633773289633e-06, "loss": 0.5794, "step": 220345 }, { "epoch": 2.44, "learning_rate": 9.335415005757818e-06, "loss": 0.5321, "step": 220350 }, { "epoch": 2.44, "learning_rate": 9.334492278619306e-06, "loss": 0.6935, "step": 220355 }, { "epoch": 2.44, "learning_rate": 9.333569551480794e-06, "loss": 0.6293, "step": 220360 }, { "epoch": 2.44, "learning_rate": 9.332646824342282e-06, "loss": 0.6204, "step": 220365 }, { "epoch": 2.44, "learning_rate": 9.33172409720377e-06, "loss": 0.6042, "step": 220370 }, { "epoch": 2.44, "learning_rate": 9.330801370065256e-06, "loss": 0.5436, "step": 220375 }, { "epoch": 2.44, "learning_rate": 9.329878642926743e-06, "loss": 0.5372, "step": 220380 }, { "epoch": 2.44, "learning_rate": 9.32895591578823e-06, "loss": 0.6243, "step": 220385 }, { "epoch": 2.44, "learning_rate": 9.328033188649717e-06, "loss": 0.5563, "step": 220390 }, { "epoch": 2.44, "learning_rate": 9.327110461511205e-06, "loss": 0.5173, "step": 220395 }, { "epoch": 2.44, "learning_rate": 9.326187734372693e-06, "loss": 0.5943, "step": 220400 }, { "epoch": 2.44, "learning_rate": 9.325265007234181e-06, "loss": 0.5765, "step": 220405 }, { "epoch": 2.44, "learning_rate": 9.324342280095669e-06, "loss": 0.6159, "step": 220410 }, { "epoch": 2.44, "learning_rate": 9.323419552957157e-06, "loss": 0.6387, "step": 220415 }, { "epoch": 2.44, "learning_rate": 9.322496825818645e-06, "loss": 0.6368, "step": 220420 }, { "epoch": 2.44, "learning_rate": 9.321574098680132e-06, "loss": 0.6336, "step": 220425 }, { "epoch": 2.44, "learning_rate": 9.320651371541619e-06, "loss": 0.6217, "step": 220430 }, { "epoch": 2.44, "learning_rate": 9.319728644403106e-06, "loss": 0.6139, "step": 220435 }, { "epoch": 2.44, "learning_rate": 9.318805917264594e-06, "loss": 0.5788, "step": 220440 }, { "epoch": 2.44, "learning_rate": 9.317883190126082e-06, "loss": 0.5512, "step": 220445 }, { "epoch": 2.44, "learning_rate": 9.31696046298757e-06, "loss": 0.5673, "step": 220450 }, { "epoch": 2.44, "learning_rate": 9.316037735849056e-06, "loss": 0.5673, "step": 220455 }, { "epoch": 2.44, "learning_rate": 9.315115008710544e-06, "loss": 0.5625, "step": 220460 }, { "epoch": 2.44, "learning_rate": 9.314192281572032e-06, "loss": 0.5892, "step": 220465 }, { "epoch": 2.44, "learning_rate": 9.31326955443352e-06, "loss": 0.5774, "step": 220470 }, { "epoch": 2.44, "learning_rate": 9.312346827295008e-06, "loss": 0.616, "step": 220475 }, { "epoch": 2.44, "learning_rate": 9.311424100156495e-06, "loss": 0.6116, "step": 220480 }, { "epoch": 2.44, "learning_rate": 9.310501373017983e-06, "loss": 0.5551, "step": 220485 }, { "epoch": 2.44, "learning_rate": 9.30957864587947e-06, "loss": 0.5934, "step": 220490 }, { "epoch": 2.44, "learning_rate": 9.308655918740957e-06, "loss": 0.5534, "step": 220495 }, { "epoch": 2.44, "learning_rate": 9.307733191602445e-06, "loss": 0.5578, "step": 220500 }, { "epoch": 2.44, "learning_rate": 9.306810464463933e-06, "loss": 0.6902, "step": 220505 }, { "epoch": 2.44, "learning_rate": 9.30588773732542e-06, "loss": 0.5899, "step": 220510 }, { "epoch": 2.44, "learning_rate": 9.304965010186909e-06, "loss": 0.5635, "step": 220515 }, { "epoch": 2.44, "learning_rate": 9.304042283048396e-06, "loss": 0.5759, "step": 220520 }, { "epoch": 2.44, "learning_rate": 9.303119555909884e-06, "loss": 0.5524, "step": 220525 }, { "epoch": 2.44, "learning_rate": 9.302196828771372e-06, "loss": 0.5604, "step": 220530 }, { "epoch": 2.44, "learning_rate": 9.301274101632858e-06, "loss": 0.6388, "step": 220535 }, { "epoch": 2.44, "learning_rate": 9.300351374494346e-06, "loss": 0.5938, "step": 220540 }, { "epoch": 2.44, "learning_rate": 9.299428647355832e-06, "loss": 0.5894, "step": 220545 }, { "epoch": 2.44, "learning_rate": 9.29850592021732e-06, "loss": 0.6207, "step": 220550 }, { "epoch": 2.44, "learning_rate": 9.297583193078808e-06, "loss": 0.5908, "step": 220555 }, { "epoch": 2.44, "learning_rate": 9.296660465940296e-06, "loss": 0.5628, "step": 220560 }, { "epoch": 2.44, "learning_rate": 9.295737738801784e-06, "loss": 0.5725, "step": 220565 }, { "epoch": 2.44, "learning_rate": 9.294815011663272e-06, "loss": 0.5489, "step": 220570 }, { "epoch": 2.44, "learning_rate": 9.29389228452476e-06, "loss": 0.5829, "step": 220575 }, { "epoch": 2.44, "learning_rate": 9.292969557386247e-06, "loss": 0.5809, "step": 220580 }, { "epoch": 2.44, "learning_rate": 9.292046830247735e-06, "loss": 0.5556, "step": 220585 }, { "epoch": 2.44, "learning_rate": 9.291124103109223e-06, "loss": 0.6165, "step": 220590 }, { "epoch": 2.44, "learning_rate": 9.290201375970709e-06, "loss": 0.6161, "step": 220595 }, { "epoch": 2.44, "learning_rate": 9.289278648832197e-06, "loss": 0.5835, "step": 220600 }, { "epoch": 2.44, "learning_rate": 9.288355921693685e-06, "loss": 0.6188, "step": 220605 }, { "epoch": 2.44, "learning_rate": 9.287433194555171e-06, "loss": 0.5895, "step": 220610 }, { "epoch": 2.44, "learning_rate": 9.286510467416659e-06, "loss": 0.6187, "step": 220615 }, { "epoch": 2.44, "learning_rate": 9.285587740278147e-06, "loss": 0.6045, "step": 220620 }, { "epoch": 2.44, "learning_rate": 9.284665013139634e-06, "loss": 0.5848, "step": 220625 }, { "epoch": 2.44, "learning_rate": 9.283742286001122e-06, "loss": 0.5956, "step": 220630 }, { "epoch": 2.44, "learning_rate": 9.28281955886261e-06, "loss": 0.5548, "step": 220635 }, { "epoch": 2.44, "learning_rate": 9.281896831724098e-06, "loss": 0.5879, "step": 220640 }, { "epoch": 2.44, "learning_rate": 9.280974104585586e-06, "loss": 0.5483, "step": 220645 }, { "epoch": 2.44, "learning_rate": 9.280051377447074e-06, "loss": 0.6007, "step": 220650 }, { "epoch": 2.44, "learning_rate": 9.27912865030856e-06, "loss": 0.6251, "step": 220655 }, { "epoch": 2.44, "learning_rate": 9.278205923170048e-06, "loss": 0.5793, "step": 220660 }, { "epoch": 2.44, "learning_rate": 9.277283196031536e-06, "loss": 0.5792, "step": 220665 }, { "epoch": 2.44, "learning_rate": 9.276360468893023e-06, "loss": 0.5642, "step": 220670 }, { "epoch": 2.44, "learning_rate": 9.275437741754511e-06, "loss": 0.5847, "step": 220675 }, { "epoch": 2.44, "learning_rate": 9.274515014615999e-06, "loss": 0.5143, "step": 220680 }, { "epoch": 2.44, "learning_rate": 9.273592287477485e-06, "loss": 0.583, "step": 220685 }, { "epoch": 2.44, "learning_rate": 9.272669560338973e-06, "loss": 0.581, "step": 220690 }, { "epoch": 2.44, "learning_rate": 9.271746833200461e-06, "loss": 0.6218, "step": 220695 }, { "epoch": 2.44, "learning_rate": 9.270824106061949e-06, "loss": 0.558, "step": 220700 }, { "epoch": 2.44, "learning_rate": 9.269901378923437e-06, "loss": 0.6039, "step": 220705 }, { "epoch": 2.44, "learning_rate": 9.268978651784923e-06, "loss": 0.5827, "step": 220710 }, { "epoch": 2.44, "learning_rate": 9.26805592464641e-06, "loss": 0.6027, "step": 220715 }, { "epoch": 2.44, "learning_rate": 9.267133197507899e-06, "loss": 0.6078, "step": 220720 }, { "epoch": 2.44, "learning_rate": 9.266210470369386e-06, "loss": 0.617, "step": 220725 }, { "epoch": 2.44, "learning_rate": 9.265287743230874e-06, "loss": 0.6151, "step": 220730 }, { "epoch": 2.44, "learning_rate": 9.264365016092362e-06, "loss": 0.5792, "step": 220735 }, { "epoch": 2.44, "learning_rate": 9.26344228895385e-06, "loss": 0.642, "step": 220740 }, { "epoch": 2.44, "learning_rate": 9.262519561815338e-06, "loss": 0.5542, "step": 220745 }, { "epoch": 2.44, "learning_rate": 9.261596834676826e-06, "loss": 0.5325, "step": 220750 }, { "epoch": 2.44, "learning_rate": 9.260674107538313e-06, "loss": 0.5841, "step": 220755 }, { "epoch": 2.44, "learning_rate": 9.2597513803998e-06, "loss": 0.5489, "step": 220760 }, { "epoch": 2.44, "learning_rate": 9.258828653261287e-06, "loss": 0.5705, "step": 220765 }, { "epoch": 2.44, "learning_rate": 9.257905926122774e-06, "loss": 0.5249, "step": 220770 }, { "epoch": 2.44, "learning_rate": 9.256983198984261e-06, "loss": 0.6005, "step": 220775 }, { "epoch": 2.44, "learning_rate": 9.25606047184575e-06, "loss": 0.5294, "step": 220780 }, { "epoch": 2.44, "learning_rate": 9.255137744707237e-06, "loss": 0.6013, "step": 220785 }, { "epoch": 2.44, "learning_rate": 9.254215017568725e-06, "loss": 0.5747, "step": 220790 }, { "epoch": 2.44, "learning_rate": 9.253292290430213e-06, "loss": 0.5998, "step": 220795 }, { "epoch": 2.44, "learning_rate": 9.2523695632917e-06, "loss": 0.5709, "step": 220800 }, { "epoch": 2.44, "learning_rate": 9.251446836153189e-06, "loss": 0.6097, "step": 220805 }, { "epoch": 2.44, "learning_rate": 9.250524109014676e-06, "loss": 0.5902, "step": 220810 }, { "epoch": 2.45, "learning_rate": 9.249601381876164e-06, "loss": 0.5958, "step": 220815 }, { "epoch": 2.45, "learning_rate": 9.24867865473765e-06, "loss": 0.6378, "step": 220820 }, { "epoch": 2.45, "learning_rate": 9.247755927599138e-06, "loss": 0.6139, "step": 220825 }, { "epoch": 2.45, "learning_rate": 9.246833200460626e-06, "loss": 0.5957, "step": 220830 }, { "epoch": 2.45, "learning_rate": 9.245910473322112e-06, "loss": 0.575, "step": 220835 }, { "epoch": 2.45, "learning_rate": 9.2449877461836e-06, "loss": 0.5768, "step": 220840 }, { "epoch": 2.45, "learning_rate": 9.244065019045088e-06, "loss": 0.594, "step": 220845 }, { "epoch": 2.45, "learning_rate": 9.243142291906576e-06, "loss": 0.613, "step": 220850 }, { "epoch": 2.45, "learning_rate": 9.242219564768064e-06, "loss": 0.643, "step": 220855 }, { "epoch": 2.45, "learning_rate": 9.241296837629552e-06, "loss": 0.5448, "step": 220860 }, { "epoch": 2.45, "learning_rate": 9.24037411049104e-06, "loss": 0.5731, "step": 220865 }, { "epoch": 2.45, "learning_rate": 9.239451383352527e-06, "loss": 0.6111, "step": 220870 }, { "epoch": 2.45, "learning_rate": 9.238528656214013e-06, "loss": 0.5688, "step": 220875 }, { "epoch": 2.45, "learning_rate": 9.237605929075501e-06, "loss": 0.5788, "step": 220880 }, { "epoch": 2.45, "learning_rate": 9.236683201936989e-06, "loss": 0.5427, "step": 220885 }, { "epoch": 2.45, "learning_rate": 9.235760474798477e-06, "loss": 0.5878, "step": 220890 }, { "epoch": 2.45, "learning_rate": 9.234837747659965e-06, "loss": 0.6037, "step": 220895 }, { "epoch": 2.45, "learning_rate": 9.233915020521453e-06, "loss": 0.6302, "step": 220900 }, { "epoch": 2.45, "learning_rate": 9.23299229338294e-06, "loss": 0.5291, "step": 220905 }, { "epoch": 2.45, "learning_rate": 9.232069566244427e-06, "loss": 0.5658, "step": 220910 }, { "epoch": 2.45, "learning_rate": 9.231146839105914e-06, "loss": 0.6066, "step": 220915 }, { "epoch": 2.45, "learning_rate": 9.230224111967402e-06, "loss": 0.6043, "step": 220920 }, { "epoch": 2.45, "learning_rate": 9.22930138482889e-06, "loss": 0.5753, "step": 220925 }, { "epoch": 2.45, "learning_rate": 9.228378657690378e-06, "loss": 0.557, "step": 220930 }, { "epoch": 2.45, "learning_rate": 9.227455930551864e-06, "loss": 0.6245, "step": 220935 }, { "epoch": 2.45, "learning_rate": 9.226533203413352e-06, "loss": 0.577, "step": 220940 }, { "epoch": 2.45, "learning_rate": 9.22561047627484e-06, "loss": 0.6309, "step": 220945 }, { "epoch": 2.45, "learning_rate": 9.224687749136328e-06, "loss": 0.6416, "step": 220950 }, { "epoch": 2.45, "learning_rate": 9.223765021997816e-06, "loss": 0.572, "step": 220955 }, { "epoch": 2.45, "learning_rate": 9.222842294859303e-06, "loss": 0.6131, "step": 220960 }, { "epoch": 2.45, "learning_rate": 9.221919567720791e-06, "loss": 0.5795, "step": 220965 }, { "epoch": 2.45, "learning_rate": 9.220996840582279e-06, "loss": 0.5952, "step": 220970 }, { "epoch": 2.45, "learning_rate": 9.220074113443767e-06, "loss": 0.5536, "step": 220975 }, { "epoch": 2.45, "learning_rate": 9.219151386305253e-06, "loss": 0.6423, "step": 220980 }, { "epoch": 2.45, "learning_rate": 9.218228659166741e-06, "loss": 0.5683, "step": 220985 }, { "epoch": 2.45, "learning_rate": 9.217305932028227e-06, "loss": 0.5552, "step": 220990 }, { "epoch": 2.45, "learning_rate": 9.216383204889715e-06, "loss": 0.585, "step": 220995 }, { "epoch": 2.45, "learning_rate": 9.215460477751203e-06, "loss": 0.5783, "step": 221000 }, { "epoch": 2.45, "eval_loss": 0.5755901336669922, "eval_runtime": 70.1881, "eval_samples_per_second": 28.495, "eval_steps_per_second": 14.247, "step": 221000 }, { "epoch": 2.45, "learning_rate": 9.21453775061269e-06, "loss": 0.548, "step": 221005 }, { "epoch": 2.45, "learning_rate": 9.213615023474179e-06, "loss": 0.5899, "step": 221010 }, { "epoch": 2.45, "learning_rate": 9.212692296335666e-06, "loss": 0.5358, "step": 221015 }, { "epoch": 2.45, "learning_rate": 9.211769569197154e-06, "loss": 0.5686, "step": 221020 }, { "epoch": 2.45, "learning_rate": 9.210846842058642e-06, "loss": 0.5622, "step": 221025 }, { "epoch": 2.45, "learning_rate": 9.20992411492013e-06, "loss": 0.5866, "step": 221030 }, { "epoch": 2.45, "learning_rate": 9.209001387781618e-06, "loss": 0.5645, "step": 221035 }, { "epoch": 2.45, "learning_rate": 9.208078660643104e-06, "loss": 0.6148, "step": 221040 }, { "epoch": 2.45, "learning_rate": 9.207155933504592e-06, "loss": 0.5771, "step": 221045 }, { "epoch": 2.45, "learning_rate": 9.20623320636608e-06, "loss": 0.5881, "step": 221050 }, { "epoch": 2.45, "learning_rate": 9.205310479227567e-06, "loss": 0.5681, "step": 221055 }, { "epoch": 2.45, "learning_rate": 9.204387752089055e-06, "loss": 0.5363, "step": 221060 }, { "epoch": 2.45, "learning_rate": 9.203465024950541e-06, "loss": 0.6302, "step": 221065 }, { "epoch": 2.45, "learning_rate": 9.20254229781203e-06, "loss": 0.5848, "step": 221070 }, { "epoch": 2.45, "learning_rate": 9.201619570673517e-06, "loss": 0.5696, "step": 221075 }, { "epoch": 2.45, "learning_rate": 9.200696843535005e-06, "loss": 0.5877, "step": 221080 }, { "epoch": 2.45, "learning_rate": 9.199774116396493e-06, "loss": 0.6082, "step": 221085 }, { "epoch": 2.45, "learning_rate": 9.19885138925798e-06, "loss": 0.5987, "step": 221090 }, { "epoch": 2.45, "learning_rate": 9.197928662119467e-06, "loss": 0.5716, "step": 221095 }, { "epoch": 2.45, "learning_rate": 9.197005934980955e-06, "loss": 0.6082, "step": 221100 }, { "epoch": 2.45, "learning_rate": 9.196083207842443e-06, "loss": 0.5816, "step": 221105 }, { "epoch": 2.45, "learning_rate": 9.19516048070393e-06, "loss": 0.6314, "step": 221110 }, { "epoch": 2.45, "learning_rate": 9.194237753565418e-06, "loss": 0.5919, "step": 221115 }, { "epoch": 2.45, "learning_rate": 9.193315026426906e-06, "loss": 0.5662, "step": 221120 }, { "epoch": 2.45, "learning_rate": 9.192392299288394e-06, "loss": 0.6391, "step": 221125 }, { "epoch": 2.45, "learning_rate": 9.191469572149882e-06, "loss": 0.6031, "step": 221130 }, { "epoch": 2.45, "learning_rate": 9.19054684501137e-06, "loss": 0.5827, "step": 221135 }, { "epoch": 2.45, "learning_rate": 9.189624117872856e-06, "loss": 0.627, "step": 221140 }, { "epoch": 2.45, "learning_rate": 9.188701390734344e-06, "loss": 0.5933, "step": 221145 }, { "epoch": 2.45, "learning_rate": 9.187778663595831e-06, "loss": 0.6262, "step": 221150 }, { "epoch": 2.45, "learning_rate": 9.186855936457318e-06, "loss": 0.6473, "step": 221155 }, { "epoch": 2.45, "learning_rate": 9.185933209318806e-06, "loss": 0.6529, "step": 221160 }, { "epoch": 2.45, "learning_rate": 9.185010482180293e-06, "loss": 0.5492, "step": 221165 }, { "epoch": 2.45, "learning_rate": 9.184087755041781e-06, "loss": 0.5765, "step": 221170 }, { "epoch": 2.45, "learning_rate": 9.183165027903269e-06, "loss": 0.6166, "step": 221175 }, { "epoch": 2.45, "learning_rate": 9.182242300764757e-06, "loss": 0.5883, "step": 221180 }, { "epoch": 2.45, "learning_rate": 9.181319573626245e-06, "loss": 0.5722, "step": 221185 }, { "epoch": 2.45, "learning_rate": 9.180396846487733e-06, "loss": 0.6225, "step": 221190 }, { "epoch": 2.45, "learning_rate": 9.17947411934922e-06, "loss": 0.5775, "step": 221195 }, { "epoch": 2.45, "learning_rate": 9.178551392210708e-06, "loss": 0.5202, "step": 221200 }, { "epoch": 2.45, "learning_rate": 9.177628665072194e-06, "loss": 0.5371, "step": 221205 }, { "epoch": 2.45, "learning_rate": 9.176705937933682e-06, "loss": 0.5805, "step": 221210 }, { "epoch": 2.45, "learning_rate": 9.175783210795168e-06, "loss": 0.5656, "step": 221215 }, { "epoch": 2.45, "learning_rate": 9.174860483656656e-06, "loss": 0.6167, "step": 221220 }, { "epoch": 2.45, "learning_rate": 9.173937756518144e-06, "loss": 0.573, "step": 221225 }, { "epoch": 2.45, "learning_rate": 9.173015029379632e-06, "loss": 0.5894, "step": 221230 }, { "epoch": 2.45, "learning_rate": 9.17209230224112e-06, "loss": 0.5742, "step": 221235 }, { "epoch": 2.45, "learning_rate": 9.171169575102608e-06, "loss": 0.6276, "step": 221240 }, { "epoch": 2.45, "learning_rate": 9.170246847964096e-06, "loss": 0.5514, "step": 221245 }, { "epoch": 2.45, "learning_rate": 9.169324120825583e-06, "loss": 0.5692, "step": 221250 }, { "epoch": 2.45, "learning_rate": 9.168401393687071e-06, "loss": 0.5723, "step": 221255 }, { "epoch": 2.45, "learning_rate": 9.167478666548557e-06, "loss": 0.5862, "step": 221260 }, { "epoch": 2.45, "learning_rate": 9.166555939410045e-06, "loss": 0.5705, "step": 221265 }, { "epoch": 2.45, "learning_rate": 9.165633212271533e-06, "loss": 0.5818, "step": 221270 }, { "epoch": 2.45, "learning_rate": 9.164710485133021e-06, "loss": 0.5624, "step": 221275 }, { "epoch": 2.45, "learning_rate": 9.163787757994509e-06, "loss": 0.5916, "step": 221280 }, { "epoch": 2.45, "learning_rate": 9.162865030855997e-06, "loss": 0.5647, "step": 221285 }, { "epoch": 2.45, "learning_rate": 9.161942303717483e-06, "loss": 0.568, "step": 221290 }, { "epoch": 2.45, "learning_rate": 9.16101957657897e-06, "loss": 0.5929, "step": 221295 }, { "epoch": 2.45, "learning_rate": 9.160096849440458e-06, "loss": 0.5924, "step": 221300 }, { "epoch": 2.45, "learning_rate": 9.159174122301946e-06, "loss": 0.6031, "step": 221305 }, { "epoch": 2.45, "learning_rate": 9.158251395163434e-06, "loss": 0.6202, "step": 221310 }, { "epoch": 2.45, "learning_rate": 9.157328668024922e-06, "loss": 0.6195, "step": 221315 }, { "epoch": 2.45, "learning_rate": 9.156405940886408e-06, "loss": 0.5783, "step": 221320 }, { "epoch": 2.45, "learning_rate": 9.155483213747896e-06, "loss": 0.6127, "step": 221325 }, { "epoch": 2.45, "learning_rate": 9.154560486609384e-06, "loss": 0.5809, "step": 221330 }, { "epoch": 2.45, "learning_rate": 9.153637759470872e-06, "loss": 0.5704, "step": 221335 }, { "epoch": 2.45, "learning_rate": 9.15271503233236e-06, "loss": 0.542, "step": 221340 }, { "epoch": 2.45, "learning_rate": 9.151792305193847e-06, "loss": 0.5678, "step": 221345 }, { "epoch": 2.45, "learning_rate": 9.150869578055335e-06, "loss": 0.5967, "step": 221350 }, { "epoch": 2.45, "learning_rate": 9.149946850916823e-06, "loss": 0.59, "step": 221355 }, { "epoch": 2.45, "learning_rate": 9.149024123778311e-06, "loss": 0.6215, "step": 221360 }, { "epoch": 2.45, "learning_rate": 9.148101396639799e-06, "loss": 0.5982, "step": 221365 }, { "epoch": 2.45, "learning_rate": 9.147178669501285e-06, "loss": 0.5925, "step": 221370 }, { "epoch": 2.45, "learning_rate": 9.146255942362771e-06, "loss": 0.5584, "step": 221375 }, { "epoch": 2.45, "learning_rate": 9.145333215224259e-06, "loss": 0.5646, "step": 221380 }, { "epoch": 2.45, "learning_rate": 9.144410488085747e-06, "loss": 0.5799, "step": 221385 }, { "epoch": 2.45, "learning_rate": 9.143487760947235e-06, "loss": 0.5918, "step": 221390 }, { "epoch": 2.45, "learning_rate": 9.142565033808723e-06, "loss": 0.5778, "step": 221395 }, { "epoch": 2.45, "learning_rate": 9.14164230667021e-06, "loss": 0.6136, "step": 221400 }, { "epoch": 2.45, "learning_rate": 9.140719579531698e-06, "loss": 0.5838, "step": 221405 }, { "epoch": 2.45, "learning_rate": 9.139796852393186e-06, "loss": 0.5922, "step": 221410 }, { "epoch": 2.45, "learning_rate": 9.138874125254674e-06, "loss": 0.6589, "step": 221415 }, { "epoch": 2.45, "learning_rate": 9.137951398116162e-06, "loss": 0.5749, "step": 221420 }, { "epoch": 2.45, "learning_rate": 9.137028670977648e-06, "loss": 0.6085, "step": 221425 }, { "epoch": 2.45, "learning_rate": 9.136105943839136e-06, "loss": 0.5579, "step": 221430 }, { "epoch": 2.45, "learning_rate": 9.135183216700624e-06, "loss": 0.5805, "step": 221435 }, { "epoch": 2.45, "learning_rate": 9.134260489562111e-06, "loss": 0.5712, "step": 221440 }, { "epoch": 2.45, "learning_rate": 9.133337762423598e-06, "loss": 0.598, "step": 221445 }, { "epoch": 2.45, "learning_rate": 9.132415035285085e-06, "loss": 0.5779, "step": 221450 }, { "epoch": 2.45, "learning_rate": 9.131492308146573e-06, "loss": 0.5726, "step": 221455 }, { "epoch": 2.45, "learning_rate": 9.130569581008061e-06, "loss": 0.6195, "step": 221460 }, { "epoch": 2.45, "learning_rate": 9.129646853869549e-06, "loss": 0.5993, "step": 221465 }, { "epoch": 2.45, "learning_rate": 9.128724126731037e-06, "loss": 0.5866, "step": 221470 }, { "epoch": 2.45, "learning_rate": 9.127801399592525e-06, "loss": 0.6015, "step": 221475 }, { "epoch": 2.45, "learning_rate": 9.126878672454013e-06, "loss": 0.5762, "step": 221480 }, { "epoch": 2.45, "learning_rate": 9.125955945315499e-06, "loss": 0.602, "step": 221485 }, { "epoch": 2.45, "learning_rate": 9.125033218176987e-06, "loss": 0.5831, "step": 221490 }, { "epoch": 2.45, "learning_rate": 9.124110491038474e-06, "loss": 0.5779, "step": 221495 }, { "epoch": 2.45, "learning_rate": 9.123187763899962e-06, "loss": 0.6002, "step": 221500 }, { "epoch": 2.45, "learning_rate": 9.12226503676145e-06, "loss": 0.6086, "step": 221505 }, { "epoch": 2.45, "learning_rate": 9.121342309622938e-06, "loss": 0.5952, "step": 221510 }, { "epoch": 2.45, "learning_rate": 9.120419582484426e-06, "loss": 0.6036, "step": 221515 }, { "epoch": 2.45, "learning_rate": 9.119496855345912e-06, "loss": 0.6101, "step": 221520 }, { "epoch": 2.45, "learning_rate": 9.1185741282074e-06, "loss": 0.6008, "step": 221525 }, { "epoch": 2.45, "learning_rate": 9.117651401068888e-06, "loss": 0.5697, "step": 221530 }, { "epoch": 2.45, "learning_rate": 9.116728673930376e-06, "loss": 0.6031, "step": 221535 }, { "epoch": 2.45, "learning_rate": 9.115805946791862e-06, "loss": 0.571, "step": 221540 }, { "epoch": 2.45, "learning_rate": 9.11488321965335e-06, "loss": 0.6071, "step": 221545 }, { "epoch": 2.45, "learning_rate": 9.113960492514837e-06, "loss": 0.6118, "step": 221550 }, { "epoch": 2.45, "learning_rate": 9.113037765376325e-06, "loss": 0.5823, "step": 221555 }, { "epoch": 2.45, "learning_rate": 9.112115038237813e-06, "loss": 0.5875, "step": 221560 }, { "epoch": 2.45, "learning_rate": 9.111192311099301e-06, "loss": 0.6075, "step": 221565 }, { "epoch": 2.45, "learning_rate": 9.110269583960789e-06, "loss": 0.5677, "step": 221570 }, { "epoch": 2.45, "learning_rate": 9.109346856822277e-06, "loss": 0.5783, "step": 221575 }, { "epoch": 2.45, "learning_rate": 9.108424129683764e-06, "loss": 0.6067, "step": 221580 }, { "epoch": 2.45, "learning_rate": 9.107501402545252e-06, "loss": 0.5667, "step": 221585 }, { "epoch": 2.45, "learning_rate": 9.106578675406738e-06, "loss": 0.5598, "step": 221590 }, { "epoch": 2.45, "learning_rate": 9.105655948268226e-06, "loss": 0.6153, "step": 221595 }, { "epoch": 2.45, "learning_rate": 9.104733221129712e-06, "loss": 0.6273, "step": 221600 }, { "epoch": 2.45, "learning_rate": 9.1038104939912e-06, "loss": 0.5428, "step": 221605 }, { "epoch": 2.45, "learning_rate": 9.102887766852688e-06, "loss": 0.6054, "step": 221610 }, { "epoch": 2.45, "learning_rate": 9.101965039714176e-06, "loss": 0.5659, "step": 221615 }, { "epoch": 2.45, "learning_rate": 9.101042312575664e-06, "loss": 0.5538, "step": 221620 }, { "epoch": 2.45, "learning_rate": 9.100119585437152e-06, "loss": 0.5704, "step": 221625 }, { "epoch": 2.45, "learning_rate": 9.09919685829864e-06, "loss": 0.5555, "step": 221630 }, { "epoch": 2.45, "learning_rate": 9.098274131160127e-06, "loss": 0.6128, "step": 221635 }, { "epoch": 2.45, "learning_rate": 9.097351404021615e-06, "loss": 0.5629, "step": 221640 }, { "epoch": 2.45, "learning_rate": 9.096428676883101e-06, "loss": 0.5295, "step": 221645 }, { "epoch": 2.45, "learning_rate": 9.09550594974459e-06, "loss": 0.5104, "step": 221650 }, { "epoch": 2.45, "learning_rate": 9.094583222606077e-06, "loss": 0.5822, "step": 221655 }, { "epoch": 2.45, "learning_rate": 9.093660495467565e-06, "loss": 0.5717, "step": 221660 }, { "epoch": 2.45, "learning_rate": 9.092737768329053e-06, "loss": 0.6183, "step": 221665 }, { "epoch": 2.45, "learning_rate": 9.091815041190539e-06, "loss": 0.5785, "step": 221670 }, { "epoch": 2.45, "learning_rate": 9.090892314052027e-06, "loss": 0.59, "step": 221675 }, { "epoch": 2.45, "learning_rate": 9.089969586913515e-06, "loss": 0.574, "step": 221680 }, { "epoch": 2.45, "learning_rate": 9.089046859775003e-06, "loss": 0.5289, "step": 221685 }, { "epoch": 2.45, "learning_rate": 9.08812413263649e-06, "loss": 0.5939, "step": 221690 }, { "epoch": 2.45, "learning_rate": 9.087201405497978e-06, "loss": 0.5532, "step": 221695 }, { "epoch": 2.45, "learning_rate": 9.086278678359466e-06, "loss": 0.6421, "step": 221700 }, { "epoch": 2.45, "learning_rate": 9.085355951220952e-06, "loss": 0.5716, "step": 221705 }, { "epoch": 2.45, "learning_rate": 9.08443322408244e-06, "loss": 0.5897, "step": 221710 }, { "epoch": 2.45, "learning_rate": 9.083510496943928e-06, "loss": 0.5753, "step": 221715 }, { "epoch": 2.46, "learning_rate": 9.082587769805416e-06, "loss": 0.5475, "step": 221720 }, { "epoch": 2.46, "learning_rate": 9.081665042666904e-06, "loss": 0.604, "step": 221725 }, { "epoch": 2.46, "learning_rate": 9.080742315528391e-06, "loss": 0.5933, "step": 221730 }, { "epoch": 2.46, "learning_rate": 9.07981958838988e-06, "loss": 0.6227, "step": 221735 }, { "epoch": 2.46, "learning_rate": 9.078896861251367e-06, "loss": 0.5985, "step": 221740 }, { "epoch": 2.46, "learning_rate": 9.077974134112853e-06, "loss": 0.6175, "step": 221745 }, { "epoch": 2.46, "learning_rate": 9.077051406974341e-06, "loss": 0.5889, "step": 221750 }, { "epoch": 2.46, "learning_rate": 9.076128679835829e-06, "loss": 0.6303, "step": 221755 }, { "epoch": 2.46, "learning_rate": 9.075205952697315e-06, "loss": 0.6143, "step": 221760 }, { "epoch": 2.46, "learning_rate": 9.074283225558803e-06, "loss": 0.6188, "step": 221765 }, { "epoch": 2.46, "learning_rate": 9.073360498420291e-06, "loss": 0.5807, "step": 221770 }, { "epoch": 2.46, "learning_rate": 9.072437771281779e-06, "loss": 0.6176, "step": 221775 }, { "epoch": 2.46, "learning_rate": 9.071515044143267e-06, "loss": 0.6164, "step": 221780 }, { "epoch": 2.46, "learning_rate": 9.070592317004754e-06, "loss": 0.5818, "step": 221785 }, { "epoch": 2.46, "learning_rate": 9.069669589866242e-06, "loss": 0.6136, "step": 221790 }, { "epoch": 2.46, "learning_rate": 9.06874686272773e-06, "loss": 0.5867, "step": 221795 }, { "epoch": 2.46, "learning_rate": 9.067824135589218e-06, "loss": 0.6031, "step": 221800 }, { "epoch": 2.46, "learning_rate": 9.066901408450706e-06, "loss": 0.5381, "step": 221805 }, { "epoch": 2.46, "learning_rate": 9.065978681312192e-06, "loss": 0.5678, "step": 221810 }, { "epoch": 2.46, "learning_rate": 9.06505595417368e-06, "loss": 0.5561, "step": 221815 }, { "epoch": 2.46, "learning_rate": 9.064133227035168e-06, "loss": 0.5738, "step": 221820 }, { "epoch": 2.46, "learning_rate": 9.063210499896654e-06, "loss": 0.6439, "step": 221825 }, { "epoch": 2.46, "learning_rate": 9.062287772758142e-06, "loss": 0.6331, "step": 221830 }, { "epoch": 2.46, "learning_rate": 9.06136504561963e-06, "loss": 0.588, "step": 221835 }, { "epoch": 2.46, "learning_rate": 9.060442318481117e-06, "loss": 0.621, "step": 221840 }, { "epoch": 2.46, "learning_rate": 9.059519591342605e-06, "loss": 0.6558, "step": 221845 }, { "epoch": 2.46, "learning_rate": 9.058596864204093e-06, "loss": 0.6228, "step": 221850 }, { "epoch": 2.46, "learning_rate": 9.057674137065581e-06, "loss": 0.6015, "step": 221855 }, { "epoch": 2.46, "learning_rate": 9.056751409927069e-06, "loss": 0.6372, "step": 221860 }, { "epoch": 2.46, "learning_rate": 9.055828682788557e-06, "loss": 0.5476, "step": 221865 }, { "epoch": 2.46, "learning_rate": 9.054905955650043e-06, "loss": 0.6277, "step": 221870 }, { "epoch": 2.46, "learning_rate": 9.05398322851153e-06, "loss": 0.5957, "step": 221875 }, { "epoch": 2.46, "learning_rate": 9.053060501373018e-06, "loss": 0.5489, "step": 221880 }, { "epoch": 2.46, "learning_rate": 9.052137774234506e-06, "loss": 0.631, "step": 221885 }, { "epoch": 2.46, "learning_rate": 9.051215047095994e-06, "loss": 0.6019, "step": 221890 }, { "epoch": 2.46, "learning_rate": 9.050292319957482e-06, "loss": 0.6374, "step": 221895 }, { "epoch": 2.46, "learning_rate": 9.049369592818968e-06, "loss": 0.5996, "step": 221900 }, { "epoch": 2.46, "learning_rate": 9.048446865680456e-06, "loss": 0.6048, "step": 221905 }, { "epoch": 2.46, "learning_rate": 9.047524138541944e-06, "loss": 0.606, "step": 221910 }, { "epoch": 2.46, "learning_rate": 9.046601411403432e-06, "loss": 0.5967, "step": 221915 }, { "epoch": 2.46, "learning_rate": 9.04567868426492e-06, "loss": 0.5801, "step": 221920 }, { "epoch": 2.46, "learning_rate": 9.044755957126406e-06, "loss": 0.5127, "step": 221925 }, { "epoch": 2.46, "learning_rate": 9.043833229987894e-06, "loss": 0.579, "step": 221930 }, { "epoch": 2.46, "learning_rate": 9.042910502849381e-06, "loss": 0.5538, "step": 221935 }, { "epoch": 2.46, "learning_rate": 9.04198777571087e-06, "loss": 0.5921, "step": 221940 }, { "epoch": 2.46, "learning_rate": 9.041065048572357e-06, "loss": 0.6388, "step": 221945 }, { "epoch": 2.46, "learning_rate": 9.040142321433845e-06, "loss": 0.5835, "step": 221950 }, { "epoch": 2.46, "learning_rate": 9.039219594295333e-06, "loss": 0.5857, "step": 221955 }, { "epoch": 2.46, "learning_rate": 9.03829686715682e-06, "loss": 0.6187, "step": 221960 }, { "epoch": 2.46, "learning_rate": 9.037374140018308e-06, "loss": 0.5778, "step": 221965 }, { "epoch": 2.46, "learning_rate": 9.036451412879796e-06, "loss": 0.5832, "step": 221970 }, { "epoch": 2.46, "learning_rate": 9.035528685741282e-06, "loss": 0.594, "step": 221975 }, { "epoch": 2.46, "learning_rate": 9.03460595860277e-06, "loss": 0.6166, "step": 221980 }, { "epoch": 2.46, "learning_rate": 9.033683231464256e-06, "loss": 0.6102, "step": 221985 }, { "epoch": 2.46, "learning_rate": 9.032760504325744e-06, "loss": 0.6114, "step": 221990 }, { "epoch": 2.46, "learning_rate": 9.031837777187232e-06, "loss": 0.5104, "step": 221995 }, { "epoch": 2.46, "learning_rate": 9.03091505004872e-06, "loss": 0.61, "step": 222000 }, { "epoch": 2.46, "eval_loss": 0.564653217792511, "eval_runtime": 69.2115, "eval_samples_per_second": 28.897, "eval_steps_per_second": 14.448, "step": 222000 }, { "epoch": 2.46, "learning_rate": 9.029992322910208e-06, "loss": 0.5316, "step": 222005 }, { "epoch": 2.46, "learning_rate": 9.029069595771696e-06, "loss": 0.6118, "step": 222010 }, { "epoch": 2.46, "learning_rate": 9.028146868633184e-06, "loss": 0.5928, "step": 222015 }, { "epoch": 2.46, "learning_rate": 9.027224141494671e-06, "loss": 0.5564, "step": 222020 }, { "epoch": 2.46, "learning_rate": 9.02630141435616e-06, "loss": 0.6219, "step": 222025 }, { "epoch": 2.46, "learning_rate": 9.025378687217647e-06, "loss": 0.6197, "step": 222030 }, { "epoch": 2.46, "learning_rate": 9.024455960079133e-06, "loss": 0.6326, "step": 222035 }, { "epoch": 2.46, "learning_rate": 9.023533232940621e-06, "loss": 0.601, "step": 222040 }, { "epoch": 2.46, "learning_rate": 9.022610505802109e-06, "loss": 0.5339, "step": 222045 }, { "epoch": 2.46, "learning_rate": 9.021687778663595e-06, "loss": 0.5894, "step": 222050 }, { "epoch": 2.46, "learning_rate": 9.020765051525083e-06, "loss": 0.5734, "step": 222055 }, { "epoch": 2.46, "learning_rate": 9.01984232438657e-06, "loss": 0.5692, "step": 222060 }, { "epoch": 2.46, "learning_rate": 9.018919597248059e-06, "loss": 0.6633, "step": 222065 }, { "epoch": 2.46, "learning_rate": 9.017996870109547e-06, "loss": 0.5899, "step": 222070 }, { "epoch": 2.46, "learning_rate": 9.017074142971034e-06, "loss": 0.5802, "step": 222075 }, { "epoch": 2.46, "learning_rate": 9.016151415832522e-06, "loss": 0.5745, "step": 222080 }, { "epoch": 2.46, "learning_rate": 9.01522868869401e-06, "loss": 0.5668, "step": 222085 }, { "epoch": 2.46, "learning_rate": 9.014305961555496e-06, "loss": 0.5862, "step": 222090 }, { "epoch": 2.46, "learning_rate": 9.013383234416984e-06, "loss": 0.5759, "step": 222095 }, { "epoch": 2.46, "learning_rate": 9.012460507278472e-06, "loss": 0.614, "step": 222100 }, { "epoch": 2.46, "learning_rate": 9.01153778013996e-06, "loss": 0.6218, "step": 222105 }, { "epoch": 2.46, "learning_rate": 9.010615053001448e-06, "loss": 0.562, "step": 222110 }, { "epoch": 2.46, "learning_rate": 9.009692325862935e-06, "loss": 0.5731, "step": 222115 }, { "epoch": 2.46, "learning_rate": 9.008769598724423e-06, "loss": 0.5483, "step": 222120 }, { "epoch": 2.46, "learning_rate": 9.00784687158591e-06, "loss": 0.5994, "step": 222125 }, { "epoch": 2.46, "learning_rate": 9.006924144447397e-06, "loss": 0.5422, "step": 222130 }, { "epoch": 2.46, "learning_rate": 9.006001417308885e-06, "loss": 0.6293, "step": 222135 }, { "epoch": 2.46, "learning_rate": 9.005078690170373e-06, "loss": 0.6302, "step": 222140 }, { "epoch": 2.46, "learning_rate": 9.004155963031861e-06, "loss": 0.6095, "step": 222145 }, { "epoch": 2.46, "learning_rate": 9.003233235893347e-06, "loss": 0.6131, "step": 222150 }, { "epoch": 2.46, "learning_rate": 9.002310508754835e-06, "loss": 0.5925, "step": 222155 }, { "epoch": 2.46, "learning_rate": 9.001387781616323e-06, "loss": 0.5851, "step": 222160 }, { "epoch": 2.46, "learning_rate": 9.00046505447781e-06, "loss": 0.5395, "step": 222165 }, { "epoch": 2.46, "learning_rate": 8.999542327339298e-06, "loss": 0.5954, "step": 222170 }, { "epoch": 2.46, "learning_rate": 8.998619600200786e-06, "loss": 0.5733, "step": 222175 }, { "epoch": 2.46, "learning_rate": 8.997696873062274e-06, "loss": 0.6045, "step": 222180 }, { "epoch": 2.46, "learning_rate": 8.996774145923762e-06, "loss": 0.5877, "step": 222185 }, { "epoch": 2.46, "learning_rate": 8.99585141878525e-06, "loss": 0.5994, "step": 222190 }, { "epoch": 2.46, "learning_rate": 8.994928691646736e-06, "loss": 0.5781, "step": 222195 }, { "epoch": 2.46, "learning_rate": 8.994005964508224e-06, "loss": 0.5867, "step": 222200 }, { "epoch": 2.46, "learning_rate": 8.99308323736971e-06, "loss": 0.5713, "step": 222205 }, { "epoch": 2.46, "learning_rate": 8.992160510231198e-06, "loss": 0.5945, "step": 222210 }, { "epoch": 2.46, "learning_rate": 8.991237783092686e-06, "loss": 0.6399, "step": 222215 }, { "epoch": 2.46, "learning_rate": 8.990315055954174e-06, "loss": 0.6014, "step": 222220 }, { "epoch": 2.46, "learning_rate": 8.989392328815661e-06, "loss": 0.575, "step": 222225 }, { "epoch": 2.46, "learning_rate": 8.98846960167715e-06, "loss": 0.5587, "step": 222230 }, { "epoch": 2.46, "learning_rate": 8.987546874538637e-06, "loss": 0.578, "step": 222235 }, { "epoch": 2.46, "learning_rate": 8.986624147400125e-06, "loss": 0.5593, "step": 222240 }, { "epoch": 2.46, "learning_rate": 8.985701420261613e-06, "loss": 0.5863, "step": 222245 }, { "epoch": 2.46, "learning_rate": 8.9847786931231e-06, "loss": 0.6004, "step": 222250 }, { "epoch": 2.46, "learning_rate": 8.983855965984587e-06, "loss": 0.5819, "step": 222255 }, { "epoch": 2.46, "learning_rate": 8.982933238846075e-06, "loss": 0.5596, "step": 222260 }, { "epoch": 2.46, "learning_rate": 8.982010511707562e-06, "loss": 0.5413, "step": 222265 }, { "epoch": 2.46, "learning_rate": 8.98108778456905e-06, "loss": 0.5757, "step": 222270 }, { "epoch": 2.46, "learning_rate": 8.980165057430538e-06, "loss": 0.5649, "step": 222275 }, { "epoch": 2.46, "learning_rate": 8.979242330292024e-06, "loss": 0.5672, "step": 222280 }, { "epoch": 2.46, "learning_rate": 8.978319603153512e-06, "loss": 0.5547, "step": 222285 }, { "epoch": 2.46, "learning_rate": 8.977396876015e-06, "loss": 0.6119, "step": 222290 }, { "epoch": 2.46, "learning_rate": 8.976474148876488e-06, "loss": 0.556, "step": 222295 }, { "epoch": 2.46, "learning_rate": 8.975551421737976e-06, "loss": 0.6288, "step": 222300 }, { "epoch": 2.46, "learning_rate": 8.974628694599464e-06, "loss": 0.5989, "step": 222305 }, { "epoch": 2.46, "learning_rate": 8.97370596746095e-06, "loss": 0.6338, "step": 222310 }, { "epoch": 2.46, "learning_rate": 8.972783240322438e-06, "loss": 0.5986, "step": 222315 }, { "epoch": 2.46, "learning_rate": 8.971860513183925e-06, "loss": 0.6156, "step": 222320 }, { "epoch": 2.46, "learning_rate": 8.970937786045413e-06, "loss": 0.6572, "step": 222325 }, { "epoch": 2.46, "learning_rate": 8.970015058906901e-06, "loss": 0.6317, "step": 222330 }, { "epoch": 2.46, "learning_rate": 8.969092331768389e-06, "loss": 0.5863, "step": 222335 }, { "epoch": 2.46, "learning_rate": 8.968169604629877e-06, "loss": 0.5938, "step": 222340 }, { "epoch": 2.46, "learning_rate": 8.967246877491365e-06, "loss": 0.6219, "step": 222345 }, { "epoch": 2.46, "learning_rate": 8.966324150352852e-06, "loss": 0.5769, "step": 222350 }, { "epoch": 2.46, "learning_rate": 8.965401423214339e-06, "loss": 0.5373, "step": 222355 }, { "epoch": 2.46, "learning_rate": 8.964478696075827e-06, "loss": 0.6296, "step": 222360 }, { "epoch": 2.46, "learning_rate": 8.963555968937314e-06, "loss": 0.6038, "step": 222365 }, { "epoch": 2.46, "learning_rate": 8.9626332417988e-06, "loss": 0.6037, "step": 222370 }, { "epoch": 2.46, "learning_rate": 8.961710514660288e-06, "loss": 0.6095, "step": 222375 }, { "epoch": 2.46, "learning_rate": 8.960787787521776e-06, "loss": 0.5514, "step": 222380 }, { "epoch": 2.46, "learning_rate": 8.959865060383264e-06, "loss": 0.6, "step": 222385 }, { "epoch": 2.46, "learning_rate": 8.958942333244752e-06, "loss": 0.5915, "step": 222390 }, { "epoch": 2.46, "learning_rate": 8.95801960610624e-06, "loss": 0.5761, "step": 222395 }, { "epoch": 2.46, "learning_rate": 8.957096878967728e-06, "loss": 0.5393, "step": 222400 }, { "epoch": 2.46, "learning_rate": 8.956174151829215e-06, "loss": 0.5922, "step": 222405 }, { "epoch": 2.46, "learning_rate": 8.955251424690703e-06, "loss": 0.5679, "step": 222410 }, { "epoch": 2.46, "learning_rate": 8.954328697552191e-06, "loss": 0.5793, "step": 222415 }, { "epoch": 2.46, "learning_rate": 8.953405970413677e-06, "loss": 0.5669, "step": 222420 }, { "epoch": 2.46, "learning_rate": 8.952483243275165e-06, "loss": 0.637, "step": 222425 }, { "epoch": 2.46, "learning_rate": 8.951560516136651e-06, "loss": 0.5497, "step": 222430 }, { "epoch": 2.46, "learning_rate": 8.950637788998139e-06, "loss": 0.6578, "step": 222435 }, { "epoch": 2.46, "learning_rate": 8.949715061859627e-06, "loss": 0.589, "step": 222440 }, { "epoch": 2.46, "learning_rate": 8.948792334721115e-06, "loss": 0.5317, "step": 222445 }, { "epoch": 2.46, "learning_rate": 8.947869607582603e-06, "loss": 0.5349, "step": 222450 }, { "epoch": 2.46, "learning_rate": 8.94694688044409e-06, "loss": 0.6274, "step": 222455 }, { "epoch": 2.46, "learning_rate": 8.946024153305578e-06, "loss": 0.6728, "step": 222460 }, { "epoch": 2.46, "learning_rate": 8.945101426167066e-06, "loss": 0.5386, "step": 222465 }, { "epoch": 2.46, "learning_rate": 8.944178699028554e-06, "loss": 0.5646, "step": 222470 }, { "epoch": 2.46, "learning_rate": 8.94325597189004e-06, "loss": 0.5959, "step": 222475 }, { "epoch": 2.46, "learning_rate": 8.942333244751528e-06, "loss": 0.5592, "step": 222480 }, { "epoch": 2.46, "learning_rate": 8.941410517613016e-06, "loss": 0.5986, "step": 222485 }, { "epoch": 2.46, "learning_rate": 8.940487790474504e-06, "loss": 0.6424, "step": 222490 }, { "epoch": 2.46, "learning_rate": 8.939565063335992e-06, "loss": 0.5954, "step": 222495 }, { "epoch": 2.46, "learning_rate": 8.93864233619748e-06, "loss": 0.5767, "step": 222500 }, { "epoch": 2.46, "learning_rate": 8.937719609058966e-06, "loss": 0.584, "step": 222505 }, { "epoch": 2.46, "learning_rate": 8.936796881920453e-06, "loss": 0.5854, "step": 222510 }, { "epoch": 2.46, "learning_rate": 8.935874154781941e-06, "loss": 0.5775, "step": 222515 }, { "epoch": 2.46, "learning_rate": 8.93495142764343e-06, "loss": 0.5874, "step": 222520 }, { "epoch": 2.46, "learning_rate": 8.934028700504917e-06, "loss": 0.6115, "step": 222525 }, { "epoch": 2.46, "learning_rate": 8.933105973366405e-06, "loss": 0.6033, "step": 222530 }, { "epoch": 2.46, "learning_rate": 8.932183246227891e-06, "loss": 0.608, "step": 222535 }, { "epoch": 2.46, "learning_rate": 8.931260519089379e-06, "loss": 0.6401, "step": 222540 }, { "epoch": 2.46, "learning_rate": 8.930337791950867e-06, "loss": 0.569, "step": 222545 }, { "epoch": 2.46, "learning_rate": 8.929415064812355e-06, "loss": 0.6236, "step": 222550 }, { "epoch": 2.46, "learning_rate": 8.928492337673842e-06, "loss": 0.6414, "step": 222555 }, { "epoch": 2.46, "learning_rate": 8.92756961053533e-06, "loss": 0.6171, "step": 222560 }, { "epoch": 2.46, "learning_rate": 8.926646883396818e-06, "loss": 0.5867, "step": 222565 }, { "epoch": 2.46, "learning_rate": 8.925724156258306e-06, "loss": 0.5969, "step": 222570 }, { "epoch": 2.46, "learning_rate": 8.924801429119794e-06, "loss": 0.5583, "step": 222575 }, { "epoch": 2.46, "learning_rate": 8.92387870198128e-06, "loss": 0.5502, "step": 222580 }, { "epoch": 2.46, "learning_rate": 8.922955974842768e-06, "loss": 0.5359, "step": 222585 }, { "epoch": 2.46, "learning_rate": 8.922033247704254e-06, "loss": 0.5519, "step": 222590 }, { "epoch": 2.46, "learning_rate": 8.921110520565742e-06, "loss": 0.5875, "step": 222595 }, { "epoch": 2.46, "learning_rate": 8.92018779342723e-06, "loss": 0.5868, "step": 222600 }, { "epoch": 2.46, "learning_rate": 8.919265066288718e-06, "loss": 0.5437, "step": 222605 }, { "epoch": 2.46, "learning_rate": 8.918342339150205e-06, "loss": 0.5991, "step": 222610 }, { "epoch": 2.46, "learning_rate": 8.917419612011693e-06, "loss": 0.5794, "step": 222615 }, { "epoch": 2.47, "learning_rate": 8.916496884873181e-06, "loss": 0.5975, "step": 222620 }, { "epoch": 2.47, "learning_rate": 8.915574157734669e-06, "loss": 0.5972, "step": 222625 }, { "epoch": 2.47, "learning_rate": 8.914651430596157e-06, "loss": 0.5639, "step": 222630 }, { "epoch": 2.47, "learning_rate": 8.913728703457645e-06, "loss": 0.5589, "step": 222635 }, { "epoch": 2.47, "learning_rate": 8.91280597631913e-06, "loss": 0.5937, "step": 222640 }, { "epoch": 2.47, "learning_rate": 8.911883249180619e-06, "loss": 0.5691, "step": 222645 }, { "epoch": 2.47, "learning_rate": 8.910960522042106e-06, "loss": 0.5901, "step": 222650 }, { "epoch": 2.47, "learning_rate": 8.910037794903594e-06, "loss": 0.6695, "step": 222655 }, { "epoch": 2.47, "learning_rate": 8.90911506776508e-06, "loss": 0.5801, "step": 222660 }, { "epoch": 2.47, "learning_rate": 8.908192340626568e-06, "loss": 0.6235, "step": 222665 }, { "epoch": 2.47, "learning_rate": 8.907269613488056e-06, "loss": 0.5909, "step": 222670 }, { "epoch": 2.47, "learning_rate": 8.906346886349544e-06, "loss": 0.52, "step": 222675 }, { "epoch": 2.47, "learning_rate": 8.905424159211032e-06, "loss": 0.5851, "step": 222680 }, { "epoch": 2.47, "learning_rate": 8.90450143207252e-06, "loss": 0.5795, "step": 222685 }, { "epoch": 2.47, "learning_rate": 8.903578704934008e-06, "loss": 0.6353, "step": 222690 }, { "epoch": 2.47, "learning_rate": 8.902655977795495e-06, "loss": 0.5898, "step": 222695 }, { "epoch": 2.47, "learning_rate": 8.901733250656982e-06, "loss": 0.5966, "step": 222700 }, { "epoch": 2.47, "learning_rate": 8.90081052351847e-06, "loss": 0.5698, "step": 222705 }, { "epoch": 2.47, "learning_rate": 8.899887796379957e-06, "loss": 0.6027, "step": 222710 }, { "epoch": 2.47, "learning_rate": 8.898965069241445e-06, "loss": 0.5813, "step": 222715 }, { "epoch": 2.47, "learning_rate": 8.898042342102933e-06, "loss": 0.6107, "step": 222720 }, { "epoch": 2.47, "learning_rate": 8.89711961496442e-06, "loss": 0.5857, "step": 222725 }, { "epoch": 2.47, "learning_rate": 8.896196887825909e-06, "loss": 0.609, "step": 222730 }, { "epoch": 2.47, "learning_rate": 8.895274160687395e-06, "loss": 0.6396, "step": 222735 }, { "epoch": 2.47, "learning_rate": 8.894351433548883e-06, "loss": 0.6149, "step": 222740 }, { "epoch": 2.47, "learning_rate": 8.89342870641037e-06, "loss": 0.612, "step": 222745 }, { "epoch": 2.47, "learning_rate": 8.892505979271858e-06, "loss": 0.5808, "step": 222750 }, { "epoch": 2.47, "learning_rate": 8.891583252133345e-06, "loss": 0.6092, "step": 222755 }, { "epoch": 2.47, "learning_rate": 8.890660524994832e-06, "loss": 0.5757, "step": 222760 }, { "epoch": 2.47, "learning_rate": 8.88973779785632e-06, "loss": 0.57, "step": 222765 }, { "epoch": 2.47, "learning_rate": 8.888815070717808e-06, "loss": 0.543, "step": 222770 }, { "epoch": 2.47, "learning_rate": 8.887892343579296e-06, "loss": 0.5959, "step": 222775 }, { "epoch": 2.47, "learning_rate": 8.886969616440784e-06, "loss": 0.5882, "step": 222780 }, { "epoch": 2.47, "learning_rate": 8.886046889302272e-06, "loss": 0.6119, "step": 222785 }, { "epoch": 2.47, "learning_rate": 8.88512416216376e-06, "loss": 0.5566, "step": 222790 }, { "epoch": 2.47, "learning_rate": 8.884201435025247e-06, "loss": 0.57, "step": 222795 }, { "epoch": 2.47, "learning_rate": 8.883278707886735e-06, "loss": 0.5928, "step": 222800 }, { "epoch": 2.47, "learning_rate": 8.882355980748221e-06, "loss": 0.5683, "step": 222805 }, { "epoch": 2.47, "learning_rate": 8.88143325360971e-06, "loss": 0.5531, "step": 222810 }, { "epoch": 2.47, "learning_rate": 8.880510526471195e-06, "loss": 0.623, "step": 222815 }, { "epoch": 2.47, "learning_rate": 8.879587799332683e-06, "loss": 0.5719, "step": 222820 }, { "epoch": 2.47, "learning_rate": 8.878665072194171e-06, "loss": 0.5752, "step": 222825 }, { "epoch": 2.47, "learning_rate": 8.877742345055659e-06, "loss": 0.6201, "step": 222830 }, { "epoch": 2.47, "learning_rate": 8.876819617917147e-06, "loss": 0.5969, "step": 222835 }, { "epoch": 2.47, "learning_rate": 8.875896890778635e-06, "loss": 0.5724, "step": 222840 }, { "epoch": 2.47, "learning_rate": 8.874974163640122e-06, "loss": 0.6155, "step": 222845 }, { "epoch": 2.47, "learning_rate": 8.87405143650161e-06, "loss": 0.5918, "step": 222850 }, { "epoch": 2.47, "learning_rate": 8.873128709363098e-06, "loss": 0.5643, "step": 222855 }, { "epoch": 2.47, "learning_rate": 8.872205982224584e-06, "loss": 0.5712, "step": 222860 }, { "epoch": 2.47, "learning_rate": 8.871283255086072e-06, "loss": 0.6031, "step": 222865 }, { "epoch": 2.47, "learning_rate": 8.87036052794756e-06, "loss": 0.6269, "step": 222870 }, { "epoch": 2.47, "learning_rate": 8.869437800809048e-06, "loss": 0.6159, "step": 222875 }, { "epoch": 2.47, "learning_rate": 8.868515073670536e-06, "loss": 0.5479, "step": 222880 }, { "epoch": 2.47, "learning_rate": 8.867592346532022e-06, "loss": 0.597, "step": 222885 }, { "epoch": 2.47, "learning_rate": 8.86666961939351e-06, "loss": 0.5624, "step": 222890 }, { "epoch": 2.47, "learning_rate": 8.865746892254998e-06, "loss": 0.6659, "step": 222895 }, { "epoch": 2.47, "learning_rate": 8.864824165116485e-06, "loss": 0.5743, "step": 222900 }, { "epoch": 2.47, "learning_rate": 8.863901437977973e-06, "loss": 0.5852, "step": 222905 }, { "epoch": 2.47, "learning_rate": 8.862978710839461e-06, "loss": 0.6181, "step": 222910 }, { "epoch": 2.47, "learning_rate": 8.862055983700949e-06, "loss": 0.5745, "step": 222915 }, { "epoch": 2.47, "learning_rate": 8.861133256562435e-06, "loss": 0.5757, "step": 222920 }, { "epoch": 2.47, "learning_rate": 8.860210529423923e-06, "loss": 0.5624, "step": 222925 }, { "epoch": 2.47, "learning_rate": 8.85928780228541e-06, "loss": 0.6086, "step": 222930 }, { "epoch": 2.47, "learning_rate": 8.858365075146899e-06, "loss": 0.5991, "step": 222935 }, { "epoch": 2.47, "learning_rate": 8.857442348008386e-06, "loss": 0.6335, "step": 222940 }, { "epoch": 2.47, "learning_rate": 8.856519620869874e-06, "loss": 0.5838, "step": 222945 }, { "epoch": 2.47, "learning_rate": 8.855596893731362e-06, "loss": 0.6117, "step": 222950 }, { "epoch": 2.47, "learning_rate": 8.85467416659285e-06, "loss": 0.582, "step": 222955 }, { "epoch": 2.47, "learning_rate": 8.853751439454336e-06, "loss": 0.5779, "step": 222960 }, { "epoch": 2.47, "learning_rate": 8.852828712315824e-06, "loss": 0.6104, "step": 222965 }, { "epoch": 2.47, "learning_rate": 8.851905985177312e-06, "loss": 0.5618, "step": 222970 }, { "epoch": 2.47, "learning_rate": 8.850983258038798e-06, "loss": 0.5571, "step": 222975 }, { "epoch": 2.47, "learning_rate": 8.850060530900286e-06, "loss": 0.6194, "step": 222980 }, { "epoch": 2.47, "learning_rate": 8.849137803761774e-06, "loss": 0.581, "step": 222985 }, { "epoch": 2.47, "learning_rate": 8.848215076623262e-06, "loss": 0.5541, "step": 222990 }, { "epoch": 2.47, "learning_rate": 8.84729234948475e-06, "loss": 0.6123, "step": 222995 }, { "epoch": 2.47, "learning_rate": 8.846369622346237e-06, "loss": 0.5848, "step": 223000 }, { "epoch": 2.47, "eval_loss": 0.553193211555481, "eval_runtime": 69.5694, "eval_samples_per_second": 28.748, "eval_steps_per_second": 14.374, "step": 223000 }, { "epoch": 2.47, "learning_rate": 8.845446895207725e-06, "loss": 0.5854, "step": 223005 }, { "epoch": 2.47, "learning_rate": 8.844524168069213e-06, "loss": 0.5602, "step": 223010 }, { "epoch": 2.47, "learning_rate": 8.8436014409307e-06, "loss": 0.6001, "step": 223015 }, { "epoch": 2.47, "learning_rate": 8.842678713792189e-06, "loss": 0.5374, "step": 223020 }, { "epoch": 2.47, "learning_rate": 8.841755986653675e-06, "loss": 0.5628, "step": 223025 }, { "epoch": 2.47, "learning_rate": 8.840833259515163e-06, "loss": 0.5733, "step": 223030 }, { "epoch": 2.47, "learning_rate": 8.839910532376649e-06, "loss": 0.5981, "step": 223035 }, { "epoch": 2.47, "learning_rate": 8.838987805238137e-06, "loss": 0.5849, "step": 223040 }, { "epoch": 2.47, "learning_rate": 8.838065078099625e-06, "loss": 0.622, "step": 223045 }, { "epoch": 2.47, "learning_rate": 8.837142350961112e-06, "loss": 0.6239, "step": 223050 }, { "epoch": 2.47, "learning_rate": 8.8362196238226e-06, "loss": 0.598, "step": 223055 }, { "epoch": 2.47, "learning_rate": 8.835296896684088e-06, "loss": 0.5835, "step": 223060 }, { "epoch": 2.47, "learning_rate": 8.834374169545576e-06, "loss": 0.595, "step": 223065 }, { "epoch": 2.47, "learning_rate": 8.833451442407064e-06, "loss": 0.598, "step": 223070 }, { "epoch": 2.47, "learning_rate": 8.832528715268552e-06, "loss": 0.6551, "step": 223075 }, { "epoch": 2.47, "learning_rate": 8.83160598813004e-06, "loss": 0.5657, "step": 223080 }, { "epoch": 2.47, "learning_rate": 8.830683260991526e-06, "loss": 0.6032, "step": 223085 }, { "epoch": 2.47, "learning_rate": 8.829760533853013e-06, "loss": 0.5672, "step": 223090 }, { "epoch": 2.47, "learning_rate": 8.828837806714501e-06, "loss": 0.5486, "step": 223095 }, { "epoch": 2.47, "learning_rate": 8.827915079575989e-06, "loss": 0.5291, "step": 223100 }, { "epoch": 2.47, "learning_rate": 8.826992352437477e-06, "loss": 0.5572, "step": 223105 }, { "epoch": 2.47, "learning_rate": 8.826069625298965e-06, "loss": 0.5704, "step": 223110 }, { "epoch": 2.47, "learning_rate": 8.825146898160451e-06, "loss": 0.6057, "step": 223115 }, { "epoch": 2.47, "learning_rate": 8.824224171021939e-06, "loss": 0.6166, "step": 223120 }, { "epoch": 2.47, "learning_rate": 8.823301443883427e-06, "loss": 0.6452, "step": 223125 }, { "epoch": 2.47, "learning_rate": 8.822378716744915e-06, "loss": 0.6123, "step": 223130 }, { "epoch": 2.47, "learning_rate": 8.821455989606402e-06, "loss": 0.555, "step": 223135 }, { "epoch": 2.47, "learning_rate": 8.820533262467889e-06, "loss": 0.5722, "step": 223140 }, { "epoch": 2.47, "learning_rate": 8.819610535329376e-06, "loss": 0.6309, "step": 223145 }, { "epoch": 2.47, "learning_rate": 8.818687808190864e-06, "loss": 0.599, "step": 223150 }, { "epoch": 2.47, "learning_rate": 8.817765081052352e-06, "loss": 0.6315, "step": 223155 }, { "epoch": 2.47, "learning_rate": 8.81684235391384e-06, "loss": 0.6093, "step": 223160 }, { "epoch": 2.47, "learning_rate": 8.815919626775328e-06, "loss": 0.6245, "step": 223165 }, { "epoch": 2.47, "learning_rate": 8.814996899636816e-06, "loss": 0.5593, "step": 223170 }, { "epoch": 2.47, "learning_rate": 8.814074172498303e-06, "loss": 0.5849, "step": 223175 }, { "epoch": 2.47, "learning_rate": 8.813151445359791e-06, "loss": 0.547, "step": 223180 }, { "epoch": 2.47, "learning_rate": 8.81222871822128e-06, "loss": 0.5849, "step": 223185 }, { "epoch": 2.47, "learning_rate": 8.811305991082765e-06, "loss": 0.5962, "step": 223190 }, { "epoch": 2.47, "learning_rate": 8.810383263944253e-06, "loss": 0.6003, "step": 223195 }, { "epoch": 2.47, "learning_rate": 8.80946053680574e-06, "loss": 0.5817, "step": 223200 }, { "epoch": 2.47, "learning_rate": 8.808537809667227e-06, "loss": 0.6292, "step": 223205 }, { "epoch": 2.47, "learning_rate": 8.807615082528715e-06, "loss": 0.6187, "step": 223210 }, { "epoch": 2.47, "learning_rate": 8.806692355390203e-06, "loss": 0.614, "step": 223215 }, { "epoch": 2.47, "learning_rate": 8.80576962825169e-06, "loss": 0.577, "step": 223220 }, { "epoch": 2.47, "learning_rate": 8.804846901113179e-06, "loss": 0.6088, "step": 223225 }, { "epoch": 2.47, "learning_rate": 8.803924173974666e-06, "loss": 0.5771, "step": 223230 }, { "epoch": 2.47, "learning_rate": 8.803001446836154e-06, "loss": 0.6294, "step": 223235 }, { "epoch": 2.47, "learning_rate": 8.802078719697642e-06, "loss": 0.5511, "step": 223240 }, { "epoch": 2.47, "learning_rate": 8.80115599255913e-06, "loss": 0.5704, "step": 223245 }, { "epoch": 2.47, "learning_rate": 8.800233265420616e-06, "loss": 0.5916, "step": 223250 }, { "epoch": 2.47, "learning_rate": 8.799310538282104e-06, "loss": 0.6659, "step": 223255 }, { "epoch": 2.47, "learning_rate": 8.798387811143592e-06, "loss": 0.5777, "step": 223260 }, { "epoch": 2.47, "learning_rate": 8.797465084005078e-06, "loss": 0.6186, "step": 223265 }, { "epoch": 2.47, "learning_rate": 8.796542356866566e-06, "loss": 0.6409, "step": 223270 }, { "epoch": 2.47, "learning_rate": 8.795619629728054e-06, "loss": 0.632, "step": 223275 }, { "epoch": 2.47, "learning_rate": 8.794696902589542e-06, "loss": 0.5468, "step": 223280 }, { "epoch": 2.47, "learning_rate": 8.79377417545103e-06, "loss": 0.5738, "step": 223285 }, { "epoch": 2.47, "learning_rate": 8.792851448312517e-06, "loss": 0.5962, "step": 223290 }, { "epoch": 2.47, "learning_rate": 8.791928721174005e-06, "loss": 0.5785, "step": 223295 }, { "epoch": 2.47, "learning_rate": 8.791005994035493e-06, "loss": 0.5522, "step": 223300 }, { "epoch": 2.47, "learning_rate": 8.790083266896979e-06, "loss": 0.5996, "step": 223305 }, { "epoch": 2.47, "learning_rate": 8.789160539758467e-06, "loss": 0.612, "step": 223310 }, { "epoch": 2.47, "learning_rate": 8.788237812619955e-06, "loss": 0.5786, "step": 223315 }, { "epoch": 2.47, "learning_rate": 8.787315085481443e-06, "loss": 0.5256, "step": 223320 }, { "epoch": 2.47, "learning_rate": 8.78639235834293e-06, "loss": 0.5601, "step": 223325 }, { "epoch": 2.47, "learning_rate": 8.785469631204418e-06, "loss": 0.6265, "step": 223330 }, { "epoch": 2.47, "learning_rate": 8.784546904065906e-06, "loss": 0.5699, "step": 223335 }, { "epoch": 2.47, "learning_rate": 8.783624176927392e-06, "loss": 0.6367, "step": 223340 }, { "epoch": 2.47, "learning_rate": 8.78270144978888e-06, "loss": 0.5942, "step": 223345 }, { "epoch": 2.47, "learning_rate": 8.781778722650368e-06, "loss": 0.5553, "step": 223350 }, { "epoch": 2.47, "learning_rate": 8.780855995511856e-06, "loss": 0.5057, "step": 223355 }, { "epoch": 2.47, "learning_rate": 8.779933268373344e-06, "loss": 0.617, "step": 223360 }, { "epoch": 2.47, "learning_rate": 8.77901054123483e-06, "loss": 0.6189, "step": 223365 }, { "epoch": 2.47, "learning_rate": 8.778087814096318e-06, "loss": 0.6281, "step": 223370 }, { "epoch": 2.47, "learning_rate": 8.777165086957806e-06, "loss": 0.5971, "step": 223375 }, { "epoch": 2.47, "learning_rate": 8.776242359819293e-06, "loss": 0.5699, "step": 223380 }, { "epoch": 2.47, "learning_rate": 8.775319632680781e-06, "loss": 0.6406, "step": 223385 }, { "epoch": 2.47, "learning_rate": 8.774396905542269e-06, "loss": 0.6152, "step": 223390 }, { "epoch": 2.47, "learning_rate": 8.773474178403757e-06, "loss": 0.5791, "step": 223395 }, { "epoch": 2.47, "learning_rate": 8.772551451265245e-06, "loss": 0.5734, "step": 223400 }, { "epoch": 2.47, "learning_rate": 8.771628724126733e-06, "loss": 0.5517, "step": 223405 }, { "epoch": 2.47, "learning_rate": 8.770705996988219e-06, "loss": 0.551, "step": 223410 }, { "epoch": 2.47, "learning_rate": 8.769783269849707e-06, "loss": 0.5665, "step": 223415 }, { "epoch": 2.47, "learning_rate": 8.768860542711193e-06, "loss": 0.565, "step": 223420 }, { "epoch": 2.47, "learning_rate": 8.76793781557268e-06, "loss": 0.5312, "step": 223425 }, { "epoch": 2.47, "learning_rate": 8.767015088434169e-06, "loss": 0.6426, "step": 223430 }, { "epoch": 2.47, "learning_rate": 8.766092361295656e-06, "loss": 0.609, "step": 223435 }, { "epoch": 2.47, "learning_rate": 8.765169634157144e-06, "loss": 0.547, "step": 223440 }, { "epoch": 2.47, "learning_rate": 8.764246907018632e-06, "loss": 0.5859, "step": 223445 }, { "epoch": 2.47, "learning_rate": 8.76332417988012e-06, "loss": 0.5216, "step": 223450 }, { "epoch": 2.47, "learning_rate": 8.762401452741608e-06, "loss": 0.645, "step": 223455 }, { "epoch": 2.47, "learning_rate": 8.761478725603096e-06, "loss": 0.5978, "step": 223460 }, { "epoch": 2.47, "learning_rate": 8.760555998464583e-06, "loss": 0.5501, "step": 223465 }, { "epoch": 2.47, "learning_rate": 8.75963327132607e-06, "loss": 0.5942, "step": 223470 }, { "epoch": 2.47, "learning_rate": 8.758710544187557e-06, "loss": 0.5362, "step": 223475 }, { "epoch": 2.47, "learning_rate": 8.757787817049045e-06, "loss": 0.6022, "step": 223480 }, { "epoch": 2.47, "learning_rate": 8.756865089910533e-06, "loss": 0.6172, "step": 223485 }, { "epoch": 2.47, "learning_rate": 8.755942362772021e-06, "loss": 0.6168, "step": 223490 }, { "epoch": 2.47, "learning_rate": 8.755019635633507e-06, "loss": 0.5723, "step": 223495 }, { "epoch": 2.47, "learning_rate": 8.754096908494995e-06, "loss": 0.5669, "step": 223500 }, { "epoch": 2.47, "learning_rate": 8.753174181356483e-06, "loss": 0.5471, "step": 223505 }, { "epoch": 2.47, "learning_rate": 8.75225145421797e-06, "loss": 0.5655, "step": 223510 }, { "epoch": 2.47, "learning_rate": 8.751328727079459e-06, "loss": 0.5878, "step": 223515 }, { "epoch": 2.47, "learning_rate": 8.750405999940946e-06, "loss": 0.613, "step": 223520 }, { "epoch": 2.48, "learning_rate": 8.749483272802433e-06, "loss": 0.5422, "step": 223525 }, { "epoch": 2.48, "learning_rate": 8.74856054566392e-06, "loss": 0.6069, "step": 223530 }, { "epoch": 2.48, "learning_rate": 8.747637818525408e-06, "loss": 0.549, "step": 223535 }, { "epoch": 2.48, "learning_rate": 8.746715091386896e-06, "loss": 0.5615, "step": 223540 }, { "epoch": 2.48, "learning_rate": 8.745792364248384e-06, "loss": 0.6268, "step": 223545 }, { "epoch": 2.48, "learning_rate": 8.744869637109872e-06, "loss": 0.5784, "step": 223550 }, { "epoch": 2.48, "learning_rate": 8.74394690997136e-06, "loss": 0.5934, "step": 223555 }, { "epoch": 2.48, "learning_rate": 8.743024182832848e-06, "loss": 0.5764, "step": 223560 }, { "epoch": 2.48, "learning_rate": 8.742101455694335e-06, "loss": 0.5977, "step": 223565 }, { "epoch": 2.48, "learning_rate": 8.741178728555822e-06, "loss": 0.615, "step": 223570 }, { "epoch": 2.48, "learning_rate": 8.74025600141731e-06, "loss": 0.5343, "step": 223575 }, { "epoch": 2.48, "learning_rate": 8.739333274278797e-06, "loss": 0.5984, "step": 223580 }, { "epoch": 2.48, "learning_rate": 8.738410547140283e-06, "loss": 0.5139, "step": 223585 }, { "epoch": 2.48, "learning_rate": 8.737487820001771e-06, "loss": 0.5916, "step": 223590 }, { "epoch": 2.48, "learning_rate": 8.736565092863259e-06, "loss": 0.6202, "step": 223595 }, { "epoch": 2.48, "learning_rate": 8.735642365724747e-06, "loss": 0.568, "step": 223600 }, { "epoch": 2.48, "learning_rate": 8.734719638586235e-06, "loss": 0.5153, "step": 223605 }, { "epoch": 2.48, "learning_rate": 8.733796911447723e-06, "loss": 0.5652, "step": 223610 }, { "epoch": 2.48, "learning_rate": 8.73287418430921e-06, "loss": 0.6022, "step": 223615 }, { "epoch": 2.48, "learning_rate": 8.731951457170698e-06, "loss": 0.5863, "step": 223620 }, { "epoch": 2.48, "learning_rate": 8.731028730032186e-06, "loss": 0.5928, "step": 223625 }, { "epoch": 2.48, "learning_rate": 8.730106002893674e-06, "loss": 0.5698, "step": 223630 }, { "epoch": 2.48, "learning_rate": 8.72918327575516e-06, "loss": 0.6082, "step": 223635 }, { "epoch": 2.48, "learning_rate": 8.728260548616648e-06, "loss": 0.5977, "step": 223640 }, { "epoch": 2.48, "learning_rate": 8.727337821478134e-06, "loss": 0.5729, "step": 223645 }, { "epoch": 2.48, "learning_rate": 8.726415094339622e-06, "loss": 0.5615, "step": 223650 }, { "epoch": 2.48, "learning_rate": 8.72549236720111e-06, "loss": 0.5508, "step": 223655 }, { "epoch": 2.48, "learning_rate": 8.724569640062598e-06, "loss": 0.6287, "step": 223660 }, { "epoch": 2.48, "learning_rate": 8.723646912924086e-06, "loss": 0.594, "step": 223665 }, { "epoch": 2.48, "learning_rate": 8.722724185785573e-06, "loss": 0.5797, "step": 223670 }, { "epoch": 2.48, "learning_rate": 8.721801458647061e-06, "loss": 0.6137, "step": 223675 }, { "epoch": 2.48, "learning_rate": 8.720878731508549e-06, "loss": 0.5694, "step": 223680 }, { "epoch": 2.48, "learning_rate": 8.719956004370037e-06, "loss": 0.6075, "step": 223685 }, { "epoch": 2.48, "learning_rate": 8.719033277231523e-06, "loss": 0.5902, "step": 223690 }, { "epoch": 2.48, "learning_rate": 8.718110550093011e-06, "loss": 0.5741, "step": 223695 }, { "epoch": 2.48, "learning_rate": 8.717187822954499e-06, "loss": 0.5813, "step": 223700 }, { "epoch": 2.48, "learning_rate": 8.716265095815987e-06, "loss": 0.6295, "step": 223705 }, { "epoch": 2.48, "learning_rate": 8.715342368677474e-06, "loss": 0.6298, "step": 223710 }, { "epoch": 2.48, "learning_rate": 8.714419641538962e-06, "loss": 0.582, "step": 223715 }, { "epoch": 2.48, "learning_rate": 8.713496914400448e-06, "loss": 0.555, "step": 223720 }, { "epoch": 2.48, "learning_rate": 8.712574187261936e-06, "loss": 0.5978, "step": 223725 }, { "epoch": 2.48, "learning_rate": 8.711651460123424e-06, "loss": 0.6181, "step": 223730 }, { "epoch": 2.48, "learning_rate": 8.710728732984912e-06, "loss": 0.6107, "step": 223735 }, { "epoch": 2.48, "learning_rate": 8.7098060058464e-06, "loss": 0.5593, "step": 223740 }, { "epoch": 2.48, "learning_rate": 8.708883278707888e-06, "loss": 0.5522, "step": 223745 }, { "epoch": 2.48, "learning_rate": 8.707960551569374e-06, "loss": 0.5751, "step": 223750 }, { "epoch": 2.48, "learning_rate": 8.707037824430862e-06, "loss": 0.572, "step": 223755 }, { "epoch": 2.48, "learning_rate": 8.70611509729235e-06, "loss": 0.6025, "step": 223760 }, { "epoch": 2.48, "learning_rate": 8.705192370153837e-06, "loss": 0.6378, "step": 223765 }, { "epoch": 2.48, "learning_rate": 8.704269643015325e-06, "loss": 0.5636, "step": 223770 }, { "epoch": 2.48, "learning_rate": 8.703346915876813e-06, "loss": 0.5842, "step": 223775 }, { "epoch": 2.48, "learning_rate": 8.702424188738301e-06, "loss": 0.5692, "step": 223780 }, { "epoch": 2.48, "learning_rate": 8.701501461599789e-06, "loss": 0.5861, "step": 223785 }, { "epoch": 2.48, "learning_rate": 8.700578734461277e-06, "loss": 0.5912, "step": 223790 }, { "epoch": 2.48, "learning_rate": 8.699656007322763e-06, "loss": 0.5831, "step": 223795 }, { "epoch": 2.48, "learning_rate": 8.69873328018425e-06, "loss": 0.6511, "step": 223800 }, { "epoch": 2.48, "learning_rate": 8.697810553045737e-06, "loss": 0.5897, "step": 223805 }, { "epoch": 2.48, "learning_rate": 8.696887825907225e-06, "loss": 0.5461, "step": 223810 }, { "epoch": 2.48, "learning_rate": 8.695965098768713e-06, "loss": 0.5475, "step": 223815 }, { "epoch": 2.48, "learning_rate": 8.6950423716302e-06, "loss": 0.551, "step": 223820 }, { "epoch": 2.48, "learning_rate": 8.694119644491688e-06, "loss": 0.6452, "step": 223825 }, { "epoch": 2.48, "learning_rate": 8.693196917353176e-06, "loss": 0.5776, "step": 223830 }, { "epoch": 2.48, "learning_rate": 8.692274190214664e-06, "loss": 0.5915, "step": 223835 }, { "epoch": 2.48, "learning_rate": 8.691351463076152e-06, "loss": 0.5836, "step": 223840 }, { "epoch": 2.48, "learning_rate": 8.69042873593764e-06, "loss": 0.6256, "step": 223845 }, { "epoch": 2.48, "learning_rate": 8.689506008799127e-06, "loss": 0.5177, "step": 223850 }, { "epoch": 2.48, "learning_rate": 8.688583281660614e-06, "loss": 0.589, "step": 223855 }, { "epoch": 2.48, "learning_rate": 8.687660554522101e-06, "loss": 0.6901, "step": 223860 }, { "epoch": 2.48, "learning_rate": 8.68673782738359e-06, "loss": 0.597, "step": 223865 }, { "epoch": 2.48, "learning_rate": 8.685815100245075e-06, "loss": 0.5687, "step": 223870 }, { "epoch": 2.48, "learning_rate": 8.684892373106563e-06, "loss": 0.5935, "step": 223875 }, { "epoch": 2.48, "learning_rate": 8.683969645968051e-06, "loss": 0.589, "step": 223880 }, { "epoch": 2.48, "learning_rate": 8.683046918829539e-06, "loss": 0.6149, "step": 223885 }, { "epoch": 2.48, "learning_rate": 8.682124191691027e-06, "loss": 0.5777, "step": 223890 }, { "epoch": 2.48, "learning_rate": 8.681201464552515e-06, "loss": 0.5831, "step": 223895 }, { "epoch": 2.48, "learning_rate": 8.680278737414003e-06, "loss": 0.5879, "step": 223900 }, { "epoch": 2.48, "learning_rate": 8.67935601027549e-06, "loss": 0.578, "step": 223905 }, { "epoch": 2.48, "learning_rate": 8.678433283136978e-06, "loss": 0.5917, "step": 223910 }, { "epoch": 2.48, "learning_rate": 8.677510555998464e-06, "loss": 0.5694, "step": 223915 }, { "epoch": 2.48, "learning_rate": 8.676587828859952e-06, "loss": 0.5662, "step": 223920 }, { "epoch": 2.48, "learning_rate": 8.67566510172144e-06, "loss": 0.5601, "step": 223925 }, { "epoch": 2.48, "learning_rate": 8.674742374582928e-06, "loss": 0.5755, "step": 223930 }, { "epoch": 2.48, "learning_rate": 8.673819647444416e-06, "loss": 0.5465, "step": 223935 }, { "epoch": 2.48, "learning_rate": 8.672896920305904e-06, "loss": 0.6235, "step": 223940 }, { "epoch": 2.48, "learning_rate": 8.671974193167392e-06, "loss": 0.6333, "step": 223945 }, { "epoch": 2.48, "learning_rate": 8.671051466028878e-06, "loss": 0.5902, "step": 223950 }, { "epoch": 2.48, "learning_rate": 8.670128738890366e-06, "loss": 0.5979, "step": 223955 }, { "epoch": 2.48, "learning_rate": 8.669206011751853e-06, "loss": 0.5941, "step": 223960 }, { "epoch": 2.48, "learning_rate": 8.668283284613341e-06, "loss": 0.5903, "step": 223965 }, { "epoch": 2.48, "learning_rate": 8.667360557474827e-06, "loss": 0.5874, "step": 223970 }, { "epoch": 2.48, "learning_rate": 8.666437830336315e-06, "loss": 0.6388, "step": 223975 }, { "epoch": 2.48, "learning_rate": 8.665515103197803e-06, "loss": 0.5462, "step": 223980 }, { "epoch": 2.48, "learning_rate": 8.664592376059291e-06, "loss": 0.5982, "step": 223985 }, { "epoch": 2.48, "learning_rate": 8.663669648920779e-06, "loss": 0.6093, "step": 223990 }, { "epoch": 2.48, "learning_rate": 8.662746921782267e-06, "loss": 0.5628, "step": 223995 }, { "epoch": 2.48, "learning_rate": 8.661824194643754e-06, "loss": 0.5869, "step": 224000 }, { "epoch": 2.48, "eval_loss": 0.5519337058067322, "eval_runtime": 69.2618, "eval_samples_per_second": 28.876, "eval_steps_per_second": 14.438, "step": 224000 }, { "epoch": 2.48, "learning_rate": 8.660901467505242e-06, "loss": 0.5471, "step": 224005 }, { "epoch": 2.48, "learning_rate": 8.65997874036673e-06, "loss": 0.5844, "step": 224010 }, { "epoch": 2.48, "learning_rate": 8.659056013228218e-06, "loss": 0.5747, "step": 224015 }, { "epoch": 2.48, "learning_rate": 8.658133286089704e-06, "loss": 0.583, "step": 224020 }, { "epoch": 2.48, "learning_rate": 8.657210558951192e-06, "loss": 0.579, "step": 224025 }, { "epoch": 2.48, "learning_rate": 8.656287831812678e-06, "loss": 0.5821, "step": 224030 }, { "epoch": 2.48, "learning_rate": 8.655365104674166e-06, "loss": 0.6398, "step": 224035 }, { "epoch": 2.48, "learning_rate": 8.654442377535654e-06, "loss": 0.5854, "step": 224040 }, { "epoch": 2.48, "learning_rate": 8.653519650397142e-06, "loss": 0.5915, "step": 224045 }, { "epoch": 2.48, "learning_rate": 8.65259692325863e-06, "loss": 0.5451, "step": 224050 }, { "epoch": 2.48, "learning_rate": 8.651674196120117e-06, "loss": 0.5635, "step": 224055 }, { "epoch": 2.48, "learning_rate": 8.650751468981605e-06, "loss": 0.5935, "step": 224060 }, { "epoch": 2.48, "learning_rate": 8.649828741843093e-06, "loss": 0.5746, "step": 224065 }, { "epoch": 2.48, "learning_rate": 8.648906014704581e-06, "loss": 0.5635, "step": 224070 }, { "epoch": 2.48, "learning_rate": 8.647983287566067e-06, "loss": 0.5935, "step": 224075 }, { "epoch": 2.48, "learning_rate": 8.647060560427555e-06, "loss": 0.5295, "step": 224080 }, { "epoch": 2.48, "learning_rate": 8.646137833289043e-06, "loss": 0.6097, "step": 224085 }, { "epoch": 2.48, "learning_rate": 8.64521510615053e-06, "loss": 0.5614, "step": 224090 }, { "epoch": 2.48, "learning_rate": 8.644292379012019e-06, "loss": 0.5805, "step": 224095 }, { "epoch": 2.48, "learning_rate": 8.643369651873505e-06, "loss": 0.5646, "step": 224100 }, { "epoch": 2.48, "learning_rate": 8.642446924734993e-06, "loss": 0.5945, "step": 224105 }, { "epoch": 2.48, "learning_rate": 8.64152419759648e-06, "loss": 0.5702, "step": 224110 }, { "epoch": 2.48, "learning_rate": 8.640601470457968e-06, "loss": 0.5718, "step": 224115 }, { "epoch": 2.48, "learning_rate": 8.639678743319456e-06, "loss": 0.5752, "step": 224120 }, { "epoch": 2.48, "learning_rate": 8.638756016180944e-06, "loss": 0.548, "step": 224125 }, { "epoch": 2.48, "learning_rate": 8.637833289042432e-06, "loss": 0.5956, "step": 224130 }, { "epoch": 2.48, "learning_rate": 8.636910561903918e-06, "loss": 0.5611, "step": 224135 }, { "epoch": 2.48, "learning_rate": 8.635987834765406e-06, "loss": 0.6275, "step": 224140 }, { "epoch": 2.48, "learning_rate": 8.635065107626894e-06, "loss": 0.6005, "step": 224145 }, { "epoch": 2.48, "learning_rate": 8.634142380488381e-06, "loss": 0.5593, "step": 224150 }, { "epoch": 2.48, "learning_rate": 8.63321965334987e-06, "loss": 0.5892, "step": 224155 }, { "epoch": 2.48, "learning_rate": 8.632296926211357e-06, "loss": 0.5973, "step": 224160 }, { "epoch": 2.48, "learning_rate": 8.631374199072845e-06, "loss": 0.6427, "step": 224165 }, { "epoch": 2.48, "learning_rate": 8.630451471934333e-06, "loss": 0.6568, "step": 224170 }, { "epoch": 2.48, "learning_rate": 8.629528744795819e-06, "loss": 0.6256, "step": 224175 }, { "epoch": 2.48, "learning_rate": 8.628606017657307e-06, "loss": 0.604, "step": 224180 }, { "epoch": 2.48, "learning_rate": 8.627683290518795e-06, "loss": 0.5975, "step": 224185 }, { "epoch": 2.48, "learning_rate": 8.626760563380283e-06, "loss": 0.6339, "step": 224190 }, { "epoch": 2.48, "learning_rate": 8.625837836241769e-06, "loss": 0.5663, "step": 224195 }, { "epoch": 2.48, "learning_rate": 8.624915109103257e-06, "loss": 0.6253, "step": 224200 }, { "epoch": 2.48, "learning_rate": 8.623992381964744e-06, "loss": 0.5936, "step": 224205 }, { "epoch": 2.48, "learning_rate": 8.623069654826232e-06, "loss": 0.6165, "step": 224210 }, { "epoch": 2.48, "learning_rate": 8.62214692768772e-06, "loss": 0.6032, "step": 224215 }, { "epoch": 2.48, "learning_rate": 8.621224200549208e-06, "loss": 0.5585, "step": 224220 }, { "epoch": 2.48, "learning_rate": 8.620301473410696e-06, "loss": 0.6023, "step": 224225 }, { "epoch": 2.48, "learning_rate": 8.619378746272184e-06, "loss": 0.5772, "step": 224230 }, { "epoch": 2.48, "learning_rate": 8.618456019133672e-06, "loss": 0.5993, "step": 224235 }, { "epoch": 2.48, "learning_rate": 8.617533291995158e-06, "loss": 0.6033, "step": 224240 }, { "epoch": 2.48, "learning_rate": 8.616610564856646e-06, "loss": 0.5502, "step": 224245 }, { "epoch": 2.48, "learning_rate": 8.615687837718132e-06, "loss": 0.5571, "step": 224250 }, { "epoch": 2.48, "learning_rate": 8.61476511057962e-06, "loss": 0.6122, "step": 224255 }, { "epoch": 2.48, "learning_rate": 8.613842383441107e-06, "loss": 0.5883, "step": 224260 }, { "epoch": 2.48, "learning_rate": 8.612919656302595e-06, "loss": 0.6221, "step": 224265 }, { "epoch": 2.48, "learning_rate": 8.611996929164083e-06, "loss": 0.5761, "step": 224270 }, { "epoch": 2.48, "learning_rate": 8.611074202025571e-06, "loss": 0.5765, "step": 224275 }, { "epoch": 2.48, "learning_rate": 8.610151474887059e-06, "loss": 0.6146, "step": 224280 }, { "epoch": 2.48, "learning_rate": 8.609228747748547e-06, "loss": 0.6133, "step": 224285 }, { "epoch": 2.48, "learning_rate": 8.608306020610034e-06, "loss": 0.5379, "step": 224290 }, { "epoch": 2.48, "learning_rate": 8.607383293471522e-06, "loss": 0.5958, "step": 224295 }, { "epoch": 2.48, "learning_rate": 8.606460566333008e-06, "loss": 0.5919, "step": 224300 }, { "epoch": 2.48, "learning_rate": 8.605537839194496e-06, "loss": 0.6031, "step": 224305 }, { "epoch": 2.48, "learning_rate": 8.604615112055984e-06, "loss": 0.6172, "step": 224310 }, { "epoch": 2.48, "learning_rate": 8.603692384917472e-06, "loss": 0.5881, "step": 224315 }, { "epoch": 2.48, "learning_rate": 8.60276965777896e-06, "loss": 0.5553, "step": 224320 }, { "epoch": 2.48, "learning_rate": 8.601846930640448e-06, "loss": 0.6016, "step": 224325 }, { "epoch": 2.48, "learning_rate": 8.600924203501934e-06, "loss": 0.5486, "step": 224330 }, { "epoch": 2.48, "learning_rate": 8.600001476363422e-06, "loss": 0.6078, "step": 224335 }, { "epoch": 2.48, "learning_rate": 8.59907874922491e-06, "loss": 0.6187, "step": 224340 }, { "epoch": 2.48, "learning_rate": 8.598156022086397e-06, "loss": 0.6354, "step": 224345 }, { "epoch": 2.48, "learning_rate": 8.597233294947885e-06, "loss": 0.579, "step": 224350 }, { "epoch": 2.48, "learning_rate": 8.596310567809371e-06, "loss": 0.5822, "step": 224355 }, { "epoch": 2.48, "learning_rate": 8.59538784067086e-06, "loss": 0.5818, "step": 224360 }, { "epoch": 2.48, "learning_rate": 8.594465113532347e-06, "loss": 0.5788, "step": 224365 }, { "epoch": 2.48, "learning_rate": 8.593542386393835e-06, "loss": 0.6155, "step": 224370 }, { "epoch": 2.48, "learning_rate": 8.592619659255323e-06, "loss": 0.5743, "step": 224375 }, { "epoch": 2.48, "learning_rate": 8.59169693211681e-06, "loss": 0.6168, "step": 224380 }, { "epoch": 2.48, "learning_rate": 8.590774204978298e-06, "loss": 0.5926, "step": 224385 }, { "epoch": 2.48, "learning_rate": 8.589851477839786e-06, "loss": 0.6064, "step": 224390 }, { "epoch": 2.48, "learning_rate": 8.588928750701274e-06, "loss": 0.6277, "step": 224395 }, { "epoch": 2.48, "learning_rate": 8.588006023562762e-06, "loss": 0.5582, "step": 224400 }, { "epoch": 2.48, "learning_rate": 8.587083296424248e-06, "loss": 0.5608, "step": 224405 }, { "epoch": 2.48, "learning_rate": 8.586160569285736e-06, "loss": 0.5805, "step": 224410 }, { "epoch": 2.48, "learning_rate": 8.585237842147222e-06, "loss": 0.632, "step": 224415 }, { "epoch": 2.48, "learning_rate": 8.58431511500871e-06, "loss": 0.5663, "step": 224420 }, { "epoch": 2.48, "learning_rate": 8.583392387870198e-06, "loss": 0.5992, "step": 224425 }, { "epoch": 2.49, "learning_rate": 8.582469660731686e-06, "loss": 0.631, "step": 224430 }, { "epoch": 2.49, "learning_rate": 8.581546933593174e-06, "loss": 0.5718, "step": 224435 }, { "epoch": 2.49, "learning_rate": 8.580624206454661e-06, "loss": 0.597, "step": 224440 }, { "epoch": 2.49, "learning_rate": 8.57970147931615e-06, "loss": 0.5317, "step": 224445 }, { "epoch": 2.49, "learning_rate": 8.578778752177637e-06, "loss": 0.5371, "step": 224450 }, { "epoch": 2.49, "learning_rate": 8.577856025039125e-06, "loss": 0.5813, "step": 224455 }, { "epoch": 2.49, "learning_rate": 8.576933297900613e-06, "loss": 0.5552, "step": 224460 }, { "epoch": 2.49, "learning_rate": 8.576010570762099e-06, "loss": 0.6124, "step": 224465 }, { "epoch": 2.49, "learning_rate": 8.575087843623587e-06, "loss": 0.6087, "step": 224470 }, { "epoch": 2.49, "learning_rate": 8.574165116485075e-06, "loss": 0.6118, "step": 224475 }, { "epoch": 2.49, "learning_rate": 8.57324238934656e-06, "loss": 0.5428, "step": 224480 }, { "epoch": 2.49, "learning_rate": 8.572319662208049e-06, "loss": 0.5732, "step": 224485 }, { "epoch": 2.49, "learning_rate": 8.571396935069537e-06, "loss": 0.5925, "step": 224490 }, { "epoch": 2.49, "learning_rate": 8.570474207931024e-06, "loss": 0.6105, "step": 224495 }, { "epoch": 2.49, "learning_rate": 8.569551480792512e-06, "loss": 0.5913, "step": 224500 }, { "epoch": 2.49, "learning_rate": 8.568628753654e-06, "loss": 0.5678, "step": 224505 }, { "epoch": 2.49, "learning_rate": 8.567706026515488e-06, "loss": 0.5634, "step": 224510 }, { "epoch": 2.49, "learning_rate": 8.566783299376976e-06, "loss": 0.5433, "step": 224515 }, { "epoch": 2.49, "learning_rate": 8.565860572238462e-06, "loss": 0.5915, "step": 224520 }, { "epoch": 2.49, "learning_rate": 8.56493784509995e-06, "loss": 0.568, "step": 224525 }, { "epoch": 2.49, "learning_rate": 8.564015117961438e-06, "loss": 0.5208, "step": 224530 }, { "epoch": 2.49, "learning_rate": 8.563092390822925e-06, "loss": 0.6262, "step": 224535 }, { "epoch": 2.49, "learning_rate": 8.562169663684413e-06, "loss": 0.5237, "step": 224540 }, { "epoch": 2.49, "learning_rate": 8.561246936545901e-06, "loss": 0.5621, "step": 224545 }, { "epoch": 2.49, "learning_rate": 8.560324209407389e-06, "loss": 0.5597, "step": 224550 }, { "epoch": 2.49, "learning_rate": 8.559401482268875e-06, "loss": 0.5439, "step": 224555 }, { "epoch": 2.49, "learning_rate": 8.558478755130363e-06, "loss": 0.5862, "step": 224560 }, { "epoch": 2.49, "learning_rate": 8.557556027991851e-06, "loss": 0.5803, "step": 224565 }, { "epoch": 2.49, "learning_rate": 8.556633300853339e-06, "loss": 0.5579, "step": 224570 }, { "epoch": 2.49, "learning_rate": 8.555710573714827e-06, "loss": 0.5423, "step": 224575 }, { "epoch": 2.49, "learning_rate": 8.554787846576313e-06, "loss": 0.632, "step": 224580 }, { "epoch": 2.49, "learning_rate": 8.5538651194378e-06, "loss": 0.5887, "step": 224585 }, { "epoch": 2.49, "learning_rate": 8.552942392299288e-06, "loss": 0.5565, "step": 224590 }, { "epoch": 2.49, "learning_rate": 8.552019665160776e-06, "loss": 0.63, "step": 224595 }, { "epoch": 2.49, "learning_rate": 8.551096938022264e-06, "loss": 0.6211, "step": 224600 }, { "epoch": 2.49, "learning_rate": 8.550174210883752e-06, "loss": 0.5342, "step": 224605 }, { "epoch": 2.49, "learning_rate": 8.54925148374524e-06, "loss": 0.5838, "step": 224610 }, { "epoch": 2.49, "learning_rate": 8.548328756606728e-06, "loss": 0.5602, "step": 224615 }, { "epoch": 2.49, "learning_rate": 8.547406029468216e-06, "loss": 0.593, "step": 224620 }, { "epoch": 2.49, "learning_rate": 8.546483302329702e-06, "loss": 0.6001, "step": 224625 }, { "epoch": 2.49, "learning_rate": 8.54556057519119e-06, "loss": 0.6162, "step": 224630 }, { "epoch": 2.49, "learning_rate": 8.544637848052676e-06, "loss": 0.5258, "step": 224635 }, { "epoch": 2.49, "learning_rate": 8.543715120914164e-06, "loss": 0.5883, "step": 224640 }, { "epoch": 2.49, "learning_rate": 8.542792393775651e-06, "loss": 0.5331, "step": 224645 }, { "epoch": 2.49, "learning_rate": 8.54186966663714e-06, "loss": 0.5697, "step": 224650 }, { "epoch": 2.49, "learning_rate": 8.540946939498627e-06, "loss": 0.6169, "step": 224655 }, { "epoch": 2.49, "learning_rate": 8.540024212360115e-06, "loss": 0.6171, "step": 224660 }, { "epoch": 2.49, "learning_rate": 8.539101485221603e-06, "loss": 0.6, "step": 224665 }, { "epoch": 2.49, "learning_rate": 8.53817875808309e-06, "loss": 0.5783, "step": 224670 }, { "epoch": 2.49, "learning_rate": 8.537256030944578e-06, "loss": 0.6248, "step": 224675 }, { "epoch": 2.49, "learning_rate": 8.536333303806066e-06, "loss": 0.6317, "step": 224680 }, { "epoch": 2.49, "learning_rate": 8.535410576667552e-06, "loss": 0.6235, "step": 224685 }, { "epoch": 2.49, "learning_rate": 8.53448784952904e-06, "loss": 0.545, "step": 224690 }, { "epoch": 2.49, "learning_rate": 8.533565122390528e-06, "loss": 0.6305, "step": 224695 }, { "epoch": 2.49, "learning_rate": 8.532642395252016e-06, "loss": 0.5928, "step": 224700 }, { "epoch": 2.49, "learning_rate": 8.531719668113502e-06, "loss": 0.6322, "step": 224705 }, { "epoch": 2.49, "learning_rate": 8.53079694097499e-06, "loss": 0.5922, "step": 224710 }, { "epoch": 2.49, "learning_rate": 8.529874213836478e-06, "loss": 0.6075, "step": 224715 }, { "epoch": 2.49, "learning_rate": 8.528951486697966e-06, "loss": 0.5727, "step": 224720 }, { "epoch": 2.49, "learning_rate": 8.528028759559454e-06, "loss": 0.5422, "step": 224725 }, { "epoch": 2.49, "learning_rate": 8.527106032420941e-06, "loss": 0.5877, "step": 224730 }, { "epoch": 2.49, "learning_rate": 8.52618330528243e-06, "loss": 0.5613, "step": 224735 }, { "epoch": 2.49, "learning_rate": 8.525260578143917e-06, "loss": 0.5254, "step": 224740 }, { "epoch": 2.49, "learning_rate": 8.524337851005403e-06, "loss": 0.5863, "step": 224745 }, { "epoch": 2.49, "learning_rate": 8.523415123866891e-06, "loss": 0.5935, "step": 224750 }, { "epoch": 2.49, "learning_rate": 8.522492396728379e-06, "loss": 0.611, "step": 224755 }, { "epoch": 2.49, "learning_rate": 8.521569669589867e-06, "loss": 0.6031, "step": 224760 }, { "epoch": 2.49, "learning_rate": 8.520646942451355e-06, "loss": 0.5884, "step": 224765 }, { "epoch": 2.49, "learning_rate": 8.519724215312843e-06, "loss": 0.5718, "step": 224770 }, { "epoch": 2.49, "learning_rate": 8.51880148817433e-06, "loss": 0.605, "step": 224775 }, { "epoch": 2.49, "learning_rate": 8.517878761035818e-06, "loss": 0.5463, "step": 224780 }, { "epoch": 2.49, "learning_rate": 8.516956033897304e-06, "loss": 0.5567, "step": 224785 }, { "epoch": 2.49, "learning_rate": 8.516033306758792e-06, "loss": 0.5811, "step": 224790 }, { "epoch": 2.49, "learning_rate": 8.51511057962028e-06, "loss": 0.5794, "step": 224795 }, { "epoch": 2.49, "learning_rate": 8.514187852481766e-06, "loss": 0.5483, "step": 224800 }, { "epoch": 2.49, "learning_rate": 8.513265125343254e-06, "loss": 0.5769, "step": 224805 }, { "epoch": 2.49, "learning_rate": 8.512342398204742e-06, "loss": 0.6136, "step": 224810 }, { "epoch": 2.49, "learning_rate": 8.51141967106623e-06, "loss": 0.5555, "step": 224815 }, { "epoch": 2.49, "learning_rate": 8.510496943927718e-06, "loss": 0.6107, "step": 224820 }, { "epoch": 2.49, "learning_rate": 8.509574216789205e-06, "loss": 0.6605, "step": 224825 }, { "epoch": 2.49, "learning_rate": 8.508651489650693e-06, "loss": 0.6243, "step": 224830 }, { "epoch": 2.49, "learning_rate": 8.507728762512181e-06, "loss": 0.6287, "step": 224835 }, { "epoch": 2.49, "learning_rate": 8.506806035373669e-06, "loss": 0.6138, "step": 224840 }, { "epoch": 2.49, "learning_rate": 8.505883308235157e-06, "loss": 0.5783, "step": 224845 }, { "epoch": 2.49, "learning_rate": 8.504960581096643e-06, "loss": 0.6332, "step": 224850 }, { "epoch": 2.49, "learning_rate": 8.504037853958131e-06, "loss": 0.5964, "step": 224855 }, { "epoch": 2.49, "learning_rate": 8.503115126819617e-06, "loss": 0.5824, "step": 224860 }, { "epoch": 2.49, "learning_rate": 8.502192399681105e-06, "loss": 0.6798, "step": 224865 }, { "epoch": 2.49, "learning_rate": 8.501269672542593e-06, "loss": 0.5511, "step": 224870 }, { "epoch": 2.49, "learning_rate": 8.50034694540408e-06, "loss": 0.6026, "step": 224875 }, { "epoch": 2.49, "learning_rate": 8.499424218265568e-06, "loss": 0.5972, "step": 224880 }, { "epoch": 2.49, "learning_rate": 8.498501491127056e-06, "loss": 0.5285, "step": 224885 }, { "epoch": 2.49, "learning_rate": 8.497578763988544e-06, "loss": 0.5303, "step": 224890 }, { "epoch": 2.49, "learning_rate": 8.496656036850032e-06, "loss": 0.57, "step": 224895 }, { "epoch": 2.49, "learning_rate": 8.49573330971152e-06, "loss": 0.5672, "step": 224900 }, { "epoch": 2.49, "learning_rate": 8.494810582573006e-06, "loss": 0.5777, "step": 224905 }, { "epoch": 2.49, "learning_rate": 8.493887855434494e-06, "loss": 0.6077, "step": 224910 }, { "epoch": 2.49, "learning_rate": 8.492965128295982e-06, "loss": 0.5615, "step": 224915 }, { "epoch": 2.49, "learning_rate": 8.49204240115747e-06, "loss": 0.5762, "step": 224920 }, { "epoch": 2.49, "learning_rate": 8.491119674018957e-06, "loss": 0.546, "step": 224925 }, { "epoch": 2.49, "learning_rate": 8.490196946880445e-06, "loss": 0.5671, "step": 224930 }, { "epoch": 2.49, "learning_rate": 8.489274219741931e-06, "loss": 0.6458, "step": 224935 }, { "epoch": 2.49, "learning_rate": 8.48835149260342e-06, "loss": 0.6083, "step": 224940 }, { "epoch": 2.49, "learning_rate": 8.487428765464907e-06, "loss": 0.6532, "step": 224945 }, { "epoch": 2.49, "learning_rate": 8.486506038326395e-06, "loss": 0.5749, "step": 224950 }, { "epoch": 2.49, "learning_rate": 8.485583311187883e-06, "loss": 0.6086, "step": 224955 }, { "epoch": 2.49, "learning_rate": 8.48466058404937e-06, "loss": 0.5767, "step": 224960 }, { "epoch": 2.49, "learning_rate": 8.483737856910857e-06, "loss": 0.6049, "step": 224965 }, { "epoch": 2.49, "learning_rate": 8.482815129772345e-06, "loss": 0.6418, "step": 224970 }, { "epoch": 2.49, "learning_rate": 8.481892402633832e-06, "loss": 0.6141, "step": 224975 }, { "epoch": 2.49, "learning_rate": 8.48096967549532e-06, "loss": 0.6293, "step": 224980 }, { "epoch": 2.49, "learning_rate": 8.480046948356808e-06, "loss": 0.6764, "step": 224985 }, { "epoch": 2.49, "learning_rate": 8.479124221218296e-06, "loss": 0.5924, "step": 224990 }, { "epoch": 2.49, "learning_rate": 8.478201494079784e-06, "loss": 0.6078, "step": 224995 }, { "epoch": 2.49, "learning_rate": 8.477278766941272e-06, "loss": 0.5717, "step": 225000 }, { "epoch": 2.49, "eval_loss": 0.5620911717414856, "eval_runtime": 69.2128, "eval_samples_per_second": 28.896, "eval_steps_per_second": 14.448, "step": 225000 }, { "epoch": 2.49, "learning_rate": 8.47635603980276e-06, "loss": 0.5665, "step": 225005 }, { "epoch": 2.49, "learning_rate": 8.475433312664246e-06, "loss": 0.5968, "step": 225010 }, { "epoch": 2.49, "learning_rate": 8.474510585525734e-06, "loss": 0.5571, "step": 225015 }, { "epoch": 2.49, "learning_rate": 8.47358785838722e-06, "loss": 0.5529, "step": 225020 }, { "epoch": 2.49, "learning_rate": 8.472665131248708e-06, "loss": 0.6201, "step": 225025 }, { "epoch": 2.49, "learning_rate": 8.471742404110195e-06, "loss": 0.5465, "step": 225030 }, { "epoch": 2.49, "learning_rate": 8.470819676971683e-06, "loss": 0.5905, "step": 225035 }, { "epoch": 2.49, "learning_rate": 8.469896949833171e-06, "loss": 0.6008, "step": 225040 }, { "epoch": 2.49, "learning_rate": 8.468974222694659e-06, "loss": 0.6184, "step": 225045 }, { "epoch": 2.49, "learning_rate": 8.468051495556147e-06, "loss": 0.5628, "step": 225050 }, { "epoch": 2.49, "learning_rate": 8.467128768417635e-06, "loss": 0.5807, "step": 225055 }, { "epoch": 2.49, "learning_rate": 8.466206041279122e-06, "loss": 0.5808, "step": 225060 }, { "epoch": 2.49, "learning_rate": 8.46528331414061e-06, "loss": 0.5838, "step": 225065 }, { "epoch": 2.49, "learning_rate": 8.464360587002096e-06, "loss": 0.5911, "step": 225070 }, { "epoch": 2.49, "learning_rate": 8.463437859863584e-06, "loss": 0.5769, "step": 225075 }, { "epoch": 2.49, "learning_rate": 8.462515132725072e-06, "loss": 0.5326, "step": 225080 }, { "epoch": 2.49, "learning_rate": 8.461592405586558e-06, "loss": 0.5926, "step": 225085 }, { "epoch": 2.49, "learning_rate": 8.460669678448046e-06, "loss": 0.6156, "step": 225090 }, { "epoch": 2.49, "learning_rate": 8.459746951309534e-06, "loss": 0.5543, "step": 225095 }, { "epoch": 2.49, "learning_rate": 8.458824224171022e-06, "loss": 0.6106, "step": 225100 }, { "epoch": 2.49, "learning_rate": 8.45790149703251e-06, "loss": 0.6339, "step": 225105 }, { "epoch": 2.49, "learning_rate": 8.456978769893998e-06, "loss": 0.6449, "step": 225110 }, { "epoch": 2.49, "learning_rate": 8.456056042755485e-06, "loss": 0.6023, "step": 225115 }, { "epoch": 2.49, "learning_rate": 8.455133315616973e-06, "loss": 0.5739, "step": 225120 }, { "epoch": 2.49, "learning_rate": 8.454210588478461e-06, "loss": 0.5685, "step": 225125 }, { "epoch": 2.49, "learning_rate": 8.453287861339947e-06, "loss": 0.603, "step": 225130 }, { "epoch": 2.49, "learning_rate": 8.452365134201435e-06, "loss": 0.5336, "step": 225135 }, { "epoch": 2.49, "learning_rate": 8.451442407062923e-06, "loss": 0.6106, "step": 225140 }, { "epoch": 2.49, "learning_rate": 8.45051967992441e-06, "loss": 0.5397, "step": 225145 }, { "epoch": 2.49, "learning_rate": 8.449596952785899e-06, "loss": 0.571, "step": 225150 }, { "epoch": 2.49, "learning_rate": 8.448674225647387e-06, "loss": 0.5805, "step": 225155 }, { "epoch": 2.49, "learning_rate": 8.447751498508874e-06, "loss": 0.5826, "step": 225160 }, { "epoch": 2.49, "learning_rate": 8.44682877137036e-06, "loss": 0.5554, "step": 225165 }, { "epoch": 2.49, "learning_rate": 8.445906044231848e-06, "loss": 0.6327, "step": 225170 }, { "epoch": 2.49, "learning_rate": 8.444983317093336e-06, "loss": 0.55, "step": 225175 }, { "epoch": 2.49, "learning_rate": 8.444060589954824e-06, "loss": 0.5951, "step": 225180 }, { "epoch": 2.49, "learning_rate": 8.44313786281631e-06, "loss": 0.544, "step": 225185 }, { "epoch": 2.49, "learning_rate": 8.442215135677798e-06, "loss": 0.5633, "step": 225190 }, { "epoch": 2.49, "learning_rate": 8.441292408539286e-06, "loss": 0.5819, "step": 225195 }, { "epoch": 2.49, "learning_rate": 8.440369681400774e-06, "loss": 0.5805, "step": 225200 }, { "epoch": 2.49, "learning_rate": 8.439446954262262e-06, "loss": 0.6441, "step": 225205 }, { "epoch": 2.49, "learning_rate": 8.43852422712375e-06, "loss": 0.5804, "step": 225210 }, { "epoch": 2.49, "learning_rate": 8.437601499985237e-06, "loss": 0.6287, "step": 225215 }, { "epoch": 2.49, "learning_rate": 8.436678772846725e-06, "loss": 0.6108, "step": 225220 }, { "epoch": 2.49, "learning_rate": 8.435756045708213e-06, "loss": 0.596, "step": 225225 }, { "epoch": 2.49, "learning_rate": 8.434833318569701e-06, "loss": 0.5791, "step": 225230 }, { "epoch": 2.49, "learning_rate": 8.433910591431187e-06, "loss": 0.5875, "step": 225235 }, { "epoch": 2.49, "learning_rate": 8.432987864292675e-06, "loss": 0.6215, "step": 225240 }, { "epoch": 2.49, "learning_rate": 8.432065137154161e-06, "loss": 0.5815, "step": 225245 }, { "epoch": 2.49, "learning_rate": 8.431142410015649e-06, "loss": 0.5491, "step": 225250 }, { "epoch": 2.49, "learning_rate": 8.430219682877137e-06, "loss": 0.5885, "step": 225255 }, { "epoch": 2.49, "learning_rate": 8.429296955738625e-06, "loss": 0.5653, "step": 225260 }, { "epoch": 2.49, "learning_rate": 8.428374228600112e-06, "loss": 0.5895, "step": 225265 }, { "epoch": 2.49, "learning_rate": 8.4274515014616e-06, "loss": 0.595, "step": 225270 }, { "epoch": 2.49, "learning_rate": 8.426528774323088e-06, "loss": 0.5808, "step": 225275 }, { "epoch": 2.49, "learning_rate": 8.425606047184576e-06, "loss": 0.5604, "step": 225280 }, { "epoch": 2.49, "learning_rate": 8.424683320046064e-06, "loss": 0.5571, "step": 225285 }, { "epoch": 2.49, "learning_rate": 8.42376059290755e-06, "loss": 0.572, "step": 225290 }, { "epoch": 2.49, "learning_rate": 8.422837865769038e-06, "loss": 0.6199, "step": 225295 }, { "epoch": 2.49, "learning_rate": 8.421915138630526e-06, "loss": 0.5603, "step": 225300 }, { "epoch": 2.49, "learning_rate": 8.420992411492014e-06, "loss": 0.5745, "step": 225305 }, { "epoch": 2.49, "learning_rate": 8.420069684353501e-06, "loss": 0.5623, "step": 225310 }, { "epoch": 2.49, "learning_rate": 8.419146957214988e-06, "loss": 0.6055, "step": 225315 }, { "epoch": 2.49, "learning_rate": 8.418224230076475e-06, "loss": 0.5578, "step": 225320 }, { "epoch": 2.49, "learning_rate": 8.417301502937963e-06, "loss": 0.5636, "step": 225325 }, { "epoch": 2.5, "learning_rate": 8.416378775799451e-06, "loss": 0.5812, "step": 225330 }, { "epoch": 2.5, "learning_rate": 8.415456048660939e-06, "loss": 0.5091, "step": 225335 }, { "epoch": 2.5, "learning_rate": 8.414533321522427e-06, "loss": 0.6109, "step": 225340 }, { "epoch": 2.5, "learning_rate": 8.413610594383915e-06, "loss": 0.6342, "step": 225345 }, { "epoch": 2.5, "learning_rate": 8.4126878672454e-06, "loss": 0.6131, "step": 225350 }, { "epoch": 2.5, "learning_rate": 8.411765140106889e-06, "loss": 0.5369, "step": 225355 }, { "epoch": 2.5, "learning_rate": 8.410842412968376e-06, "loss": 0.6326, "step": 225360 }, { "epoch": 2.5, "learning_rate": 8.409919685829864e-06, "loss": 0.5665, "step": 225365 }, { "epoch": 2.5, "learning_rate": 8.408996958691352e-06, "loss": 0.6129, "step": 225370 }, { "epoch": 2.5, "learning_rate": 8.40807423155284e-06, "loss": 0.5929, "step": 225375 }, { "epoch": 2.5, "learning_rate": 8.407151504414328e-06, "loss": 0.5609, "step": 225380 }, { "epoch": 2.5, "learning_rate": 8.406228777275816e-06, "loss": 0.5545, "step": 225385 }, { "epoch": 2.5, "learning_rate": 8.405306050137302e-06, "loss": 0.5445, "step": 225390 }, { "epoch": 2.5, "learning_rate": 8.40438332299879e-06, "loss": 0.5918, "step": 225395 }, { "epoch": 2.5, "learning_rate": 8.403460595860278e-06, "loss": 0.5686, "step": 225400 }, { "epoch": 2.5, "learning_rate": 8.402537868721765e-06, "loss": 0.5744, "step": 225405 }, { "epoch": 2.5, "learning_rate": 8.401615141583252e-06, "loss": 0.5964, "step": 225410 }, { "epoch": 2.5, "learning_rate": 8.40069241444474e-06, "loss": 0.5869, "step": 225415 }, { "epoch": 2.5, "learning_rate": 8.399769687306227e-06, "loss": 0.5896, "step": 225420 }, { "epoch": 2.5, "learning_rate": 8.398846960167715e-06, "loss": 0.6033, "step": 225425 }, { "epoch": 2.5, "learning_rate": 8.397924233029203e-06, "loss": 0.5661, "step": 225430 }, { "epoch": 2.5, "learning_rate": 8.39700150589069e-06, "loss": 0.592, "step": 225435 }, { "epoch": 2.5, "learning_rate": 8.396078778752179e-06, "loss": 0.6206, "step": 225440 }, { "epoch": 2.5, "learning_rate": 8.395156051613667e-06, "loss": 0.614, "step": 225445 }, { "epoch": 2.5, "learning_rate": 8.394233324475154e-06, "loss": 0.5825, "step": 225450 }, { "epoch": 2.5, "learning_rate": 8.39331059733664e-06, "loss": 0.5984, "step": 225455 }, { "epoch": 2.5, "learning_rate": 8.392387870198128e-06, "loss": 0.5286, "step": 225460 }, { "epoch": 2.5, "learning_rate": 8.391465143059615e-06, "loss": 0.5372, "step": 225465 }, { "epoch": 2.5, "learning_rate": 8.390542415921102e-06, "loss": 0.5846, "step": 225470 }, { "epoch": 2.5, "learning_rate": 8.38961968878259e-06, "loss": 0.6444, "step": 225475 }, { "epoch": 2.5, "learning_rate": 8.388696961644078e-06, "loss": 0.6013, "step": 225480 }, { "epoch": 2.5, "learning_rate": 8.387774234505566e-06, "loss": 0.6292, "step": 225485 }, { "epoch": 2.5, "learning_rate": 8.386851507367054e-06, "loss": 0.5849, "step": 225490 }, { "epoch": 2.5, "learning_rate": 8.385928780228542e-06, "loss": 0.6147, "step": 225495 }, { "epoch": 2.5, "learning_rate": 8.38500605309003e-06, "loss": 0.5246, "step": 225500 }, { "epoch": 2.5, "learning_rate": 8.384083325951517e-06, "loss": 0.6127, "step": 225505 }, { "epoch": 2.5, "learning_rate": 8.383160598813005e-06, "loss": 0.5883, "step": 225510 }, { "epoch": 2.5, "learning_rate": 8.382237871674491e-06, "loss": 0.5577, "step": 225515 }, { "epoch": 2.5, "learning_rate": 8.38131514453598e-06, "loss": 0.6129, "step": 225520 }, { "epoch": 2.5, "learning_rate": 8.380392417397467e-06, "loss": 0.5659, "step": 225525 }, { "epoch": 2.5, "learning_rate": 8.379469690258955e-06, "loss": 0.6104, "step": 225530 }, { "epoch": 2.5, "learning_rate": 8.378546963120443e-06, "loss": 0.6284, "step": 225535 }, { "epoch": 2.5, "learning_rate": 8.377624235981929e-06, "loss": 0.6017, "step": 225540 }, { "epoch": 2.5, "learning_rate": 8.376701508843417e-06, "loss": 0.6196, "step": 225545 }, { "epoch": 2.5, "learning_rate": 8.375778781704905e-06, "loss": 0.6343, "step": 225550 }, { "epoch": 2.5, "learning_rate": 8.374856054566392e-06, "loss": 0.5726, "step": 225555 }, { "epoch": 2.5, "learning_rate": 8.37393332742788e-06, "loss": 0.6153, "step": 225560 }, { "epoch": 2.5, "learning_rate": 8.373010600289368e-06, "loss": 0.5819, "step": 225565 }, { "epoch": 2.5, "learning_rate": 8.372087873150854e-06, "loss": 0.6432, "step": 225570 }, { "epoch": 2.5, "learning_rate": 8.371165146012342e-06, "loss": 0.5859, "step": 225575 }, { "epoch": 2.5, "learning_rate": 8.37024241887383e-06, "loss": 0.5701, "step": 225580 }, { "epoch": 2.5, "learning_rate": 8.369319691735318e-06, "loss": 0.6235, "step": 225585 }, { "epoch": 2.5, "learning_rate": 8.368396964596806e-06, "loss": 0.5549, "step": 225590 }, { "epoch": 2.5, "learning_rate": 8.367474237458293e-06, "loss": 0.5681, "step": 225595 }, { "epoch": 2.5, "learning_rate": 8.366551510319781e-06, "loss": 0.6214, "step": 225600 }, { "epoch": 2.5, "learning_rate": 8.36562878318127e-06, "loss": 0.5846, "step": 225605 }, { "epoch": 2.5, "learning_rate": 8.364706056042757e-06, "loss": 0.6039, "step": 225610 }, { "epoch": 2.5, "learning_rate": 8.363783328904245e-06, "loss": 0.6164, "step": 225615 }, { "epoch": 2.5, "learning_rate": 8.362860601765731e-06, "loss": 0.5378, "step": 225620 }, { "epoch": 2.5, "learning_rate": 8.361937874627219e-06, "loss": 0.5848, "step": 225625 }, { "epoch": 2.5, "learning_rate": 8.361015147488705e-06, "loss": 0.5596, "step": 225630 }, { "epoch": 2.5, "learning_rate": 8.360092420350193e-06, "loss": 0.6177, "step": 225635 }, { "epoch": 2.5, "learning_rate": 8.35916969321168e-06, "loss": 0.5762, "step": 225640 }, { "epoch": 2.5, "learning_rate": 8.358246966073169e-06, "loss": 0.5968, "step": 225645 }, { "epoch": 2.5, "learning_rate": 8.357324238934656e-06, "loss": 0.5537, "step": 225650 }, { "epoch": 2.5, "learning_rate": 8.356401511796144e-06, "loss": 0.5822, "step": 225655 }, { "epoch": 2.5, "learning_rate": 8.355478784657632e-06, "loss": 0.5365, "step": 225660 }, { "epoch": 2.5, "learning_rate": 8.35455605751912e-06, "loss": 0.5814, "step": 225665 }, { "epoch": 2.5, "learning_rate": 8.353633330380608e-06, "loss": 0.5887, "step": 225670 }, { "epoch": 2.5, "learning_rate": 8.352710603242096e-06, "loss": 0.5571, "step": 225675 }, { "epoch": 2.5, "learning_rate": 8.351787876103582e-06, "loss": 0.5848, "step": 225680 }, { "epoch": 2.5, "learning_rate": 8.35086514896507e-06, "loss": 0.5548, "step": 225685 }, { "epoch": 2.5, "learning_rate": 8.349942421826558e-06, "loss": 0.5955, "step": 225690 }, { "epoch": 2.5, "learning_rate": 8.349019694688044e-06, "loss": 0.5954, "step": 225695 }, { "epoch": 2.5, "learning_rate": 8.348096967549532e-06, "loss": 0.5423, "step": 225700 }, { "epoch": 2.5, "learning_rate": 8.34717424041102e-06, "loss": 0.5917, "step": 225705 }, { "epoch": 2.5, "learning_rate": 8.346251513272507e-06, "loss": 0.6431, "step": 225710 }, { "epoch": 2.5, "learning_rate": 8.345328786133995e-06, "loss": 0.6153, "step": 225715 }, { "epoch": 2.5, "learning_rate": 8.344406058995483e-06, "loss": 0.5916, "step": 225720 }, { "epoch": 2.5, "learning_rate": 8.34348333185697e-06, "loss": 0.5276, "step": 225725 }, { "epoch": 2.5, "learning_rate": 8.342560604718459e-06, "loss": 0.5707, "step": 225730 }, { "epoch": 2.5, "learning_rate": 8.341637877579945e-06, "loss": 0.578, "step": 225735 }, { "epoch": 2.5, "learning_rate": 8.340715150441433e-06, "loss": 0.5491, "step": 225740 }, { "epoch": 2.5, "learning_rate": 8.33979242330292e-06, "loss": 0.5567, "step": 225745 }, { "epoch": 2.5, "learning_rate": 8.338869696164408e-06, "loss": 0.5874, "step": 225750 }, { "epoch": 2.5, "learning_rate": 8.337946969025896e-06, "loss": 0.5617, "step": 225755 }, { "epoch": 2.5, "learning_rate": 8.337024241887384e-06, "loss": 0.5823, "step": 225760 }, { "epoch": 2.5, "learning_rate": 8.336101514748872e-06, "loss": 0.5832, "step": 225765 }, { "epoch": 2.5, "learning_rate": 8.335178787610358e-06, "loss": 0.5847, "step": 225770 }, { "epoch": 2.5, "learning_rate": 8.334256060471846e-06, "loss": 0.5978, "step": 225775 }, { "epoch": 2.5, "learning_rate": 8.333333333333334e-06, "loss": 0.5657, "step": 225780 }, { "epoch": 2.5, "learning_rate": 8.332410606194822e-06, "loss": 0.6142, "step": 225785 }, { "epoch": 2.5, "learning_rate": 8.33148787905631e-06, "loss": 0.5335, "step": 225790 }, { "epoch": 2.5, "learning_rate": 8.330565151917796e-06, "loss": 0.6045, "step": 225795 }, { "epoch": 2.5, "learning_rate": 8.329642424779283e-06, "loss": 0.6207, "step": 225800 }, { "epoch": 2.5, "learning_rate": 8.328719697640771e-06, "loss": 0.5678, "step": 225805 }, { "epoch": 2.5, "learning_rate": 8.327796970502259e-06, "loss": 0.6064, "step": 225810 }, { "epoch": 2.5, "learning_rate": 8.326874243363747e-06, "loss": 0.6091, "step": 225815 }, { "epoch": 2.5, "learning_rate": 8.325951516225235e-06, "loss": 0.5606, "step": 225820 }, { "epoch": 2.5, "learning_rate": 8.325028789086723e-06, "loss": 0.5512, "step": 225825 }, { "epoch": 2.5, "learning_rate": 8.32410606194821e-06, "loss": 0.5773, "step": 225830 }, { "epoch": 2.5, "learning_rate": 8.323183334809698e-06, "loss": 0.5683, "step": 225835 }, { "epoch": 2.5, "learning_rate": 8.322260607671185e-06, "loss": 0.6326, "step": 225840 }, { "epoch": 2.5, "learning_rate": 8.321337880532672e-06, "loss": 0.5448, "step": 225845 }, { "epoch": 2.5, "learning_rate": 8.320415153394159e-06, "loss": 0.5698, "step": 225850 }, { "epoch": 2.5, "learning_rate": 8.319492426255646e-06, "loss": 0.6005, "step": 225855 }, { "epoch": 2.5, "learning_rate": 8.318569699117134e-06, "loss": 0.5849, "step": 225860 }, { "epoch": 2.5, "learning_rate": 8.317646971978622e-06, "loss": 0.611, "step": 225865 }, { "epoch": 2.5, "learning_rate": 8.31672424484011e-06, "loss": 0.6464, "step": 225870 }, { "epoch": 2.5, "learning_rate": 8.315801517701598e-06, "loss": 0.5958, "step": 225875 }, { "epoch": 2.5, "learning_rate": 8.314878790563086e-06, "loss": 0.5564, "step": 225880 }, { "epoch": 2.5, "learning_rate": 8.313956063424573e-06, "loss": 0.5494, "step": 225885 }, { "epoch": 2.5, "learning_rate": 8.313033336286061e-06, "loss": 0.5573, "step": 225890 }, { "epoch": 2.5, "learning_rate": 8.31211060914755e-06, "loss": 0.542, "step": 225895 }, { "epoch": 2.5, "learning_rate": 8.311187882009035e-06, "loss": 0.5799, "step": 225900 }, { "epoch": 2.5, "learning_rate": 8.310265154870523e-06, "loss": 0.5847, "step": 225905 }, { "epoch": 2.5, "learning_rate": 8.309342427732011e-06, "loss": 0.5778, "step": 225910 }, { "epoch": 2.5, "learning_rate": 8.308419700593499e-06, "loss": 0.6504, "step": 225915 }, { "epoch": 2.5, "learning_rate": 8.307496973454985e-06, "loss": 0.6296, "step": 225920 }, { "epoch": 2.5, "learning_rate": 8.306574246316473e-06, "loss": 0.5957, "step": 225925 }, { "epoch": 2.5, "learning_rate": 8.30565151917796e-06, "loss": 0.5653, "step": 225930 }, { "epoch": 2.5, "learning_rate": 8.304728792039449e-06, "loss": 0.6217, "step": 225935 }, { "epoch": 2.5, "learning_rate": 8.303806064900936e-06, "loss": 0.6274, "step": 225940 }, { "epoch": 2.5, "learning_rate": 8.302883337762424e-06, "loss": 0.5681, "step": 225945 }, { "epoch": 2.5, "learning_rate": 8.301960610623912e-06, "loss": 0.6603, "step": 225950 }, { "epoch": 2.5, "learning_rate": 8.3010378834854e-06, "loss": 0.5692, "step": 225955 }, { "epoch": 2.5, "learning_rate": 8.300115156346886e-06, "loss": 0.5715, "step": 225960 }, { "epoch": 2.5, "learning_rate": 8.299192429208374e-06, "loss": 0.5732, "step": 225965 }, { "epoch": 2.5, "learning_rate": 8.298269702069862e-06, "loss": 0.5829, "step": 225970 }, { "epoch": 2.5, "learning_rate": 8.29734697493135e-06, "loss": 0.5724, "step": 225975 }, { "epoch": 2.5, "learning_rate": 8.296424247792838e-06, "loss": 0.6616, "step": 225980 }, { "epoch": 2.5, "learning_rate": 8.295501520654325e-06, "loss": 0.5461, "step": 225985 }, { "epoch": 2.5, "learning_rate": 8.294578793515813e-06, "loss": 0.5725, "step": 225990 }, { "epoch": 2.5, "learning_rate": 8.293656066377301e-06, "loss": 0.5663, "step": 225995 }, { "epoch": 2.5, "learning_rate": 8.292733339238787e-06, "loss": 0.5675, "step": 226000 }, { "epoch": 2.5, "eval_loss": 0.5445634722709656, "eval_runtime": 69.1827, "eval_samples_per_second": 28.909, "eval_steps_per_second": 14.454, "step": 226000 }, { "epoch": 2.5, "learning_rate": 8.291810612100275e-06, "loss": 0.6205, "step": 226005 }, { "epoch": 2.5, "learning_rate": 8.290887884961763e-06, "loss": 0.6176, "step": 226010 }, { "epoch": 2.5, "learning_rate": 8.289965157823249e-06, "loss": 0.5495, "step": 226015 }, { "epoch": 2.5, "learning_rate": 8.289042430684737e-06, "loss": 0.5664, "step": 226020 }, { "epoch": 2.5, "learning_rate": 8.288119703546225e-06, "loss": 0.5819, "step": 226025 }, { "epoch": 2.5, "learning_rate": 8.287196976407713e-06, "loss": 0.6066, "step": 226030 }, { "epoch": 2.5, "learning_rate": 8.2862742492692e-06, "loss": 0.5963, "step": 226035 }, { "epoch": 2.5, "learning_rate": 8.285351522130688e-06, "loss": 0.5322, "step": 226040 }, { "epoch": 2.5, "learning_rate": 8.284428794992176e-06, "loss": 0.5873, "step": 226045 }, { "epoch": 2.5, "learning_rate": 8.283506067853664e-06, "loss": 0.5322, "step": 226050 }, { "epoch": 2.5, "learning_rate": 8.282583340715152e-06, "loss": 0.5684, "step": 226055 }, { "epoch": 2.5, "learning_rate": 8.28166061357664e-06, "loss": 0.5897, "step": 226060 }, { "epoch": 2.5, "learning_rate": 8.280737886438126e-06, "loss": 0.5897, "step": 226065 }, { "epoch": 2.5, "learning_rate": 8.279815159299614e-06, "loss": 0.58, "step": 226070 }, { "epoch": 2.5, "learning_rate": 8.2788924321611e-06, "loss": 0.5761, "step": 226075 }, { "epoch": 2.5, "learning_rate": 8.277969705022588e-06, "loss": 0.6027, "step": 226080 }, { "epoch": 2.5, "learning_rate": 8.277046977884076e-06, "loss": 0.5778, "step": 226085 }, { "epoch": 2.5, "learning_rate": 8.276124250745563e-06, "loss": 0.5659, "step": 226090 }, { "epoch": 2.5, "learning_rate": 8.275201523607051e-06, "loss": 0.584, "step": 226095 }, { "epoch": 2.5, "learning_rate": 8.274278796468539e-06, "loss": 0.5896, "step": 226100 }, { "epoch": 2.5, "learning_rate": 8.273356069330027e-06, "loss": 0.6124, "step": 226105 }, { "epoch": 2.5, "learning_rate": 8.272433342191515e-06, "loss": 0.5982, "step": 226110 }, { "epoch": 2.5, "learning_rate": 8.271510615053003e-06, "loss": 0.5974, "step": 226115 }, { "epoch": 2.5, "learning_rate": 8.270587887914489e-06, "loss": 0.5565, "step": 226120 }, { "epoch": 2.5, "learning_rate": 8.269665160775977e-06, "loss": 0.5716, "step": 226125 }, { "epoch": 2.5, "learning_rate": 8.268742433637465e-06, "loss": 0.5954, "step": 226130 }, { "epoch": 2.5, "learning_rate": 8.267819706498952e-06, "loss": 0.6142, "step": 226135 }, { "epoch": 2.5, "learning_rate": 8.26689697936044e-06, "loss": 0.5633, "step": 226140 }, { "epoch": 2.5, "learning_rate": 8.265974252221928e-06, "loss": 0.5753, "step": 226145 }, { "epoch": 2.5, "learning_rate": 8.265051525083414e-06, "loss": 0.5444, "step": 226150 }, { "epoch": 2.5, "learning_rate": 8.264128797944902e-06, "loss": 0.5804, "step": 226155 }, { "epoch": 2.5, "learning_rate": 8.26320607080639e-06, "loss": 0.5737, "step": 226160 }, { "epoch": 2.5, "learning_rate": 8.262283343667878e-06, "loss": 0.5728, "step": 226165 }, { "epoch": 2.5, "learning_rate": 8.261360616529366e-06, "loss": 0.5841, "step": 226170 }, { "epoch": 2.5, "learning_rate": 8.260437889390853e-06, "loss": 0.5584, "step": 226175 }, { "epoch": 2.5, "learning_rate": 8.25951516225234e-06, "loss": 0.6355, "step": 226180 }, { "epoch": 2.5, "learning_rate": 8.258592435113827e-06, "loss": 0.6142, "step": 226185 }, { "epoch": 2.5, "learning_rate": 8.257669707975315e-06, "loss": 0.6061, "step": 226190 }, { "epoch": 2.5, "learning_rate": 8.256746980836803e-06, "loss": 0.5844, "step": 226195 }, { "epoch": 2.5, "learning_rate": 8.255824253698291e-06, "loss": 0.5947, "step": 226200 }, { "epoch": 2.5, "learning_rate": 8.254901526559779e-06, "loss": 0.6031, "step": 226205 }, { "epoch": 2.5, "learning_rate": 8.253978799421267e-06, "loss": 0.6087, "step": 226210 }, { "epoch": 2.5, "learning_rate": 8.253056072282755e-06, "loss": 0.5817, "step": 226215 }, { "epoch": 2.5, "learning_rate": 8.252133345144242e-06, "loss": 0.513, "step": 226220 }, { "epoch": 2.5, "learning_rate": 8.251210618005729e-06, "loss": 0.6303, "step": 226225 }, { "epoch": 2.5, "learning_rate": 8.250287890867216e-06, "loss": 0.6222, "step": 226230 }, { "epoch": 2.51, "learning_rate": 8.249365163728703e-06, "loss": 0.5822, "step": 226235 }, { "epoch": 2.51, "learning_rate": 8.24844243659019e-06, "loss": 0.5549, "step": 226240 }, { "epoch": 2.51, "learning_rate": 8.247519709451678e-06, "loss": 0.5973, "step": 226245 }, { "epoch": 2.51, "learning_rate": 8.246596982313166e-06, "loss": 0.5665, "step": 226250 }, { "epoch": 2.51, "learning_rate": 8.245674255174654e-06, "loss": 0.6516, "step": 226255 }, { "epoch": 2.51, "learning_rate": 8.244751528036142e-06, "loss": 0.6461, "step": 226260 }, { "epoch": 2.51, "learning_rate": 8.24382880089763e-06, "loss": 0.5942, "step": 226265 }, { "epoch": 2.51, "learning_rate": 8.242906073759117e-06, "loss": 0.5934, "step": 226270 }, { "epoch": 2.51, "learning_rate": 8.241983346620605e-06, "loss": 0.5977, "step": 226275 }, { "epoch": 2.51, "learning_rate": 8.241060619482093e-06, "loss": 0.5272, "step": 226280 }, { "epoch": 2.51, "learning_rate": 8.24013789234358e-06, "loss": 0.6349, "step": 226285 }, { "epoch": 2.51, "learning_rate": 8.239215165205067e-06, "loss": 0.5695, "step": 226290 }, { "epoch": 2.51, "learning_rate": 8.238292438066555e-06, "loss": 0.5663, "step": 226295 }, { "epoch": 2.51, "learning_rate": 8.237369710928041e-06, "loss": 0.5742, "step": 226300 }, { "epoch": 2.51, "learning_rate": 8.236446983789529e-06, "loss": 0.5685, "step": 226305 }, { "epoch": 2.51, "learning_rate": 8.235524256651017e-06, "loss": 0.601, "step": 226310 }, { "epoch": 2.51, "learning_rate": 8.234601529512505e-06, "loss": 0.5413, "step": 226315 }, { "epoch": 2.51, "learning_rate": 8.233678802373993e-06, "loss": 0.5763, "step": 226320 }, { "epoch": 2.51, "learning_rate": 8.23275607523548e-06, "loss": 0.5752, "step": 226325 }, { "epoch": 2.51, "learning_rate": 8.231833348096968e-06, "loss": 0.6941, "step": 226330 }, { "epoch": 2.51, "learning_rate": 8.230910620958456e-06, "loss": 0.5792, "step": 226335 }, { "epoch": 2.51, "learning_rate": 8.229987893819944e-06, "loss": 0.5256, "step": 226340 }, { "epoch": 2.51, "learning_rate": 8.22906516668143e-06, "loss": 0.6263, "step": 226345 }, { "epoch": 2.51, "learning_rate": 8.228142439542918e-06, "loss": 0.6174, "step": 226350 }, { "epoch": 2.51, "learning_rate": 8.227219712404406e-06, "loss": 0.5857, "step": 226355 }, { "epoch": 2.51, "learning_rate": 8.226296985265894e-06, "loss": 0.5663, "step": 226360 }, { "epoch": 2.51, "learning_rate": 8.225374258127382e-06, "loss": 0.6414, "step": 226365 }, { "epoch": 2.51, "learning_rate": 8.22445153098887e-06, "loss": 0.6014, "step": 226370 }, { "epoch": 2.51, "learning_rate": 8.223528803850356e-06, "loss": 0.5841, "step": 226375 }, { "epoch": 2.51, "learning_rate": 8.222606076711843e-06, "loss": 0.5969, "step": 226380 }, { "epoch": 2.51, "learning_rate": 8.221683349573331e-06, "loss": 0.5873, "step": 226385 }, { "epoch": 2.51, "learning_rate": 8.220760622434819e-06, "loss": 0.5843, "step": 226390 }, { "epoch": 2.51, "learning_rate": 8.219837895296307e-06, "loss": 0.6313, "step": 226395 }, { "epoch": 2.51, "learning_rate": 8.218915168157793e-06, "loss": 0.5469, "step": 226400 }, { "epoch": 2.51, "learning_rate": 8.217992441019281e-06, "loss": 0.5685, "step": 226405 }, { "epoch": 2.51, "learning_rate": 8.217069713880769e-06, "loss": 0.6007, "step": 226410 }, { "epoch": 2.51, "learning_rate": 8.216146986742257e-06, "loss": 0.6207, "step": 226415 }, { "epoch": 2.51, "learning_rate": 8.215224259603744e-06, "loss": 0.6026, "step": 226420 }, { "epoch": 2.51, "learning_rate": 8.214301532465232e-06, "loss": 0.5673, "step": 226425 }, { "epoch": 2.51, "learning_rate": 8.21337880532672e-06, "loss": 0.5981, "step": 226430 }, { "epoch": 2.51, "learning_rate": 8.212456078188208e-06, "loss": 0.5916, "step": 226435 }, { "epoch": 2.51, "learning_rate": 8.211533351049696e-06, "loss": 0.5058, "step": 226440 }, { "epoch": 2.51, "learning_rate": 8.210610623911184e-06, "loss": 0.5654, "step": 226445 }, { "epoch": 2.51, "learning_rate": 8.20968789677267e-06, "loss": 0.6787, "step": 226450 }, { "epoch": 2.51, "learning_rate": 8.208765169634158e-06, "loss": 0.5777, "step": 226455 }, { "epoch": 2.51, "learning_rate": 8.207842442495644e-06, "loss": 0.5473, "step": 226460 }, { "epoch": 2.51, "learning_rate": 8.206919715357132e-06, "loss": 0.5583, "step": 226465 }, { "epoch": 2.51, "learning_rate": 8.20599698821862e-06, "loss": 0.5445, "step": 226470 }, { "epoch": 2.51, "learning_rate": 8.205074261080107e-06, "loss": 0.5567, "step": 226475 }, { "epoch": 2.51, "learning_rate": 8.204151533941595e-06, "loss": 0.5975, "step": 226480 }, { "epoch": 2.51, "learning_rate": 8.203228806803083e-06, "loss": 0.556, "step": 226485 }, { "epoch": 2.51, "learning_rate": 8.202306079664571e-06, "loss": 0.5986, "step": 226490 }, { "epoch": 2.51, "learning_rate": 8.201383352526059e-06, "loss": 0.552, "step": 226495 }, { "epoch": 2.51, "learning_rate": 8.200460625387547e-06, "loss": 0.5569, "step": 226500 }, { "epoch": 2.51, "learning_rate": 8.199537898249035e-06, "loss": 0.5942, "step": 226505 }, { "epoch": 2.51, "learning_rate": 8.19861517111052e-06, "loss": 0.5932, "step": 226510 }, { "epoch": 2.51, "learning_rate": 8.197692443972009e-06, "loss": 0.5728, "step": 226515 }, { "epoch": 2.51, "learning_rate": 8.196769716833496e-06, "loss": 0.6004, "step": 226520 }, { "epoch": 2.51, "learning_rate": 8.195846989694984e-06, "loss": 0.5853, "step": 226525 }, { "epoch": 2.51, "learning_rate": 8.19492426255647e-06, "loss": 0.6064, "step": 226530 }, { "epoch": 2.51, "learning_rate": 8.194001535417958e-06, "loss": 0.5698, "step": 226535 }, { "epoch": 2.51, "learning_rate": 8.193078808279446e-06, "loss": 0.5749, "step": 226540 }, { "epoch": 2.51, "learning_rate": 8.192156081140934e-06, "loss": 0.58, "step": 226545 }, { "epoch": 2.51, "learning_rate": 8.191233354002422e-06, "loss": 0.5912, "step": 226550 }, { "epoch": 2.51, "learning_rate": 8.19031062686391e-06, "loss": 0.588, "step": 226555 }, { "epoch": 2.51, "learning_rate": 8.189387899725397e-06, "loss": 0.5484, "step": 226560 }, { "epoch": 2.51, "learning_rate": 8.188465172586884e-06, "loss": 0.5476, "step": 226565 }, { "epoch": 2.51, "learning_rate": 8.187542445448371e-06, "loss": 0.6112, "step": 226570 }, { "epoch": 2.51, "learning_rate": 8.18661971830986e-06, "loss": 0.5593, "step": 226575 }, { "epoch": 2.51, "learning_rate": 8.185696991171347e-06, "loss": 0.6294, "step": 226580 }, { "epoch": 2.51, "learning_rate": 8.184774264032835e-06, "loss": 0.5903, "step": 226585 }, { "epoch": 2.51, "learning_rate": 8.183851536894323e-06, "loss": 0.5703, "step": 226590 }, { "epoch": 2.51, "learning_rate": 8.18292880975581e-06, "loss": 0.6097, "step": 226595 }, { "epoch": 2.51, "learning_rate": 8.182006082617299e-06, "loss": 0.565, "step": 226600 }, { "epoch": 2.51, "learning_rate": 8.181083355478785e-06, "loss": 0.5776, "step": 226605 }, { "epoch": 2.51, "learning_rate": 8.180160628340273e-06, "loss": 0.5415, "step": 226610 }, { "epoch": 2.51, "learning_rate": 8.17923790120176e-06, "loss": 0.5813, "step": 226615 }, { "epoch": 2.51, "learning_rate": 8.178315174063248e-06, "loss": 0.592, "step": 226620 }, { "epoch": 2.51, "learning_rate": 8.177392446924734e-06, "loss": 0.6481, "step": 226625 }, { "epoch": 2.51, "learning_rate": 8.176469719786222e-06, "loss": 0.5635, "step": 226630 }, { "epoch": 2.51, "learning_rate": 8.17554699264771e-06, "loss": 0.5215, "step": 226635 }, { "epoch": 2.51, "learning_rate": 8.174624265509198e-06, "loss": 0.5792, "step": 226640 }, { "epoch": 2.51, "learning_rate": 8.173701538370686e-06, "loss": 0.6122, "step": 226645 }, { "epoch": 2.51, "learning_rate": 8.172778811232174e-06, "loss": 0.5282, "step": 226650 }, { "epoch": 2.51, "learning_rate": 8.171856084093662e-06, "loss": 0.5824, "step": 226655 }, { "epoch": 2.51, "learning_rate": 8.17093335695515e-06, "loss": 0.5521, "step": 226660 }, { "epoch": 2.51, "learning_rate": 8.170010629816637e-06, "loss": 0.5924, "step": 226665 }, { "epoch": 2.51, "learning_rate": 8.169087902678123e-06, "loss": 0.6182, "step": 226670 }, { "epoch": 2.51, "learning_rate": 8.168165175539611e-06, "loss": 0.5734, "step": 226675 }, { "epoch": 2.51, "learning_rate": 8.167242448401097e-06, "loss": 0.5722, "step": 226680 }, { "epoch": 2.51, "learning_rate": 8.166319721262585e-06, "loss": 0.5765, "step": 226685 }, { "epoch": 2.51, "learning_rate": 8.165396994124073e-06, "loss": 0.6446, "step": 226690 }, { "epoch": 2.51, "learning_rate": 8.164474266985561e-06, "loss": 0.6139, "step": 226695 }, { "epoch": 2.51, "learning_rate": 8.163551539847049e-06, "loss": 0.6275, "step": 226700 }, { "epoch": 2.51, "learning_rate": 8.162628812708537e-06, "loss": 0.553, "step": 226705 }, { "epoch": 2.51, "learning_rate": 8.161706085570024e-06, "loss": 0.5093, "step": 226710 }, { "epoch": 2.51, "learning_rate": 8.160783358431512e-06, "loss": 0.5428, "step": 226715 }, { "epoch": 2.51, "learning_rate": 8.159860631293e-06, "loss": 0.5855, "step": 226720 }, { "epoch": 2.51, "learning_rate": 8.158937904154488e-06, "loss": 0.6316, "step": 226725 }, { "epoch": 2.51, "learning_rate": 8.158015177015974e-06, "loss": 0.5379, "step": 226730 }, { "epoch": 2.51, "learning_rate": 8.157092449877462e-06, "loss": 0.5927, "step": 226735 }, { "epoch": 2.51, "learning_rate": 8.15616972273895e-06, "loss": 0.5854, "step": 226740 }, { "epoch": 2.51, "learning_rate": 8.155246995600438e-06, "loss": 0.6016, "step": 226745 }, { "epoch": 2.51, "learning_rate": 8.154324268461926e-06, "loss": 0.6324, "step": 226750 }, { "epoch": 2.51, "learning_rate": 8.153401541323412e-06, "loss": 0.5615, "step": 226755 }, { "epoch": 2.51, "learning_rate": 8.1524788141849e-06, "loss": 0.5597, "step": 226760 }, { "epoch": 2.51, "learning_rate": 8.151556087046387e-06, "loss": 0.612, "step": 226765 }, { "epoch": 2.51, "learning_rate": 8.150633359907875e-06, "loss": 0.565, "step": 226770 }, { "epoch": 2.51, "learning_rate": 8.149710632769363e-06, "loss": 0.5432, "step": 226775 }, { "epoch": 2.51, "learning_rate": 8.148787905630851e-06, "loss": 0.5527, "step": 226780 }, { "epoch": 2.51, "learning_rate": 8.147865178492337e-06, "loss": 0.6141, "step": 226785 }, { "epoch": 2.51, "learning_rate": 8.146942451353825e-06, "loss": 0.5664, "step": 226790 }, { "epoch": 2.51, "learning_rate": 8.146019724215313e-06, "loss": 0.6221, "step": 226795 }, { "epoch": 2.51, "learning_rate": 8.1450969970768e-06, "loss": 0.5902, "step": 226800 }, { "epoch": 2.51, "learning_rate": 8.144174269938289e-06, "loss": 0.6199, "step": 226805 }, { "epoch": 2.51, "learning_rate": 8.143251542799776e-06, "loss": 0.5192, "step": 226810 }, { "epoch": 2.51, "learning_rate": 8.142328815661264e-06, "loss": 0.6147, "step": 226815 }, { "epoch": 2.51, "learning_rate": 8.141406088522752e-06, "loss": 0.5454, "step": 226820 }, { "epoch": 2.51, "learning_rate": 8.14048336138424e-06, "loss": 0.6349, "step": 226825 }, { "epoch": 2.51, "learning_rate": 8.139560634245726e-06, "loss": 0.578, "step": 226830 }, { "epoch": 2.51, "learning_rate": 8.138637907107214e-06, "loss": 0.605, "step": 226835 }, { "epoch": 2.51, "learning_rate": 8.137715179968702e-06, "loss": 0.641, "step": 226840 }, { "epoch": 2.51, "learning_rate": 8.136792452830188e-06, "loss": 0.5973, "step": 226845 }, { "epoch": 2.51, "learning_rate": 8.135869725691676e-06, "loss": 0.6172, "step": 226850 }, { "epoch": 2.51, "learning_rate": 8.134946998553164e-06, "loss": 0.6238, "step": 226855 }, { "epoch": 2.51, "learning_rate": 8.134024271414651e-06, "loss": 0.6183, "step": 226860 }, { "epoch": 2.51, "learning_rate": 8.13310154427614e-06, "loss": 0.5456, "step": 226865 }, { "epoch": 2.51, "learning_rate": 8.132178817137627e-06, "loss": 0.5988, "step": 226870 }, { "epoch": 2.51, "learning_rate": 8.131256089999115e-06, "loss": 0.5976, "step": 226875 }, { "epoch": 2.51, "learning_rate": 8.130333362860603e-06, "loss": 0.5869, "step": 226880 }, { "epoch": 2.51, "learning_rate": 8.12941063572209e-06, "loss": 0.5665, "step": 226885 }, { "epoch": 2.51, "learning_rate": 8.128487908583579e-06, "loss": 0.5882, "step": 226890 }, { "epoch": 2.51, "learning_rate": 8.127565181445065e-06, "loss": 0.5607, "step": 226895 }, { "epoch": 2.51, "learning_rate": 8.126642454306553e-06, "loss": 0.5553, "step": 226900 }, { "epoch": 2.51, "learning_rate": 8.12571972716804e-06, "loss": 0.5857, "step": 226905 }, { "epoch": 2.51, "learning_rate": 8.124797000029527e-06, "loss": 0.627, "step": 226910 }, { "epoch": 2.51, "learning_rate": 8.123874272891014e-06, "loss": 0.6693, "step": 226915 }, { "epoch": 2.51, "learning_rate": 8.122951545752502e-06, "loss": 0.5607, "step": 226920 }, { "epoch": 2.51, "learning_rate": 8.12202881861399e-06, "loss": 0.5875, "step": 226925 }, { "epoch": 2.51, "learning_rate": 8.121106091475478e-06, "loss": 0.5621, "step": 226930 }, { "epoch": 2.51, "learning_rate": 8.120183364336966e-06, "loss": 0.6184, "step": 226935 }, { "epoch": 2.51, "learning_rate": 8.119260637198454e-06, "loss": 0.6255, "step": 226940 }, { "epoch": 2.51, "learning_rate": 8.118337910059941e-06, "loss": 0.5586, "step": 226945 }, { "epoch": 2.51, "learning_rate": 8.117415182921428e-06, "loss": 0.5821, "step": 226950 }, { "epoch": 2.51, "learning_rate": 8.116492455782915e-06, "loss": 0.5882, "step": 226955 }, { "epoch": 2.51, "learning_rate": 8.115569728644403e-06, "loss": 0.5579, "step": 226960 }, { "epoch": 2.51, "learning_rate": 8.114647001505891e-06, "loss": 0.5883, "step": 226965 }, { "epoch": 2.51, "learning_rate": 8.113724274367379e-06, "loss": 0.5317, "step": 226970 }, { "epoch": 2.51, "learning_rate": 8.112801547228867e-06, "loss": 0.6351, "step": 226975 }, { "epoch": 2.51, "learning_rate": 8.111878820090355e-06, "loss": 0.5929, "step": 226980 }, { "epoch": 2.51, "learning_rate": 8.110956092951841e-06, "loss": 0.5965, "step": 226985 }, { "epoch": 2.51, "learning_rate": 8.110033365813329e-06, "loss": 0.5758, "step": 226990 }, { "epoch": 2.51, "learning_rate": 8.109110638674817e-06, "loss": 0.571, "step": 226995 }, { "epoch": 2.51, "learning_rate": 8.108187911536304e-06, "loss": 0.6321, "step": 227000 }, { "epoch": 2.51, "eval_loss": 0.5812163949012756, "eval_runtime": 69.2682, "eval_samples_per_second": 28.873, "eval_steps_per_second": 14.437, "step": 227000 }, { "epoch": 2.51, "learning_rate": 8.107265184397792e-06, "loss": 0.5908, "step": 227005 }, { "epoch": 2.51, "learning_rate": 8.106342457259278e-06, "loss": 0.6394, "step": 227010 }, { "epoch": 2.51, "learning_rate": 8.105419730120766e-06, "loss": 0.5925, "step": 227015 }, { "epoch": 2.51, "learning_rate": 8.104497002982254e-06, "loss": 0.5806, "step": 227020 }, { "epoch": 2.51, "learning_rate": 8.103574275843742e-06, "loss": 0.6021, "step": 227025 }, { "epoch": 2.51, "learning_rate": 8.10265154870523e-06, "loss": 0.5575, "step": 227030 }, { "epoch": 2.51, "learning_rate": 8.101728821566718e-06, "loss": 0.5776, "step": 227035 }, { "epoch": 2.51, "learning_rate": 8.100806094428206e-06, "loss": 0.599, "step": 227040 }, { "epoch": 2.51, "learning_rate": 8.099883367289693e-06, "loss": 0.6062, "step": 227045 }, { "epoch": 2.51, "learning_rate": 8.098960640151181e-06, "loss": 0.6225, "step": 227050 }, { "epoch": 2.51, "learning_rate": 8.098037913012669e-06, "loss": 0.6115, "step": 227055 }, { "epoch": 2.51, "learning_rate": 8.097115185874155e-06, "loss": 0.5867, "step": 227060 }, { "epoch": 2.51, "learning_rate": 8.096192458735641e-06, "loss": 0.5742, "step": 227065 }, { "epoch": 2.51, "learning_rate": 8.09526973159713e-06, "loss": 0.5758, "step": 227070 }, { "epoch": 2.51, "learning_rate": 8.094347004458617e-06, "loss": 0.6233, "step": 227075 }, { "epoch": 2.51, "learning_rate": 8.093424277320105e-06, "loss": 0.5659, "step": 227080 }, { "epoch": 2.51, "learning_rate": 8.092501550181593e-06, "loss": 0.6012, "step": 227085 }, { "epoch": 2.51, "learning_rate": 8.09157882304308e-06, "loss": 0.6018, "step": 227090 }, { "epoch": 2.51, "learning_rate": 8.090656095904568e-06, "loss": 0.5953, "step": 227095 }, { "epoch": 2.51, "learning_rate": 8.089733368766056e-06, "loss": 0.5853, "step": 227100 }, { "epoch": 2.51, "learning_rate": 8.088810641627544e-06, "loss": 0.6072, "step": 227105 }, { "epoch": 2.51, "learning_rate": 8.087887914489032e-06, "loss": 0.5505, "step": 227110 }, { "epoch": 2.51, "learning_rate": 8.086965187350518e-06, "loss": 0.6734, "step": 227115 }, { "epoch": 2.51, "learning_rate": 8.086042460212006e-06, "loss": 0.5984, "step": 227120 }, { "epoch": 2.51, "learning_rate": 8.085119733073494e-06, "loss": 0.5782, "step": 227125 }, { "epoch": 2.51, "learning_rate": 8.084197005934982e-06, "loss": 0.5685, "step": 227130 }, { "epoch": 2.52, "learning_rate": 8.083274278796468e-06, "loss": 0.6035, "step": 227135 }, { "epoch": 2.52, "learning_rate": 8.082351551657956e-06, "loss": 0.5844, "step": 227140 }, { "epoch": 2.52, "learning_rate": 8.081428824519444e-06, "loss": 0.5763, "step": 227145 }, { "epoch": 2.52, "learning_rate": 8.080506097380931e-06, "loss": 0.6095, "step": 227150 }, { "epoch": 2.52, "learning_rate": 8.07958337024242e-06, "loss": 0.6158, "step": 227155 }, { "epoch": 2.52, "learning_rate": 8.078660643103907e-06, "loss": 0.5817, "step": 227160 }, { "epoch": 2.52, "learning_rate": 8.077737915965395e-06, "loss": 0.5441, "step": 227165 }, { "epoch": 2.52, "learning_rate": 8.076815188826883e-06, "loss": 0.5817, "step": 227170 }, { "epoch": 2.52, "learning_rate": 8.075892461688369e-06, "loss": 0.5788, "step": 227175 }, { "epoch": 2.52, "learning_rate": 8.074969734549857e-06, "loss": 0.5926, "step": 227180 }, { "epoch": 2.52, "learning_rate": 8.074047007411345e-06, "loss": 0.5771, "step": 227185 }, { "epoch": 2.52, "learning_rate": 8.073124280272833e-06, "loss": 0.5781, "step": 227190 }, { "epoch": 2.52, "learning_rate": 8.07220155313432e-06, "loss": 0.5378, "step": 227195 }, { "epoch": 2.52, "learning_rate": 8.071278825995808e-06, "loss": 0.5648, "step": 227200 }, { "epoch": 2.52, "learning_rate": 8.070356098857296e-06, "loss": 0.6091, "step": 227205 }, { "epoch": 2.52, "learning_rate": 8.069433371718782e-06, "loss": 0.5842, "step": 227210 }, { "epoch": 2.52, "learning_rate": 8.06851064458027e-06, "loss": 0.5973, "step": 227215 }, { "epoch": 2.52, "learning_rate": 8.067587917441758e-06, "loss": 0.5414, "step": 227220 }, { "epoch": 2.52, "learning_rate": 8.066665190303246e-06, "loss": 0.6216, "step": 227225 }, { "epoch": 2.52, "learning_rate": 8.065742463164732e-06, "loss": 0.5317, "step": 227230 }, { "epoch": 2.52, "learning_rate": 8.06481973602622e-06, "loss": 0.5708, "step": 227235 }, { "epoch": 2.52, "learning_rate": 8.063897008887708e-06, "loss": 0.5543, "step": 227240 }, { "epoch": 2.52, "learning_rate": 8.062974281749195e-06, "loss": 0.5494, "step": 227245 }, { "epoch": 2.52, "learning_rate": 8.062051554610683e-06, "loss": 0.5859, "step": 227250 }, { "epoch": 2.52, "learning_rate": 8.061128827472171e-06, "loss": 0.6413, "step": 227255 }, { "epoch": 2.52, "learning_rate": 8.060206100333659e-06, "loss": 0.5979, "step": 227260 }, { "epoch": 2.52, "learning_rate": 8.059283373195147e-06, "loss": 0.623, "step": 227265 }, { "epoch": 2.52, "learning_rate": 8.058360646056635e-06, "loss": 0.5906, "step": 227270 }, { "epoch": 2.52, "learning_rate": 8.057437918918123e-06, "loss": 0.5474, "step": 227275 }, { "epoch": 2.52, "learning_rate": 8.056515191779609e-06, "loss": 0.6313, "step": 227280 }, { "epoch": 2.52, "learning_rate": 8.055592464641097e-06, "loss": 0.624, "step": 227285 }, { "epoch": 2.52, "learning_rate": 8.054669737502583e-06, "loss": 0.5796, "step": 227290 }, { "epoch": 2.52, "learning_rate": 8.05374701036407e-06, "loss": 0.5956, "step": 227295 }, { "epoch": 2.52, "learning_rate": 8.052824283225558e-06, "loss": 0.6259, "step": 227300 }, { "epoch": 2.52, "learning_rate": 8.051901556087046e-06, "loss": 0.5884, "step": 227305 }, { "epoch": 2.52, "learning_rate": 8.050978828948534e-06, "loss": 0.5826, "step": 227310 }, { "epoch": 2.52, "learning_rate": 8.050056101810022e-06, "loss": 0.5462, "step": 227315 }, { "epoch": 2.52, "learning_rate": 8.04913337467151e-06, "loss": 0.61, "step": 227320 }, { "epoch": 2.52, "learning_rate": 8.048210647532998e-06, "loss": 0.6345, "step": 227325 }, { "epoch": 2.52, "learning_rate": 8.047287920394486e-06, "loss": 0.5524, "step": 227330 }, { "epoch": 2.52, "learning_rate": 8.046365193255972e-06, "loss": 0.5752, "step": 227335 }, { "epoch": 2.52, "learning_rate": 8.04544246611746e-06, "loss": 0.5899, "step": 227340 }, { "epoch": 2.52, "learning_rate": 8.044519738978947e-06, "loss": 0.6188, "step": 227345 }, { "epoch": 2.52, "learning_rate": 8.043597011840435e-06, "loss": 0.666, "step": 227350 }, { "epoch": 2.52, "learning_rate": 8.042674284701923e-06, "loss": 0.5699, "step": 227355 }, { "epoch": 2.52, "learning_rate": 8.041751557563411e-06, "loss": 0.5617, "step": 227360 }, { "epoch": 2.52, "learning_rate": 8.040828830424897e-06, "loss": 0.6263, "step": 227365 }, { "epoch": 2.52, "learning_rate": 8.039906103286385e-06, "loss": 0.5928, "step": 227370 }, { "epoch": 2.52, "learning_rate": 8.038983376147873e-06, "loss": 0.5602, "step": 227375 }, { "epoch": 2.52, "learning_rate": 8.03806064900936e-06, "loss": 0.5888, "step": 227380 }, { "epoch": 2.52, "learning_rate": 8.037137921870848e-06, "loss": 0.5739, "step": 227385 }, { "epoch": 2.52, "learning_rate": 8.036215194732336e-06, "loss": 0.5759, "step": 227390 }, { "epoch": 2.52, "learning_rate": 8.035292467593822e-06, "loss": 0.564, "step": 227395 }, { "epoch": 2.52, "learning_rate": 8.03436974045531e-06, "loss": 0.5517, "step": 227400 }, { "epoch": 2.52, "learning_rate": 8.033447013316798e-06, "loss": 0.5933, "step": 227405 }, { "epoch": 2.52, "learning_rate": 8.032524286178286e-06, "loss": 0.5519, "step": 227410 }, { "epoch": 2.52, "learning_rate": 8.031601559039774e-06, "loss": 0.5604, "step": 227415 }, { "epoch": 2.52, "learning_rate": 8.030678831901262e-06, "loss": 0.5385, "step": 227420 }, { "epoch": 2.52, "learning_rate": 8.02975610476275e-06, "loss": 0.6169, "step": 227425 }, { "epoch": 2.52, "learning_rate": 8.028833377624237e-06, "loss": 0.5847, "step": 227430 }, { "epoch": 2.52, "learning_rate": 8.027910650485725e-06, "loss": 0.571, "step": 227435 }, { "epoch": 2.52, "learning_rate": 8.026987923347211e-06, "loss": 0.5292, "step": 227440 }, { "epoch": 2.52, "learning_rate": 8.0260651962087e-06, "loss": 0.5928, "step": 227445 }, { "epoch": 2.52, "learning_rate": 8.025142469070185e-06, "loss": 0.6078, "step": 227450 }, { "epoch": 2.52, "learning_rate": 8.024219741931673e-06, "loss": 0.5982, "step": 227455 }, { "epoch": 2.52, "learning_rate": 8.023297014793161e-06, "loss": 0.5572, "step": 227460 }, { "epoch": 2.52, "learning_rate": 8.022374287654649e-06, "loss": 0.619, "step": 227465 }, { "epoch": 2.52, "learning_rate": 8.021451560516137e-06, "loss": 0.5986, "step": 227470 }, { "epoch": 2.52, "learning_rate": 8.020528833377625e-06, "loss": 0.5721, "step": 227475 }, { "epoch": 2.52, "learning_rate": 8.019606106239112e-06, "loss": 0.6016, "step": 227480 }, { "epoch": 2.52, "learning_rate": 8.0186833791006e-06, "loss": 0.6653, "step": 227485 }, { "epoch": 2.52, "learning_rate": 8.017760651962088e-06, "loss": 0.5812, "step": 227490 }, { "epoch": 2.52, "learning_rate": 8.016837924823576e-06, "loss": 0.6, "step": 227495 }, { "epoch": 2.52, "learning_rate": 8.015915197685062e-06, "loss": 0.6237, "step": 227500 }, { "epoch": 2.52, "learning_rate": 8.01499247054655e-06, "loss": 0.6549, "step": 227505 }, { "epoch": 2.52, "learning_rate": 8.014069743408038e-06, "loss": 0.5629, "step": 227510 }, { "epoch": 2.52, "learning_rate": 8.013147016269524e-06, "loss": 0.5638, "step": 227515 }, { "epoch": 2.52, "learning_rate": 8.012224289131012e-06, "loss": 0.533, "step": 227520 }, { "epoch": 2.52, "learning_rate": 8.0113015619925e-06, "loss": 0.5272, "step": 227525 }, { "epoch": 2.52, "learning_rate": 8.010378834853988e-06, "loss": 0.6418, "step": 227530 }, { "epoch": 2.52, "learning_rate": 8.009456107715475e-06, "loss": 0.5307, "step": 227535 }, { "epoch": 2.52, "learning_rate": 8.008533380576963e-06, "loss": 0.5782, "step": 227540 }, { "epoch": 2.52, "learning_rate": 8.007610653438451e-06, "loss": 0.583, "step": 227545 }, { "epoch": 2.52, "learning_rate": 8.006687926299939e-06, "loss": 0.6427, "step": 227550 }, { "epoch": 2.52, "learning_rate": 8.005765199161427e-06, "loss": 0.5566, "step": 227555 }, { "epoch": 2.52, "learning_rate": 8.004842472022913e-06, "loss": 0.5839, "step": 227560 }, { "epoch": 2.52, "learning_rate": 8.0039197448844e-06, "loss": 0.6037, "step": 227565 }, { "epoch": 2.52, "learning_rate": 8.002997017745889e-06, "loss": 0.6117, "step": 227570 }, { "epoch": 2.52, "learning_rate": 8.002074290607377e-06, "loss": 0.6289, "step": 227575 }, { "epoch": 2.52, "learning_rate": 8.001151563468864e-06, "loss": 0.5719, "step": 227580 }, { "epoch": 2.52, "learning_rate": 8.000228836330352e-06, "loss": 0.5685, "step": 227585 }, { "epoch": 2.52, "learning_rate": 7.999306109191838e-06, "loss": 0.5866, "step": 227590 }, { "epoch": 2.52, "learning_rate": 7.998383382053326e-06, "loss": 0.5782, "step": 227595 }, { "epoch": 2.52, "learning_rate": 7.997460654914814e-06, "loss": 0.5763, "step": 227600 }, { "epoch": 2.52, "learning_rate": 7.996537927776302e-06, "loss": 0.5886, "step": 227605 }, { "epoch": 2.52, "learning_rate": 7.99561520063779e-06, "loss": 0.579, "step": 227610 }, { "epoch": 2.52, "learning_rate": 7.994692473499276e-06, "loss": 0.601, "step": 227615 }, { "epoch": 2.52, "learning_rate": 7.993769746360764e-06, "loss": 0.6089, "step": 227620 }, { "epoch": 2.52, "learning_rate": 7.992847019222252e-06, "loss": 0.5133, "step": 227625 }, { "epoch": 2.52, "learning_rate": 7.99192429208374e-06, "loss": 0.5697, "step": 227630 }, { "epoch": 2.52, "learning_rate": 7.991001564945227e-06, "loss": 0.637, "step": 227635 }, { "epoch": 2.52, "learning_rate": 7.990078837806715e-06, "loss": 0.492, "step": 227640 }, { "epoch": 2.52, "learning_rate": 7.989156110668203e-06, "loss": 0.5286, "step": 227645 }, { "epoch": 2.52, "learning_rate": 7.988233383529691e-06, "loss": 0.5717, "step": 227650 }, { "epoch": 2.52, "learning_rate": 7.987310656391179e-06, "loss": 0.6269, "step": 227655 }, { "epoch": 2.52, "learning_rate": 7.986387929252667e-06, "loss": 0.5908, "step": 227660 }, { "epoch": 2.52, "learning_rate": 7.985465202114153e-06, "loss": 0.5485, "step": 227665 }, { "epoch": 2.52, "learning_rate": 7.98454247497564e-06, "loss": 0.5864, "step": 227670 }, { "epoch": 2.52, "learning_rate": 7.983619747837127e-06, "loss": 0.591, "step": 227675 }, { "epoch": 2.52, "learning_rate": 7.982697020698615e-06, "loss": 0.631, "step": 227680 }, { "epoch": 2.52, "learning_rate": 7.981774293560102e-06, "loss": 0.6082, "step": 227685 }, { "epoch": 2.52, "learning_rate": 7.98085156642159e-06, "loss": 0.5883, "step": 227690 }, { "epoch": 2.52, "learning_rate": 7.979928839283078e-06, "loss": 0.5775, "step": 227695 }, { "epoch": 2.52, "learning_rate": 7.979006112144566e-06, "loss": 0.6375, "step": 227700 }, { "epoch": 2.52, "learning_rate": 7.978083385006054e-06, "loss": 0.592, "step": 227705 }, { "epoch": 2.52, "learning_rate": 7.977160657867542e-06, "loss": 0.5741, "step": 227710 }, { "epoch": 2.52, "learning_rate": 7.97623793072903e-06, "loss": 0.5452, "step": 227715 }, { "epoch": 2.52, "learning_rate": 7.975315203590517e-06, "loss": 0.6067, "step": 227720 }, { "epoch": 2.52, "learning_rate": 7.974392476452004e-06, "loss": 0.5507, "step": 227725 }, { "epoch": 2.52, "learning_rate": 7.973469749313491e-06, "loss": 0.6098, "step": 227730 }, { "epoch": 2.52, "learning_rate": 7.97254702217498e-06, "loss": 0.5607, "step": 227735 }, { "epoch": 2.52, "learning_rate": 7.971624295036467e-06, "loss": 0.6269, "step": 227740 }, { "epoch": 2.52, "learning_rate": 7.970701567897953e-06, "loss": 0.587, "step": 227745 }, { "epoch": 2.52, "learning_rate": 7.969778840759441e-06, "loss": 0.5725, "step": 227750 }, { "epoch": 2.52, "learning_rate": 7.968856113620929e-06, "loss": 0.5466, "step": 227755 }, { "epoch": 2.52, "learning_rate": 7.967933386482417e-06, "loss": 0.5912, "step": 227760 }, { "epoch": 2.52, "learning_rate": 7.967010659343905e-06, "loss": 0.5946, "step": 227765 }, { "epoch": 2.52, "learning_rate": 7.966087932205392e-06, "loss": 0.619, "step": 227770 }, { "epoch": 2.52, "learning_rate": 7.96516520506688e-06, "loss": 0.5659, "step": 227775 }, { "epoch": 2.52, "learning_rate": 7.964242477928366e-06, "loss": 0.6088, "step": 227780 }, { "epoch": 2.52, "learning_rate": 7.963319750789854e-06, "loss": 0.6245, "step": 227785 }, { "epoch": 2.52, "learning_rate": 7.962397023651342e-06, "loss": 0.5984, "step": 227790 }, { "epoch": 2.52, "learning_rate": 7.96147429651283e-06, "loss": 0.5583, "step": 227795 }, { "epoch": 2.52, "learning_rate": 7.960551569374318e-06, "loss": 0.5975, "step": 227800 }, { "epoch": 2.52, "learning_rate": 7.959628842235806e-06, "loss": 0.5875, "step": 227805 }, { "epoch": 2.52, "learning_rate": 7.958706115097294e-06, "loss": 0.573, "step": 227810 }, { "epoch": 2.52, "learning_rate": 7.957783387958781e-06, "loss": 0.5198, "step": 227815 }, { "epoch": 2.52, "learning_rate": 7.956860660820268e-06, "loss": 0.575, "step": 227820 }, { "epoch": 2.52, "learning_rate": 7.955937933681755e-06, "loss": 0.582, "step": 227825 }, { "epoch": 2.52, "learning_rate": 7.955015206543243e-06, "loss": 0.5689, "step": 227830 }, { "epoch": 2.52, "learning_rate": 7.954092479404731e-06, "loss": 0.5321, "step": 227835 }, { "epoch": 2.52, "learning_rate": 7.953169752266217e-06, "loss": 0.5578, "step": 227840 }, { "epoch": 2.52, "learning_rate": 7.952247025127705e-06, "loss": 0.6024, "step": 227845 }, { "epoch": 2.52, "learning_rate": 7.951324297989193e-06, "loss": 0.6642, "step": 227850 }, { "epoch": 2.52, "learning_rate": 7.95040157085068e-06, "loss": 0.5679, "step": 227855 }, { "epoch": 2.52, "learning_rate": 7.949478843712169e-06, "loss": 0.5223, "step": 227860 }, { "epoch": 2.52, "learning_rate": 7.948556116573657e-06, "loss": 0.5515, "step": 227865 }, { "epoch": 2.52, "learning_rate": 7.947633389435144e-06, "loss": 0.5637, "step": 227870 }, { "epoch": 2.52, "learning_rate": 7.946710662296632e-06, "loss": 0.5906, "step": 227875 }, { "epoch": 2.52, "learning_rate": 7.94578793515812e-06, "loss": 0.5968, "step": 227880 }, { "epoch": 2.52, "learning_rate": 7.944865208019606e-06, "loss": 0.5771, "step": 227885 }, { "epoch": 2.52, "learning_rate": 7.943942480881094e-06, "loss": 0.662, "step": 227890 }, { "epoch": 2.52, "learning_rate": 7.94301975374258e-06, "loss": 0.5774, "step": 227895 }, { "epoch": 2.52, "learning_rate": 7.942097026604068e-06, "loss": 0.5941, "step": 227900 }, { "epoch": 2.52, "learning_rate": 7.941174299465556e-06, "loss": 0.5677, "step": 227905 }, { "epoch": 2.52, "learning_rate": 7.940251572327044e-06, "loss": 0.5904, "step": 227910 }, { "epoch": 2.52, "learning_rate": 7.939328845188532e-06, "loss": 0.5911, "step": 227915 }, { "epoch": 2.52, "learning_rate": 7.93840611805002e-06, "loss": 0.618, "step": 227920 }, { "epoch": 2.52, "learning_rate": 7.937483390911507e-06, "loss": 0.5835, "step": 227925 }, { "epoch": 2.52, "learning_rate": 7.936560663772995e-06, "loss": 0.6512, "step": 227930 }, { "epoch": 2.52, "learning_rate": 7.935637936634483e-06, "loss": 0.578, "step": 227935 }, { "epoch": 2.52, "learning_rate": 7.934715209495971e-06, "loss": 0.622, "step": 227940 }, { "epoch": 2.52, "learning_rate": 7.933792482357457e-06, "loss": 0.5283, "step": 227945 }, { "epoch": 2.52, "learning_rate": 7.932869755218945e-06, "loss": 0.6399, "step": 227950 }, { "epoch": 2.52, "learning_rate": 7.931947028080433e-06, "loss": 0.589, "step": 227955 }, { "epoch": 2.52, "learning_rate": 7.93102430094192e-06, "loss": 0.5824, "step": 227960 }, { "epoch": 2.52, "learning_rate": 7.930101573803408e-06, "loss": 0.5578, "step": 227965 }, { "epoch": 2.52, "learning_rate": 7.929178846664895e-06, "loss": 0.5724, "step": 227970 }, { "epoch": 2.52, "learning_rate": 7.928256119526382e-06, "loss": 0.553, "step": 227975 }, { "epoch": 2.52, "learning_rate": 7.92733339238787e-06, "loss": 0.6084, "step": 227980 }, { "epoch": 2.52, "learning_rate": 7.926410665249358e-06, "loss": 0.6093, "step": 227985 }, { "epoch": 2.52, "learning_rate": 7.925487938110846e-06, "loss": 0.6152, "step": 227990 }, { "epoch": 2.52, "learning_rate": 7.924565210972334e-06, "loss": 0.5696, "step": 227995 }, { "epoch": 2.52, "learning_rate": 7.92364248383382e-06, "loss": 0.568, "step": 228000 }, { "epoch": 2.52, "eval_loss": 0.5672938227653503, "eval_runtime": 69.6156, "eval_samples_per_second": 28.729, "eval_steps_per_second": 14.365, "step": 228000 }, { "epoch": 2.52, "learning_rate": 7.922719756695308e-06, "loss": 0.5568, "step": 228005 }, { "epoch": 2.52, "learning_rate": 7.921797029556796e-06, "loss": 0.531, "step": 228010 }, { "epoch": 2.52, "learning_rate": 7.920874302418284e-06, "loss": 0.5829, "step": 228015 }, { "epoch": 2.52, "learning_rate": 7.919951575279771e-06, "loss": 0.6007, "step": 228020 }, { "epoch": 2.52, "learning_rate": 7.91902884814126e-06, "loss": 0.5787, "step": 228025 }, { "epoch": 2.52, "learning_rate": 7.918106121002747e-06, "loss": 0.5433, "step": 228030 }, { "epoch": 2.52, "learning_rate": 7.917183393864235e-06, "loss": 0.6025, "step": 228035 }, { "epoch": 2.53, "learning_rate": 7.916260666725723e-06, "loss": 0.5773, "step": 228040 }, { "epoch": 2.53, "learning_rate": 7.915337939587209e-06, "loss": 0.6196, "step": 228045 }, { "epoch": 2.53, "learning_rate": 7.914415212448697e-06, "loss": 0.5827, "step": 228050 }, { "epoch": 2.53, "learning_rate": 7.913492485310185e-06, "loss": 0.6296, "step": 228055 }, { "epoch": 2.53, "learning_rate": 7.91256975817167e-06, "loss": 0.5971, "step": 228060 }, { "epoch": 2.53, "learning_rate": 7.911647031033159e-06, "loss": 0.6453, "step": 228065 }, { "epoch": 2.53, "learning_rate": 7.910724303894646e-06, "loss": 0.5824, "step": 228070 }, { "epoch": 2.53, "learning_rate": 7.909801576756134e-06, "loss": 0.6191, "step": 228075 }, { "epoch": 2.53, "learning_rate": 7.908878849617622e-06, "loss": 0.5932, "step": 228080 }, { "epoch": 2.53, "learning_rate": 7.90795612247911e-06, "loss": 0.6128, "step": 228085 }, { "epoch": 2.53, "learning_rate": 7.907033395340598e-06, "loss": 0.5652, "step": 228090 }, { "epoch": 2.53, "learning_rate": 7.906110668202086e-06, "loss": 0.5606, "step": 228095 }, { "epoch": 2.53, "learning_rate": 7.905187941063574e-06, "loss": 0.5578, "step": 228100 }, { "epoch": 2.53, "learning_rate": 7.904265213925061e-06, "loss": 0.5343, "step": 228105 }, { "epoch": 2.53, "learning_rate": 7.903342486786548e-06, "loss": 0.5972, "step": 228110 }, { "epoch": 2.53, "learning_rate": 7.902419759648035e-06, "loss": 0.5715, "step": 228115 }, { "epoch": 2.53, "learning_rate": 7.901497032509523e-06, "loss": 0.5998, "step": 228120 }, { "epoch": 2.53, "learning_rate": 7.90057430537101e-06, "loss": 0.559, "step": 228125 }, { "epoch": 2.53, "learning_rate": 7.899651578232497e-06, "loss": 0.6049, "step": 228130 }, { "epoch": 2.53, "learning_rate": 7.898728851093985e-06, "loss": 0.6085, "step": 228135 }, { "epoch": 2.53, "learning_rate": 7.897806123955473e-06, "loss": 0.6074, "step": 228140 }, { "epoch": 2.53, "learning_rate": 7.89688339681696e-06, "loss": 0.5718, "step": 228145 }, { "epoch": 2.53, "learning_rate": 7.895960669678449e-06, "loss": 0.5936, "step": 228150 }, { "epoch": 2.53, "learning_rate": 7.895037942539936e-06, "loss": 0.6083, "step": 228155 }, { "epoch": 2.53, "learning_rate": 7.894115215401424e-06, "loss": 0.5984, "step": 228160 }, { "epoch": 2.53, "learning_rate": 7.89319248826291e-06, "loss": 0.5854, "step": 228165 }, { "epoch": 2.53, "learning_rate": 7.892269761124398e-06, "loss": 0.532, "step": 228170 }, { "epoch": 2.53, "learning_rate": 7.891347033985886e-06, "loss": 0.589, "step": 228175 }, { "epoch": 2.53, "learning_rate": 7.890424306847374e-06, "loss": 0.6056, "step": 228180 }, { "epoch": 2.53, "learning_rate": 7.889501579708862e-06, "loss": 0.6025, "step": 228185 }, { "epoch": 2.53, "learning_rate": 7.88857885257035e-06, "loss": 0.6, "step": 228190 }, { "epoch": 2.53, "learning_rate": 7.887656125431838e-06, "loss": 0.6191, "step": 228195 }, { "epoch": 2.53, "learning_rate": 7.886733398293324e-06, "loss": 0.5462, "step": 228200 }, { "epoch": 2.53, "learning_rate": 7.885810671154812e-06, "loss": 0.6, "step": 228205 }, { "epoch": 2.53, "learning_rate": 7.8848879440163e-06, "loss": 0.5688, "step": 228210 }, { "epoch": 2.53, "learning_rate": 7.883965216877787e-06, "loss": 0.5939, "step": 228215 }, { "epoch": 2.53, "learning_rate": 7.883042489739275e-06, "loss": 0.5536, "step": 228220 }, { "epoch": 2.53, "learning_rate": 7.882119762600761e-06, "loss": 0.5758, "step": 228225 }, { "epoch": 2.53, "learning_rate": 7.881197035462249e-06, "loss": 0.6015, "step": 228230 }, { "epoch": 2.53, "learning_rate": 7.880274308323737e-06, "loss": 0.5592, "step": 228235 }, { "epoch": 2.53, "learning_rate": 7.879351581185225e-06, "loss": 0.5703, "step": 228240 }, { "epoch": 2.53, "learning_rate": 7.878428854046713e-06, "loss": 0.5845, "step": 228245 }, { "epoch": 2.53, "learning_rate": 7.8775061269082e-06, "loss": 0.6021, "step": 228250 }, { "epoch": 2.53, "learning_rate": 7.876583399769688e-06, "loss": 0.5545, "step": 228255 }, { "epoch": 2.53, "learning_rate": 7.875660672631176e-06, "loss": 0.5892, "step": 228260 }, { "epoch": 2.53, "learning_rate": 7.874737945492664e-06, "loss": 0.5552, "step": 228265 }, { "epoch": 2.53, "learning_rate": 7.873815218354152e-06, "loss": 0.5445, "step": 228270 }, { "epoch": 2.53, "learning_rate": 7.872892491215638e-06, "loss": 0.6079, "step": 228275 }, { "epoch": 2.53, "learning_rate": 7.871969764077124e-06, "loss": 0.5988, "step": 228280 }, { "epoch": 2.53, "learning_rate": 7.871047036938612e-06, "loss": 0.5855, "step": 228285 }, { "epoch": 2.53, "learning_rate": 7.8701243098001e-06, "loss": 0.5866, "step": 228290 }, { "epoch": 2.53, "learning_rate": 7.869201582661588e-06, "loss": 0.5644, "step": 228295 }, { "epoch": 2.53, "learning_rate": 7.868278855523076e-06, "loss": 0.5427, "step": 228300 }, { "epoch": 2.53, "learning_rate": 7.867356128384563e-06, "loss": 0.5526, "step": 228305 }, { "epoch": 2.53, "learning_rate": 7.866433401246051e-06, "loss": 0.5758, "step": 228310 }, { "epoch": 2.53, "learning_rate": 7.86551067410754e-06, "loss": 0.5939, "step": 228315 }, { "epoch": 2.53, "learning_rate": 7.864587946969027e-06, "loss": 0.5735, "step": 228320 }, { "epoch": 2.53, "learning_rate": 7.863665219830515e-06, "loss": 0.608, "step": 228325 }, { "epoch": 2.53, "learning_rate": 7.862742492692001e-06, "loss": 0.586, "step": 228330 }, { "epoch": 2.53, "learning_rate": 7.861819765553489e-06, "loss": 0.5635, "step": 228335 }, { "epoch": 2.53, "learning_rate": 7.860897038414977e-06, "loss": 0.5289, "step": 228340 }, { "epoch": 2.53, "learning_rate": 7.859974311276465e-06, "loss": 0.5946, "step": 228345 }, { "epoch": 2.53, "learning_rate": 7.85905158413795e-06, "loss": 0.5927, "step": 228350 }, { "epoch": 2.53, "learning_rate": 7.858128856999439e-06, "loss": 0.5946, "step": 228355 }, { "epoch": 2.53, "learning_rate": 7.857206129860926e-06, "loss": 0.6019, "step": 228360 }, { "epoch": 2.53, "learning_rate": 7.856283402722414e-06, "loss": 0.5849, "step": 228365 }, { "epoch": 2.53, "learning_rate": 7.855360675583902e-06, "loss": 0.5948, "step": 228370 }, { "epoch": 2.53, "learning_rate": 7.85443794844539e-06, "loss": 0.5796, "step": 228375 }, { "epoch": 2.53, "learning_rate": 7.853515221306878e-06, "loss": 0.5744, "step": 228380 }, { "epoch": 2.53, "learning_rate": 7.852592494168366e-06, "loss": 0.6176, "step": 228385 }, { "epoch": 2.53, "learning_rate": 7.851669767029852e-06, "loss": 0.5825, "step": 228390 }, { "epoch": 2.53, "learning_rate": 7.85074703989134e-06, "loss": 0.5984, "step": 228395 }, { "epoch": 2.53, "learning_rate": 7.849824312752828e-06, "loss": 0.6111, "step": 228400 }, { "epoch": 2.53, "learning_rate": 7.848901585614315e-06, "loss": 0.5497, "step": 228405 }, { "epoch": 2.53, "learning_rate": 7.847978858475803e-06, "loss": 0.5697, "step": 228410 }, { "epoch": 2.53, "learning_rate": 7.847056131337291e-06, "loss": 0.6254, "step": 228415 }, { "epoch": 2.53, "learning_rate": 7.846133404198779e-06, "loss": 0.5821, "step": 228420 }, { "epoch": 2.53, "learning_rate": 7.845210677060265e-06, "loss": 0.6771, "step": 228425 }, { "epoch": 2.53, "learning_rate": 7.844287949921753e-06, "loss": 0.5815, "step": 228430 }, { "epoch": 2.53, "learning_rate": 7.84336522278324e-06, "loss": 0.6352, "step": 228435 }, { "epoch": 2.53, "learning_rate": 7.842442495644729e-06, "loss": 0.5958, "step": 228440 }, { "epoch": 2.53, "learning_rate": 7.841519768506215e-06, "loss": 0.5726, "step": 228445 }, { "epoch": 2.53, "learning_rate": 7.840597041367703e-06, "loss": 0.5567, "step": 228450 }, { "epoch": 2.53, "learning_rate": 7.83967431422919e-06, "loss": 0.5831, "step": 228455 }, { "epoch": 2.53, "learning_rate": 7.838751587090678e-06, "loss": 0.5886, "step": 228460 }, { "epoch": 2.53, "learning_rate": 7.837828859952166e-06, "loss": 0.5837, "step": 228465 }, { "epoch": 2.53, "learning_rate": 7.836906132813654e-06, "loss": 0.5786, "step": 228470 }, { "epoch": 2.53, "learning_rate": 7.835983405675142e-06, "loss": 0.5607, "step": 228475 }, { "epoch": 2.53, "learning_rate": 7.83506067853663e-06, "loss": 0.5669, "step": 228480 }, { "epoch": 2.53, "learning_rate": 7.834137951398118e-06, "loss": 0.5716, "step": 228485 }, { "epoch": 2.53, "learning_rate": 7.833215224259605e-06, "loss": 0.5782, "step": 228490 }, { "epoch": 2.53, "learning_rate": 7.832292497121092e-06, "loss": 0.5771, "step": 228495 }, { "epoch": 2.53, "learning_rate": 7.83136976998258e-06, "loss": 0.5617, "step": 228500 }, { "epoch": 2.53, "learning_rate": 7.830447042844066e-06, "loss": 0.6186, "step": 228505 }, { "epoch": 2.53, "learning_rate": 7.829524315705553e-06, "loss": 0.6007, "step": 228510 }, { "epoch": 2.53, "learning_rate": 7.828601588567041e-06, "loss": 0.6033, "step": 228515 }, { "epoch": 2.53, "learning_rate": 7.827678861428529e-06, "loss": 0.6329, "step": 228520 }, { "epoch": 2.53, "learning_rate": 7.826756134290017e-06, "loss": 0.5523, "step": 228525 }, { "epoch": 2.53, "learning_rate": 7.825833407151505e-06, "loss": 0.6415, "step": 228530 }, { "epoch": 2.53, "learning_rate": 7.824910680012993e-06, "loss": 0.638, "step": 228535 }, { "epoch": 2.53, "learning_rate": 7.82398795287448e-06, "loss": 0.6274, "step": 228540 }, { "epoch": 2.53, "learning_rate": 7.823065225735968e-06, "loss": 0.6017, "step": 228545 }, { "epoch": 2.53, "learning_rate": 7.822142498597455e-06, "loss": 0.5656, "step": 228550 }, { "epoch": 2.53, "learning_rate": 7.821219771458942e-06, "loss": 0.6025, "step": 228555 }, { "epoch": 2.53, "learning_rate": 7.82029704432043e-06, "loss": 0.6069, "step": 228560 }, { "epoch": 2.53, "learning_rate": 7.819374317181918e-06, "loss": 0.6312, "step": 228565 }, { "epoch": 2.53, "learning_rate": 7.818451590043406e-06, "loss": 0.5705, "step": 228570 }, { "epoch": 2.53, "learning_rate": 7.817528862904894e-06, "loss": 0.5592, "step": 228575 }, { "epoch": 2.53, "learning_rate": 7.81660613576638e-06, "loss": 0.5518, "step": 228580 }, { "epoch": 2.53, "learning_rate": 7.815683408627868e-06, "loss": 0.5754, "step": 228585 }, { "epoch": 2.53, "learning_rate": 7.814760681489356e-06, "loss": 0.559, "step": 228590 }, { "epoch": 2.53, "learning_rate": 7.813837954350843e-06, "loss": 0.5625, "step": 228595 }, { "epoch": 2.53, "learning_rate": 7.812915227212331e-06, "loss": 0.5714, "step": 228600 }, { "epoch": 2.53, "learning_rate": 7.81199250007382e-06, "loss": 0.579, "step": 228605 }, { "epoch": 2.53, "learning_rate": 7.811069772935305e-06, "loss": 0.5677, "step": 228610 }, { "epoch": 2.53, "learning_rate": 7.810147045796793e-06, "loss": 0.5736, "step": 228615 }, { "epoch": 2.53, "learning_rate": 7.809224318658281e-06, "loss": 0.5582, "step": 228620 }, { "epoch": 2.53, "learning_rate": 7.808301591519769e-06, "loss": 0.5898, "step": 228625 }, { "epoch": 2.53, "learning_rate": 7.807378864381257e-06, "loss": 0.5675, "step": 228630 }, { "epoch": 2.53, "learning_rate": 7.806456137242745e-06, "loss": 0.5177, "step": 228635 }, { "epoch": 2.53, "learning_rate": 7.805533410104232e-06, "loss": 0.591, "step": 228640 }, { "epoch": 2.53, "learning_rate": 7.80461068296572e-06, "loss": 0.6056, "step": 228645 }, { "epoch": 2.53, "learning_rate": 7.803687955827208e-06, "loss": 0.5932, "step": 228650 }, { "epoch": 2.53, "learning_rate": 7.802765228688694e-06, "loss": 0.5616, "step": 228655 }, { "epoch": 2.53, "learning_rate": 7.801842501550182e-06, "loss": 0.5588, "step": 228660 }, { "epoch": 2.53, "learning_rate": 7.800919774411668e-06, "loss": 0.552, "step": 228665 }, { "epoch": 2.53, "learning_rate": 7.799997047273156e-06, "loss": 0.656, "step": 228670 }, { "epoch": 2.53, "learning_rate": 7.799074320134644e-06, "loss": 0.5788, "step": 228675 }, { "epoch": 2.53, "learning_rate": 7.798151592996132e-06, "loss": 0.54, "step": 228680 }, { "epoch": 2.53, "learning_rate": 7.79722886585762e-06, "loss": 0.5908, "step": 228685 }, { "epoch": 2.53, "learning_rate": 7.796306138719108e-06, "loss": 0.5292, "step": 228690 }, { "epoch": 2.53, "learning_rate": 7.795383411580595e-06, "loss": 0.5459, "step": 228695 }, { "epoch": 2.53, "learning_rate": 7.794460684442083e-06, "loss": 0.6101, "step": 228700 }, { "epoch": 2.53, "learning_rate": 7.793537957303571e-06, "loss": 0.5141, "step": 228705 }, { "epoch": 2.53, "learning_rate": 7.792615230165059e-06, "loss": 0.5656, "step": 228710 }, { "epoch": 2.53, "learning_rate": 7.791692503026545e-06, "loss": 0.6393, "step": 228715 }, { "epoch": 2.53, "learning_rate": 7.790769775888033e-06, "loss": 0.5769, "step": 228720 }, { "epoch": 2.53, "learning_rate": 7.78984704874952e-06, "loss": 0.6249, "step": 228725 }, { "epoch": 2.53, "learning_rate": 7.788924321611007e-06, "loss": 0.5762, "step": 228730 }, { "epoch": 2.53, "learning_rate": 7.788001594472495e-06, "loss": 0.5768, "step": 228735 }, { "epoch": 2.53, "learning_rate": 7.787078867333983e-06, "loss": 0.5752, "step": 228740 }, { "epoch": 2.53, "learning_rate": 7.78615614019547e-06, "loss": 0.6064, "step": 228745 }, { "epoch": 2.53, "learning_rate": 7.785233413056958e-06, "loss": 0.5875, "step": 228750 }, { "epoch": 2.53, "learning_rate": 7.784310685918446e-06, "loss": 0.5068, "step": 228755 }, { "epoch": 2.53, "learning_rate": 7.783387958779934e-06, "loss": 0.5671, "step": 228760 }, { "epoch": 2.53, "learning_rate": 7.782465231641422e-06, "loss": 0.5647, "step": 228765 }, { "epoch": 2.53, "learning_rate": 7.78154250450291e-06, "loss": 0.6178, "step": 228770 }, { "epoch": 2.53, "learning_rate": 7.780619777364396e-06, "loss": 0.5537, "step": 228775 }, { "epoch": 2.53, "learning_rate": 7.779697050225884e-06, "loss": 0.575, "step": 228780 }, { "epoch": 2.53, "learning_rate": 7.778774323087372e-06, "loss": 0.5883, "step": 228785 }, { "epoch": 2.53, "learning_rate": 7.77785159594886e-06, "loss": 0.5435, "step": 228790 }, { "epoch": 2.53, "learning_rate": 7.776928868810347e-06, "loss": 0.6154, "step": 228795 }, { "epoch": 2.53, "learning_rate": 7.776006141671835e-06, "loss": 0.6242, "step": 228800 }, { "epoch": 2.53, "learning_rate": 7.775083414533321e-06, "loss": 0.6103, "step": 228805 }, { "epoch": 2.53, "learning_rate": 7.774160687394809e-06, "loss": 0.6006, "step": 228810 }, { "epoch": 2.53, "learning_rate": 7.773237960256297e-06, "loss": 0.5986, "step": 228815 }, { "epoch": 2.53, "learning_rate": 7.772315233117785e-06, "loss": 0.6417, "step": 228820 }, { "epoch": 2.53, "learning_rate": 7.771392505979273e-06, "loss": 0.6052, "step": 228825 }, { "epoch": 2.53, "learning_rate": 7.770469778840759e-06, "loss": 0.5625, "step": 228830 }, { "epoch": 2.53, "learning_rate": 7.769547051702247e-06, "loss": 0.5646, "step": 228835 }, { "epoch": 2.53, "learning_rate": 7.768624324563734e-06, "loss": 0.6202, "step": 228840 }, { "epoch": 2.53, "learning_rate": 7.767701597425222e-06, "loss": 0.5974, "step": 228845 }, { "epoch": 2.53, "learning_rate": 7.76677887028671e-06, "loss": 0.5667, "step": 228850 }, { "epoch": 2.53, "learning_rate": 7.765856143148198e-06, "loss": 0.5699, "step": 228855 }, { "epoch": 2.53, "learning_rate": 7.764933416009686e-06, "loss": 0.6012, "step": 228860 }, { "epoch": 2.53, "learning_rate": 7.764010688871174e-06, "loss": 0.5929, "step": 228865 }, { "epoch": 2.53, "learning_rate": 7.763087961732662e-06, "loss": 0.5489, "step": 228870 }, { "epoch": 2.53, "learning_rate": 7.76216523459415e-06, "loss": 0.5835, "step": 228875 }, { "epoch": 2.53, "learning_rate": 7.761242507455636e-06, "loss": 0.5858, "step": 228880 }, { "epoch": 2.53, "learning_rate": 7.760319780317123e-06, "loss": 0.5994, "step": 228885 }, { "epoch": 2.53, "learning_rate": 7.75939705317861e-06, "loss": 0.6216, "step": 228890 }, { "epoch": 2.53, "learning_rate": 7.758474326040097e-06, "loss": 0.5791, "step": 228895 }, { "epoch": 2.53, "learning_rate": 7.757551598901585e-06, "loss": 0.5459, "step": 228900 }, { "epoch": 2.53, "learning_rate": 7.756628871763073e-06, "loss": 0.5827, "step": 228905 }, { "epoch": 2.53, "learning_rate": 7.755706144624561e-06, "loss": 0.5808, "step": 228910 }, { "epoch": 2.53, "learning_rate": 7.754783417486049e-06, "loss": 0.5947, "step": 228915 }, { "epoch": 2.53, "learning_rate": 7.753860690347537e-06, "loss": 0.622, "step": 228920 }, { "epoch": 2.53, "learning_rate": 7.752937963209025e-06, "loss": 0.5791, "step": 228925 }, { "epoch": 2.53, "learning_rate": 7.752015236070512e-06, "loss": 0.5885, "step": 228930 }, { "epoch": 2.53, "learning_rate": 7.751092508932e-06, "loss": 0.5927, "step": 228935 }, { "epoch": 2.53, "learning_rate": 7.750169781793486e-06, "loss": 0.6425, "step": 228940 }, { "epoch": 2.54, "learning_rate": 7.749247054654974e-06, "loss": 0.5868, "step": 228945 }, { "epoch": 2.54, "learning_rate": 7.748324327516462e-06, "loss": 0.5646, "step": 228950 }, { "epoch": 2.54, "learning_rate": 7.74740160037795e-06, "loss": 0.595, "step": 228955 }, { "epoch": 2.54, "learning_rate": 7.746478873239436e-06, "loss": 0.5894, "step": 228960 }, { "epoch": 2.54, "learning_rate": 7.745556146100924e-06, "loss": 0.5859, "step": 228965 }, { "epoch": 2.54, "learning_rate": 7.744633418962412e-06, "loss": 0.6165, "step": 228970 }, { "epoch": 2.54, "learning_rate": 7.7437106918239e-06, "loss": 0.6087, "step": 228975 }, { "epoch": 2.54, "learning_rate": 7.742787964685387e-06, "loss": 0.5982, "step": 228980 }, { "epoch": 2.54, "learning_rate": 7.741865237546875e-06, "loss": 0.5535, "step": 228985 }, { "epoch": 2.54, "learning_rate": 7.740942510408363e-06, "loss": 0.6442, "step": 228990 }, { "epoch": 2.54, "learning_rate": 7.74001978326985e-06, "loss": 0.5957, "step": 228995 }, { "epoch": 2.54, "learning_rate": 7.739097056131337e-06, "loss": 0.5577, "step": 229000 }, { "epoch": 2.54, "eval_loss": 0.559029757976532, "eval_runtime": 69.6655, "eval_samples_per_second": 28.709, "eval_steps_per_second": 14.354, "step": 229000 }, { "epoch": 2.54, "learning_rate": 7.738174328992825e-06, "loss": 0.6371, "step": 229005 }, { "epoch": 2.54, "learning_rate": 7.737251601854313e-06, "loss": 0.5217, "step": 229010 }, { "epoch": 2.54, "learning_rate": 7.7363288747158e-06, "loss": 0.6227, "step": 229015 }, { "epoch": 2.54, "learning_rate": 7.735406147577289e-06, "loss": 0.5969, "step": 229020 }, { "epoch": 2.54, "learning_rate": 7.734483420438776e-06, "loss": 0.5848, "step": 229025 }, { "epoch": 2.54, "learning_rate": 7.733560693300264e-06, "loss": 0.5915, "step": 229030 }, { "epoch": 2.54, "learning_rate": 7.73263796616175e-06, "loss": 0.5857, "step": 229035 }, { "epoch": 2.54, "learning_rate": 7.731715239023238e-06, "loss": 0.6189, "step": 229040 }, { "epoch": 2.54, "learning_rate": 7.730792511884726e-06, "loss": 0.602, "step": 229045 }, { "epoch": 2.54, "learning_rate": 7.729869784746214e-06, "loss": 0.5954, "step": 229050 }, { "epoch": 2.54, "learning_rate": 7.7289470576077e-06, "loss": 0.5478, "step": 229055 }, { "epoch": 2.54, "learning_rate": 7.728024330469188e-06, "loss": 0.5698, "step": 229060 }, { "epoch": 2.54, "learning_rate": 7.727101603330676e-06, "loss": 0.6043, "step": 229065 }, { "epoch": 2.54, "learning_rate": 7.726178876192164e-06, "loss": 0.5948, "step": 229070 }, { "epoch": 2.54, "learning_rate": 7.725256149053652e-06, "loss": 0.5858, "step": 229075 }, { "epoch": 2.54, "learning_rate": 7.72433342191514e-06, "loss": 0.5492, "step": 229080 }, { "epoch": 2.54, "learning_rate": 7.723410694776627e-06, "loss": 0.5327, "step": 229085 }, { "epoch": 2.54, "learning_rate": 7.722487967638115e-06, "loss": 0.629, "step": 229090 }, { "epoch": 2.54, "learning_rate": 7.721565240499603e-06, "loss": 0.604, "step": 229095 }, { "epoch": 2.54, "learning_rate": 7.720642513361089e-06, "loss": 0.5833, "step": 229100 }, { "epoch": 2.54, "learning_rate": 7.719719786222577e-06, "loss": 0.5693, "step": 229105 }, { "epoch": 2.54, "learning_rate": 7.718797059084063e-06, "loss": 0.6, "step": 229110 }, { "epoch": 2.54, "learning_rate": 7.717874331945551e-06, "loss": 0.5704, "step": 229115 }, { "epoch": 2.54, "learning_rate": 7.716951604807039e-06, "loss": 0.5253, "step": 229120 }, { "epoch": 2.54, "learning_rate": 7.716028877668527e-06, "loss": 0.6307, "step": 229125 }, { "epoch": 2.54, "learning_rate": 7.715106150530014e-06, "loss": 0.544, "step": 229130 }, { "epoch": 2.54, "learning_rate": 7.714183423391502e-06, "loss": 0.5578, "step": 229135 }, { "epoch": 2.54, "learning_rate": 7.71326069625299e-06, "loss": 0.579, "step": 229140 }, { "epoch": 2.54, "learning_rate": 7.712337969114478e-06, "loss": 0.5362, "step": 229145 }, { "epoch": 2.54, "learning_rate": 7.711415241975966e-06, "loss": 0.5957, "step": 229150 }, { "epoch": 2.54, "learning_rate": 7.710492514837454e-06, "loss": 0.5661, "step": 229155 }, { "epoch": 2.54, "learning_rate": 7.70956978769894e-06, "loss": 0.6044, "step": 229160 }, { "epoch": 2.54, "learning_rate": 7.708647060560428e-06, "loss": 0.6449, "step": 229165 }, { "epoch": 2.54, "learning_rate": 7.707724333421916e-06, "loss": 0.55, "step": 229170 }, { "epoch": 2.54, "learning_rate": 7.706801606283403e-06, "loss": 0.5848, "step": 229175 }, { "epoch": 2.54, "learning_rate": 7.705878879144891e-06, "loss": 0.5631, "step": 229180 }, { "epoch": 2.54, "learning_rate": 7.704956152006377e-06, "loss": 0.5876, "step": 229185 }, { "epoch": 2.54, "learning_rate": 7.704033424867865e-06, "loss": 0.635, "step": 229190 }, { "epoch": 2.54, "learning_rate": 7.703110697729353e-06, "loss": 0.6292, "step": 229195 }, { "epoch": 2.54, "learning_rate": 7.702187970590841e-06, "loss": 0.5809, "step": 229200 }, { "epoch": 2.54, "learning_rate": 7.701265243452329e-06, "loss": 0.6072, "step": 229205 }, { "epoch": 2.54, "learning_rate": 7.700342516313817e-06, "loss": 0.6265, "step": 229210 }, { "epoch": 2.54, "learning_rate": 7.699419789175303e-06, "loss": 0.6304, "step": 229215 }, { "epoch": 2.54, "learning_rate": 7.69849706203679e-06, "loss": 0.5766, "step": 229220 }, { "epoch": 2.54, "learning_rate": 7.697574334898279e-06, "loss": 0.5736, "step": 229225 }, { "epoch": 2.54, "learning_rate": 7.696651607759766e-06, "loss": 0.5742, "step": 229230 }, { "epoch": 2.54, "learning_rate": 7.695728880621254e-06, "loss": 0.6309, "step": 229235 }, { "epoch": 2.54, "learning_rate": 7.694806153482742e-06, "loss": 0.5723, "step": 229240 }, { "epoch": 2.54, "learning_rate": 7.69388342634423e-06, "loss": 0.6205, "step": 229245 }, { "epoch": 2.54, "learning_rate": 7.692960699205718e-06, "loss": 0.6035, "step": 229250 }, { "epoch": 2.54, "learning_rate": 7.692037972067206e-06, "loss": 0.5864, "step": 229255 }, { "epoch": 2.54, "learning_rate": 7.691115244928692e-06, "loss": 0.5986, "step": 229260 }, { "epoch": 2.54, "learning_rate": 7.69019251779018e-06, "loss": 0.5655, "step": 229265 }, { "epoch": 2.54, "learning_rate": 7.689269790651667e-06, "loss": 0.578, "step": 229270 }, { "epoch": 2.54, "learning_rate": 7.688347063513154e-06, "loss": 0.5931, "step": 229275 }, { "epoch": 2.54, "learning_rate": 7.687424336374641e-06, "loss": 0.5543, "step": 229280 }, { "epoch": 2.54, "learning_rate": 7.68650160923613e-06, "loss": 0.5837, "step": 229285 }, { "epoch": 2.54, "learning_rate": 7.685578882097617e-06, "loss": 0.5911, "step": 229290 }, { "epoch": 2.54, "learning_rate": 7.684656154959105e-06, "loss": 0.5802, "step": 229295 }, { "epoch": 2.54, "learning_rate": 7.683733427820593e-06, "loss": 0.576, "step": 229300 }, { "epoch": 2.54, "learning_rate": 7.68281070068208e-06, "loss": 0.6382, "step": 229305 }, { "epoch": 2.54, "learning_rate": 7.681887973543569e-06, "loss": 0.5988, "step": 229310 }, { "epoch": 2.54, "learning_rate": 7.680965246405056e-06, "loss": 0.5676, "step": 229315 }, { "epoch": 2.54, "learning_rate": 7.680042519266544e-06, "loss": 0.596, "step": 229320 }, { "epoch": 2.54, "learning_rate": 7.67911979212803e-06, "loss": 0.5708, "step": 229325 }, { "epoch": 2.54, "learning_rate": 7.678197064989518e-06, "loss": 0.6252, "step": 229330 }, { "epoch": 2.54, "learning_rate": 7.677274337851004e-06, "loss": 0.5388, "step": 229335 }, { "epoch": 2.54, "learning_rate": 7.676351610712492e-06, "loss": 0.5863, "step": 229340 }, { "epoch": 2.54, "learning_rate": 7.67542888357398e-06, "loss": 0.5778, "step": 229345 }, { "epoch": 2.54, "learning_rate": 7.674506156435468e-06, "loss": 0.5474, "step": 229350 }, { "epoch": 2.54, "learning_rate": 7.673583429296956e-06, "loss": 0.5947, "step": 229355 }, { "epoch": 2.54, "learning_rate": 7.672660702158444e-06, "loss": 0.5601, "step": 229360 }, { "epoch": 2.54, "learning_rate": 7.671737975019931e-06, "loss": 0.5423, "step": 229365 }, { "epoch": 2.54, "learning_rate": 7.67081524788142e-06, "loss": 0.6103, "step": 229370 }, { "epoch": 2.54, "learning_rate": 7.669892520742907e-06, "loss": 0.5638, "step": 229375 }, { "epoch": 2.54, "learning_rate": 7.668969793604393e-06, "loss": 0.6006, "step": 229380 }, { "epoch": 2.54, "learning_rate": 7.668047066465881e-06, "loss": 0.6168, "step": 229385 }, { "epoch": 2.54, "learning_rate": 7.667124339327369e-06, "loss": 0.5807, "step": 229390 }, { "epoch": 2.54, "learning_rate": 7.666201612188857e-06, "loss": 0.5433, "step": 229395 }, { "epoch": 2.54, "learning_rate": 7.665278885050345e-06, "loss": 0.5997, "step": 229400 }, { "epoch": 2.54, "learning_rate": 7.664356157911833e-06, "loss": 0.5877, "step": 229405 }, { "epoch": 2.54, "learning_rate": 7.66343343077332e-06, "loss": 0.5694, "step": 229410 }, { "epoch": 2.54, "learning_rate": 7.662510703634807e-06, "loss": 0.6017, "step": 229415 }, { "epoch": 2.54, "learning_rate": 7.661587976496294e-06, "loss": 0.5585, "step": 229420 }, { "epoch": 2.54, "learning_rate": 7.660665249357782e-06, "loss": 0.5422, "step": 229425 }, { "epoch": 2.54, "learning_rate": 7.65974252221927e-06, "loss": 0.5566, "step": 229430 }, { "epoch": 2.54, "learning_rate": 7.658819795080758e-06, "loss": 0.6344, "step": 229435 }, { "epoch": 2.54, "learning_rate": 7.657897067942244e-06, "loss": 0.5911, "step": 229440 }, { "epoch": 2.54, "learning_rate": 7.656974340803732e-06, "loss": 0.5476, "step": 229445 }, { "epoch": 2.54, "learning_rate": 7.65605161366522e-06, "loss": 0.5932, "step": 229450 }, { "epoch": 2.54, "learning_rate": 7.655128886526708e-06, "loss": 0.5755, "step": 229455 }, { "epoch": 2.54, "learning_rate": 7.654206159388196e-06, "loss": 0.57, "step": 229460 }, { "epoch": 2.54, "learning_rate": 7.653283432249683e-06, "loss": 0.5717, "step": 229465 }, { "epoch": 2.54, "learning_rate": 7.652360705111171e-06, "loss": 0.5484, "step": 229470 }, { "epoch": 2.54, "learning_rate": 7.651437977972659e-06, "loss": 0.588, "step": 229475 }, { "epoch": 2.54, "learning_rate": 7.650515250834147e-06, "loss": 0.5541, "step": 229480 }, { "epoch": 2.54, "learning_rate": 7.649592523695635e-06, "loss": 0.5565, "step": 229485 }, { "epoch": 2.54, "learning_rate": 7.648669796557121e-06, "loss": 0.6124, "step": 229490 }, { "epoch": 2.54, "learning_rate": 7.647747069418607e-06, "loss": 0.5633, "step": 229495 }, { "epoch": 2.54, "learning_rate": 7.646824342280095e-06, "loss": 0.6386, "step": 229500 }, { "epoch": 2.54, "learning_rate": 7.645901615141583e-06, "loss": 0.5852, "step": 229505 }, { "epoch": 2.54, "learning_rate": 7.64497888800307e-06, "loss": 0.53, "step": 229510 }, { "epoch": 2.54, "learning_rate": 7.644056160864558e-06, "loss": 0.5183, "step": 229515 }, { "epoch": 2.54, "learning_rate": 7.643133433726046e-06, "loss": 0.6177, "step": 229520 }, { "epoch": 2.54, "learning_rate": 7.642210706587534e-06, "loss": 0.5525, "step": 229525 }, { "epoch": 2.54, "learning_rate": 7.641287979449022e-06, "loss": 0.5423, "step": 229530 }, { "epoch": 2.54, "learning_rate": 7.64036525231051e-06, "loss": 0.5972, "step": 229535 }, { "epoch": 2.54, "learning_rate": 7.639442525171998e-06, "loss": 0.6162, "step": 229540 }, { "epoch": 2.54, "learning_rate": 7.638519798033484e-06, "loss": 0.5308, "step": 229545 }, { "epoch": 2.54, "learning_rate": 7.637597070894972e-06, "loss": 0.5739, "step": 229550 }, { "epoch": 2.54, "learning_rate": 7.63667434375646e-06, "loss": 0.5905, "step": 229555 }, { "epoch": 2.54, "learning_rate": 7.635751616617947e-06, "loss": 0.5392, "step": 229560 }, { "epoch": 2.54, "learning_rate": 7.634828889479434e-06, "loss": 0.5472, "step": 229565 }, { "epoch": 2.54, "learning_rate": 7.633906162340921e-06, "loss": 0.6001, "step": 229570 }, { "epoch": 2.54, "learning_rate": 7.63298343520241e-06, "loss": 0.5199, "step": 229575 }, { "epoch": 2.54, "learning_rate": 7.632060708063897e-06, "loss": 0.5709, "step": 229580 }, { "epoch": 2.54, "learning_rate": 7.631137980925385e-06, "loss": 0.6159, "step": 229585 }, { "epoch": 2.54, "learning_rate": 7.630215253786873e-06, "loss": 0.607, "step": 229590 }, { "epoch": 2.54, "learning_rate": 7.62929252664836e-06, "loss": 0.592, "step": 229595 }, { "epoch": 2.54, "learning_rate": 7.628369799509848e-06, "loss": 0.5734, "step": 229600 }, { "epoch": 2.54, "learning_rate": 7.6274470723713355e-06, "loss": 0.5919, "step": 229605 }, { "epoch": 2.54, "learning_rate": 7.626524345232823e-06, "loss": 0.545, "step": 229610 }, { "epoch": 2.54, "learning_rate": 7.62560161809431e-06, "loss": 0.5906, "step": 229615 }, { "epoch": 2.54, "learning_rate": 7.624678890955798e-06, "loss": 0.5851, "step": 229620 }, { "epoch": 2.54, "learning_rate": 7.623756163817286e-06, "loss": 0.5606, "step": 229625 }, { "epoch": 2.54, "learning_rate": 7.622833436678774e-06, "loss": 0.5938, "step": 229630 }, { "epoch": 2.54, "learning_rate": 7.621910709540262e-06, "loss": 0.5443, "step": 229635 }, { "epoch": 2.54, "learning_rate": 7.620987982401748e-06, "loss": 0.5306, "step": 229640 }, { "epoch": 2.54, "learning_rate": 7.620065255263236e-06, "loss": 0.5641, "step": 229645 }, { "epoch": 2.54, "learning_rate": 7.619142528124723e-06, "loss": 0.5538, "step": 229650 }, { "epoch": 2.54, "learning_rate": 7.618219800986211e-06, "loss": 0.5706, "step": 229655 }, { "epoch": 2.54, "learning_rate": 7.6172970738476985e-06, "loss": 0.5886, "step": 229660 }, { "epoch": 2.54, "learning_rate": 7.616374346709186e-06, "loss": 0.5744, "step": 229665 }, { "epoch": 2.54, "learning_rate": 7.615451619570674e-06, "loss": 0.6097, "step": 229670 }, { "epoch": 2.54, "learning_rate": 7.614528892432161e-06, "loss": 0.6053, "step": 229675 }, { "epoch": 2.54, "learning_rate": 7.613606165293649e-06, "loss": 0.5893, "step": 229680 }, { "epoch": 2.54, "learning_rate": 7.612683438155137e-06, "loss": 0.5629, "step": 229685 }, { "epoch": 2.54, "learning_rate": 7.611760711016625e-06, "loss": 0.59, "step": 229690 }, { "epoch": 2.54, "learning_rate": 7.610837983878112e-06, "loss": 0.6419, "step": 229695 }, { "epoch": 2.54, "learning_rate": 7.6099152567395996e-06, "loss": 0.5145, "step": 229700 }, { "epoch": 2.54, "learning_rate": 7.608992529601087e-06, "loss": 0.5756, "step": 229705 }, { "epoch": 2.54, "learning_rate": 7.608069802462575e-06, "loss": 0.6101, "step": 229710 }, { "epoch": 2.54, "learning_rate": 7.6071470753240614e-06, "loss": 0.4954, "step": 229715 }, { "epoch": 2.54, "learning_rate": 7.606224348185549e-06, "loss": 0.5906, "step": 229720 }, { "epoch": 2.54, "learning_rate": 7.605301621047037e-06, "loss": 0.5843, "step": 229725 }, { "epoch": 2.54, "learning_rate": 7.604378893908524e-06, "loss": 0.5738, "step": 229730 }, { "epoch": 2.54, "learning_rate": 7.603456166770012e-06, "loss": 0.5734, "step": 229735 }, { "epoch": 2.54, "learning_rate": 7.6025334396315e-06, "loss": 0.5777, "step": 229740 }, { "epoch": 2.54, "learning_rate": 7.601610712492988e-06, "loss": 0.6247, "step": 229745 }, { "epoch": 2.54, "learning_rate": 7.6006879853544755e-06, "loss": 0.5577, "step": 229750 }, { "epoch": 2.54, "learning_rate": 7.5997652582159625e-06, "loss": 0.6157, "step": 229755 }, { "epoch": 2.54, "learning_rate": 7.59884253107745e-06, "loss": 0.5714, "step": 229760 }, { "epoch": 2.54, "learning_rate": 7.597919803938938e-06, "loss": 0.5639, "step": 229765 }, { "epoch": 2.54, "learning_rate": 7.596997076800426e-06, "loss": 0.5732, "step": 229770 }, { "epoch": 2.54, "learning_rate": 7.596074349661914e-06, "loss": 0.5497, "step": 229775 }, { "epoch": 2.54, "learning_rate": 7.595151622523401e-06, "loss": 0.5952, "step": 229780 }, { "epoch": 2.54, "learning_rate": 7.594228895384889e-06, "loss": 0.5432, "step": 229785 }, { "epoch": 2.54, "learning_rate": 7.593306168246377e-06, "loss": 0.5723, "step": 229790 }, { "epoch": 2.54, "learning_rate": 7.592383441107863e-06, "loss": 0.6354, "step": 229795 }, { "epoch": 2.54, "learning_rate": 7.591460713969351e-06, "loss": 0.5632, "step": 229800 }, { "epoch": 2.54, "learning_rate": 7.5905379868308385e-06, "loss": 0.5213, "step": 229805 }, { "epoch": 2.54, "learning_rate": 7.589615259692326e-06, "loss": 0.6056, "step": 229810 }, { "epoch": 2.54, "learning_rate": 7.588692532553813e-06, "loss": 0.6112, "step": 229815 }, { "epoch": 2.54, "learning_rate": 7.587769805415301e-06, "loss": 0.5778, "step": 229820 }, { "epoch": 2.54, "learning_rate": 7.586847078276789e-06, "loss": 0.5933, "step": 229825 }, { "epoch": 2.54, "learning_rate": 7.585924351138277e-06, "loss": 0.5284, "step": 229830 }, { "epoch": 2.54, "learning_rate": 7.585001623999764e-06, "loss": 0.5668, "step": 229835 }, { "epoch": 2.54, "learning_rate": 7.584078896861252e-06, "loss": 0.6241, "step": 229840 }, { "epoch": 2.55, "learning_rate": 7.5831561697227396e-06, "loss": 0.6016, "step": 229845 }, { "epoch": 2.55, "learning_rate": 7.582233442584227e-06, "loss": 0.5759, "step": 229850 }, { "epoch": 2.55, "learning_rate": 7.581310715445715e-06, "loss": 0.6086, "step": 229855 }, { "epoch": 2.55, "learning_rate": 7.580387988307202e-06, "loss": 0.6145, "step": 229860 }, { "epoch": 2.55, "learning_rate": 7.57946526116869e-06, "loss": 0.5169, "step": 229865 }, { "epoch": 2.55, "learning_rate": 7.578542534030176e-06, "loss": 0.5851, "step": 229870 }, { "epoch": 2.55, "learning_rate": 7.577619806891664e-06, "loss": 0.6207, "step": 229875 }, { "epoch": 2.55, "learning_rate": 7.576697079753152e-06, "loss": 0.5792, "step": 229880 }, { "epoch": 2.55, "learning_rate": 7.57577435261464e-06, "loss": 0.5748, "step": 229885 }, { "epoch": 2.55, "learning_rate": 7.574851625476128e-06, "loss": 0.6063, "step": 229890 }, { "epoch": 2.55, "learning_rate": 7.573928898337615e-06, "loss": 0.5715, "step": 229895 }, { "epoch": 2.55, "learning_rate": 7.5730061711991025e-06, "loss": 0.6207, "step": 229900 }, { "epoch": 2.55, "learning_rate": 7.57208344406059e-06, "loss": 0.5283, "step": 229905 }, { "epoch": 2.55, "learning_rate": 7.571160716922078e-06, "loss": 0.5141, "step": 229910 }, { "epoch": 2.55, "learning_rate": 7.570237989783566e-06, "loss": 0.5901, "step": 229915 }, { "epoch": 2.55, "learning_rate": 7.569315262645053e-06, "loss": 0.5399, "step": 229920 }, { "epoch": 2.55, "learning_rate": 7.568392535506541e-06, "loss": 0.5465, "step": 229925 }, { "epoch": 2.55, "learning_rate": 7.567469808368029e-06, "loss": 0.6284, "step": 229930 }, { "epoch": 2.55, "learning_rate": 7.566547081229517e-06, "loss": 0.5769, "step": 229935 }, { "epoch": 2.55, "learning_rate": 7.5656243540910044e-06, "loss": 0.5387, "step": 229940 }, { "epoch": 2.55, "learning_rate": 7.564701626952491e-06, "loss": 0.6278, "step": 229945 }, { "epoch": 2.55, "learning_rate": 7.563778899813978e-06, "loss": 0.6131, "step": 229950 }, { "epoch": 2.55, "learning_rate": 7.5628561726754655e-06, "loss": 0.5941, "step": 229955 }, { "epoch": 2.55, "learning_rate": 7.561933445536953e-06, "loss": 0.613, "step": 229960 }, { "epoch": 2.55, "learning_rate": 7.561010718398441e-06, "loss": 0.5787, "step": 229965 }, { "epoch": 2.55, "learning_rate": 7.560087991259929e-06, "loss": 0.564, "step": 229970 }, { "epoch": 2.55, "learning_rate": 7.559165264121416e-06, "loss": 0.6303, "step": 229975 }, { "epoch": 2.55, "learning_rate": 7.558242536982904e-06, "loss": 0.5834, "step": 229980 }, { "epoch": 2.55, "learning_rate": 7.557319809844392e-06, "loss": 0.5476, "step": 229985 }, { "epoch": 2.55, "learning_rate": 7.5563970827058795e-06, "loss": 0.6131, "step": 229990 }, { "epoch": 2.55, "learning_rate": 7.555474355567367e-06, "loss": 0.5562, "step": 229995 }, { "epoch": 2.55, "learning_rate": 7.554551628428854e-06, "loss": 0.5888, "step": 230000 }, { "epoch": 2.55, "eval_loss": 0.5627982020378113, "eval_runtime": 69.2736, "eval_samples_per_second": 28.871, "eval_steps_per_second": 14.436, "step": 230000 }, { "epoch": 2.55, "learning_rate": 7.553628901290342e-06, "loss": 0.5454, "step": 230005 }, { "epoch": 2.55, "learning_rate": 7.55270617415183e-06, "loss": 0.6183, "step": 230010 }, { "epoch": 2.55, "learning_rate": 7.551783447013318e-06, "loss": 0.5778, "step": 230015 }, { "epoch": 2.55, "learning_rate": 7.550860719874804e-06, "loss": 0.6106, "step": 230020 }, { "epoch": 2.55, "learning_rate": 7.549937992736292e-06, "loss": 0.598, "step": 230025 }, { "epoch": 2.55, "learning_rate": 7.54901526559778e-06, "loss": 0.5668, "step": 230030 }, { "epoch": 2.55, "learning_rate": 7.548092538459267e-06, "loss": 0.5722, "step": 230035 }, { "epoch": 2.55, "learning_rate": 7.547169811320755e-06, "loss": 0.5673, "step": 230040 }, { "epoch": 2.55, "learning_rate": 7.5462470841822425e-06, "loss": 0.5845, "step": 230045 }, { "epoch": 2.55, "learning_rate": 7.54532435704373e-06, "loss": 0.613, "step": 230050 }, { "epoch": 2.55, "learning_rate": 7.544401629905218e-06, "loss": 0.5471, "step": 230055 }, { "epoch": 2.55, "learning_rate": 7.543478902766705e-06, "loss": 0.5663, "step": 230060 }, { "epoch": 2.55, "learning_rate": 7.542556175628193e-06, "loss": 0.5558, "step": 230065 }, { "epoch": 2.55, "learning_rate": 7.541633448489681e-06, "loss": 0.5868, "step": 230070 }, { "epoch": 2.55, "learning_rate": 7.540710721351169e-06, "loss": 0.5674, "step": 230075 }, { "epoch": 2.55, "learning_rate": 7.539787994212657e-06, "loss": 0.6104, "step": 230080 }, { "epoch": 2.55, "learning_rate": 7.538865267074144e-06, "loss": 0.5912, "step": 230085 }, { "epoch": 2.55, "learning_rate": 7.5379425399356314e-06, "loss": 0.6253, "step": 230090 }, { "epoch": 2.55, "learning_rate": 7.537019812797118e-06, "loss": 0.5606, "step": 230095 }, { "epoch": 2.55, "learning_rate": 7.5360970856586054e-06, "loss": 0.6084, "step": 230100 }, { "epoch": 2.55, "learning_rate": 7.535174358520093e-06, "loss": 0.6091, "step": 230105 }, { "epoch": 2.55, "learning_rate": 7.534251631381581e-06, "loss": 0.6165, "step": 230110 }, { "epoch": 2.55, "learning_rate": 7.533328904243068e-06, "loss": 0.5736, "step": 230115 }, { "epoch": 2.55, "learning_rate": 7.532406177104556e-06, "loss": 0.5814, "step": 230120 }, { "epoch": 2.55, "learning_rate": 7.531483449966044e-06, "loss": 0.5999, "step": 230125 }, { "epoch": 2.55, "learning_rate": 7.530560722827532e-06, "loss": 0.552, "step": 230130 }, { "epoch": 2.55, "learning_rate": 7.5296379956890195e-06, "loss": 0.5404, "step": 230135 }, { "epoch": 2.55, "learning_rate": 7.5287152685505065e-06, "loss": 0.5974, "step": 230140 }, { "epoch": 2.55, "learning_rate": 7.527792541411994e-06, "loss": 0.5996, "step": 230145 }, { "epoch": 2.55, "learning_rate": 7.526869814273482e-06, "loss": 0.6054, "step": 230150 }, { "epoch": 2.55, "learning_rate": 7.52594708713497e-06, "loss": 0.5585, "step": 230155 }, { "epoch": 2.55, "learning_rate": 7.525024359996458e-06, "loss": 0.5411, "step": 230160 }, { "epoch": 2.55, "learning_rate": 7.524101632857945e-06, "loss": 0.5996, "step": 230165 }, { "epoch": 2.55, "learning_rate": 7.523178905719432e-06, "loss": 0.5553, "step": 230170 }, { "epoch": 2.55, "learning_rate": 7.522256178580919e-06, "loss": 0.5261, "step": 230175 }, { "epoch": 2.55, "learning_rate": 7.521333451442407e-06, "loss": 0.5925, "step": 230180 }, { "epoch": 2.55, "learning_rate": 7.520410724303895e-06, "loss": 0.6019, "step": 230185 }, { "epoch": 2.55, "learning_rate": 7.5194879971653825e-06, "loss": 0.6333, "step": 230190 }, { "epoch": 2.55, "learning_rate": 7.51856527002687e-06, "loss": 0.649, "step": 230195 }, { "epoch": 2.55, "learning_rate": 7.517642542888357e-06, "loss": 0.5815, "step": 230200 }, { "epoch": 2.55, "learning_rate": 7.516719815749845e-06, "loss": 0.5896, "step": 230205 }, { "epoch": 2.55, "learning_rate": 7.515797088611333e-06, "loss": 0.546, "step": 230210 }, { "epoch": 2.55, "learning_rate": 7.514874361472821e-06, "loss": 0.5968, "step": 230215 }, { "epoch": 2.55, "learning_rate": 7.513951634334309e-06, "loss": 0.5798, "step": 230220 }, { "epoch": 2.55, "learning_rate": 7.513028907195796e-06, "loss": 0.5891, "step": 230225 }, { "epoch": 2.55, "learning_rate": 7.512106180057284e-06, "loss": 0.5867, "step": 230230 }, { "epoch": 2.55, "learning_rate": 7.5111834529187714e-06, "loss": 0.5381, "step": 230235 }, { "epoch": 2.55, "learning_rate": 7.510260725780259e-06, "loss": 0.6259, "step": 230240 }, { "epoch": 2.55, "learning_rate": 7.509337998641746e-06, "loss": 0.5883, "step": 230245 }, { "epoch": 2.55, "learning_rate": 7.508415271503233e-06, "loss": 0.6369, "step": 230250 }, { "epoch": 2.55, "learning_rate": 7.50749254436472e-06, "loss": 0.6299, "step": 230255 }, { "epoch": 2.55, "learning_rate": 7.506569817226208e-06, "loss": 0.5808, "step": 230260 }, { "epoch": 2.55, "learning_rate": 7.505647090087696e-06, "loss": 0.5721, "step": 230265 }, { "epoch": 2.55, "learning_rate": 7.504724362949184e-06, "loss": 0.5526, "step": 230270 }, { "epoch": 2.55, "learning_rate": 7.503801635810672e-06, "loss": 0.5945, "step": 230275 }, { "epoch": 2.55, "learning_rate": 7.502878908672159e-06, "loss": 0.6194, "step": 230280 }, { "epoch": 2.55, "learning_rate": 7.5019561815336465e-06, "loss": 0.6, "step": 230285 }, { "epoch": 2.55, "learning_rate": 7.501033454395134e-06, "loss": 0.5282, "step": 230290 }, { "epoch": 2.55, "learning_rate": 7.500110727256622e-06, "loss": 0.5879, "step": 230295 }, { "epoch": 2.55, "learning_rate": 7.49918800011811e-06, "loss": 0.6265, "step": 230300 }, { "epoch": 2.55, "learning_rate": 7.498265272979597e-06, "loss": 0.5547, "step": 230305 }, { "epoch": 2.55, "learning_rate": 7.497342545841085e-06, "loss": 0.5817, "step": 230310 }, { "epoch": 2.55, "learning_rate": 7.496419818702573e-06, "loss": 0.5937, "step": 230315 }, { "epoch": 2.55, "learning_rate": 7.495497091564061e-06, "loss": 0.6121, "step": 230320 }, { "epoch": 2.55, "learning_rate": 7.494574364425547e-06, "loss": 0.5884, "step": 230325 }, { "epoch": 2.55, "learning_rate": 7.493651637287035e-06, "loss": 0.5675, "step": 230330 }, { "epoch": 2.55, "learning_rate": 7.4927289101485225e-06, "loss": 0.6093, "step": 230335 }, { "epoch": 2.55, "learning_rate": 7.4918061830100095e-06, "loss": 0.5899, "step": 230340 }, { "epoch": 2.55, "learning_rate": 7.490883455871497e-06, "loss": 0.5727, "step": 230345 }, { "epoch": 2.55, "learning_rate": 7.489960728732985e-06, "loss": 0.621, "step": 230350 }, { "epoch": 2.55, "learning_rate": 7.489038001594473e-06, "loss": 0.5726, "step": 230355 }, { "epoch": 2.55, "learning_rate": 7.488115274455961e-06, "loss": 0.6384, "step": 230360 }, { "epoch": 2.55, "learning_rate": 7.487192547317448e-06, "loss": 0.6047, "step": 230365 }, { "epoch": 2.55, "learning_rate": 7.486269820178936e-06, "loss": 0.6046, "step": 230370 }, { "epoch": 2.55, "learning_rate": 7.4853470930404236e-06, "loss": 0.5791, "step": 230375 }, { "epoch": 2.55, "learning_rate": 7.484424365901911e-06, "loss": 0.565, "step": 230380 }, { "epoch": 2.55, "learning_rate": 7.483501638763398e-06, "loss": 0.5752, "step": 230385 }, { "epoch": 2.55, "learning_rate": 7.482578911624886e-06, "loss": 0.5407, "step": 230390 }, { "epoch": 2.55, "learning_rate": 7.481656184486374e-06, "loss": 0.5648, "step": 230395 }, { "epoch": 2.55, "learning_rate": 7.48073345734786e-06, "loss": 0.5657, "step": 230400 }, { "epoch": 2.55, "learning_rate": 7.479810730209348e-06, "loss": 0.5864, "step": 230405 }, { "epoch": 2.55, "learning_rate": 7.478888003070836e-06, "loss": 0.5745, "step": 230410 }, { "epoch": 2.55, "learning_rate": 7.477965275932324e-06, "loss": 0.606, "step": 230415 }, { "epoch": 2.55, "learning_rate": 7.477042548793811e-06, "loss": 0.5756, "step": 230420 }, { "epoch": 2.55, "learning_rate": 7.476119821655299e-06, "loss": 0.5703, "step": 230425 }, { "epoch": 2.55, "learning_rate": 7.4751970945167865e-06, "loss": 0.6244, "step": 230430 }, { "epoch": 2.55, "learning_rate": 7.474274367378274e-06, "loss": 0.6018, "step": 230435 }, { "epoch": 2.55, "learning_rate": 7.473351640239762e-06, "loss": 0.5411, "step": 230440 }, { "epoch": 2.55, "learning_rate": 7.472428913101249e-06, "loss": 0.5816, "step": 230445 }, { "epoch": 2.55, "learning_rate": 7.471506185962737e-06, "loss": 0.585, "step": 230450 }, { "epoch": 2.55, "learning_rate": 7.470583458824225e-06, "loss": 0.602, "step": 230455 }, { "epoch": 2.55, "learning_rate": 7.469660731685713e-06, "loss": 0.627, "step": 230460 }, { "epoch": 2.55, "learning_rate": 7.468738004547201e-06, "loss": 0.5898, "step": 230465 }, { "epoch": 2.55, "learning_rate": 7.467815277408688e-06, "loss": 0.6624, "step": 230470 }, { "epoch": 2.55, "learning_rate": 7.466892550270175e-06, "loss": 0.5512, "step": 230475 }, { "epoch": 2.55, "learning_rate": 7.465969823131662e-06, "loss": 0.5564, "step": 230480 }, { "epoch": 2.55, "learning_rate": 7.4650470959931495e-06, "loss": 0.6126, "step": 230485 }, { "epoch": 2.55, "learning_rate": 7.464124368854637e-06, "loss": 0.5796, "step": 230490 }, { "epoch": 2.55, "learning_rate": 7.463201641716125e-06, "loss": 0.5665, "step": 230495 }, { "epoch": 2.55, "learning_rate": 7.462278914577612e-06, "loss": 0.6032, "step": 230500 }, { "epoch": 2.55, "learning_rate": 7.4613561874391e-06, "loss": 0.5668, "step": 230505 }, { "epoch": 2.55, "learning_rate": 7.460433460300588e-06, "loss": 0.5758, "step": 230510 }, { "epoch": 2.55, "learning_rate": 7.459510733162076e-06, "loss": 0.5539, "step": 230515 }, { "epoch": 2.55, "learning_rate": 7.4585880060235636e-06, "loss": 0.5962, "step": 230520 }, { "epoch": 2.55, "learning_rate": 7.4576652788850506e-06, "loss": 0.5918, "step": 230525 }, { "epoch": 2.55, "learning_rate": 7.456742551746538e-06, "loss": 0.5734, "step": 230530 }, { "epoch": 2.55, "learning_rate": 7.455819824608026e-06, "loss": 0.5214, "step": 230535 }, { "epoch": 2.55, "learning_rate": 7.454897097469514e-06, "loss": 0.5725, "step": 230540 }, { "epoch": 2.55, "learning_rate": 7.453974370331002e-06, "loss": 0.6242, "step": 230545 }, { "epoch": 2.55, "learning_rate": 7.453051643192488e-06, "loss": 0.6277, "step": 230550 }, { "epoch": 2.55, "learning_rate": 7.452128916053976e-06, "loss": 0.5748, "step": 230555 }, { "epoch": 2.55, "learning_rate": 7.451206188915463e-06, "loss": 0.5206, "step": 230560 }, { "epoch": 2.55, "learning_rate": 7.450283461776951e-06, "loss": 0.6434, "step": 230565 }, { "epoch": 2.55, "learning_rate": 7.449360734638439e-06, "loss": 0.5867, "step": 230570 }, { "epoch": 2.55, "learning_rate": 7.4484380074999265e-06, "loss": 0.5853, "step": 230575 }, { "epoch": 2.55, "learning_rate": 7.447515280361414e-06, "loss": 0.5423, "step": 230580 }, { "epoch": 2.55, "learning_rate": 7.446592553222901e-06, "loss": 0.5921, "step": 230585 }, { "epoch": 2.55, "learning_rate": 7.445669826084389e-06, "loss": 0.5996, "step": 230590 }, { "epoch": 2.55, "learning_rate": 7.444747098945877e-06, "loss": 0.54, "step": 230595 }, { "epoch": 2.55, "learning_rate": 7.443824371807365e-06, "loss": 0.5657, "step": 230600 }, { "epoch": 2.55, "learning_rate": 7.442901644668853e-06, "loss": 0.578, "step": 230605 }, { "epoch": 2.55, "learning_rate": 7.44197891753034e-06, "loss": 0.589, "step": 230610 }, { "epoch": 2.55, "learning_rate": 7.441056190391828e-06, "loss": 0.5859, "step": 230615 }, { "epoch": 2.55, "learning_rate": 7.4401334632533154e-06, "loss": 0.5423, "step": 230620 }, { "epoch": 2.55, "learning_rate": 7.439210736114803e-06, "loss": 0.6023, "step": 230625 }, { "epoch": 2.55, "learning_rate": 7.4382880089762895e-06, "loss": 0.5625, "step": 230630 }, { "epoch": 2.55, "learning_rate": 7.437365281837777e-06, "loss": 0.5658, "step": 230635 }, { "epoch": 2.55, "learning_rate": 7.436442554699264e-06, "loss": 0.5999, "step": 230640 }, { "epoch": 2.55, "learning_rate": 7.435519827560752e-06, "loss": 0.591, "step": 230645 }, { "epoch": 2.55, "learning_rate": 7.43459710042224e-06, "loss": 0.5625, "step": 230650 }, { "epoch": 2.55, "learning_rate": 7.433674373283728e-06, "loss": 0.5948, "step": 230655 }, { "epoch": 2.55, "learning_rate": 7.432751646145216e-06, "loss": 0.6076, "step": 230660 }, { "epoch": 2.55, "learning_rate": 7.431828919006703e-06, "loss": 0.5468, "step": 230665 }, { "epoch": 2.55, "learning_rate": 7.4309061918681905e-06, "loss": 0.5439, "step": 230670 }, { "epoch": 2.55, "learning_rate": 7.429983464729678e-06, "loss": 0.6156, "step": 230675 }, { "epoch": 2.55, "learning_rate": 7.429060737591166e-06, "loss": 0.6582, "step": 230680 }, { "epoch": 2.55, "learning_rate": 7.428138010452654e-06, "loss": 0.565, "step": 230685 }, { "epoch": 2.55, "learning_rate": 7.427215283314141e-06, "loss": 0.591, "step": 230690 }, { "epoch": 2.55, "learning_rate": 7.426292556175629e-06, "loss": 0.5268, "step": 230695 }, { "epoch": 2.55, "learning_rate": 7.425369829037117e-06, "loss": 0.6115, "step": 230700 }, { "epoch": 2.55, "learning_rate": 7.424447101898603e-06, "loss": 0.5208, "step": 230705 }, { "epoch": 2.55, "learning_rate": 7.423524374760091e-06, "loss": 0.5982, "step": 230710 }, { "epoch": 2.55, "learning_rate": 7.422601647621579e-06, "loss": 0.5875, "step": 230715 }, { "epoch": 2.55, "learning_rate": 7.4216789204830665e-06, "loss": 0.5837, "step": 230720 }, { "epoch": 2.55, "learning_rate": 7.4207561933445535e-06, "loss": 0.5721, "step": 230725 }, { "epoch": 2.55, "learning_rate": 7.419833466206041e-06, "loss": 0.5491, "step": 230730 }, { "epoch": 2.55, "learning_rate": 7.418910739067529e-06, "loss": 0.6144, "step": 230735 }, { "epoch": 2.55, "learning_rate": 7.417988011929017e-06, "loss": 0.6359, "step": 230740 }, { "epoch": 2.55, "learning_rate": 7.417065284790505e-06, "loss": 0.5872, "step": 230745 }, { "epoch": 2.56, "learning_rate": 7.416142557651992e-06, "loss": 0.5749, "step": 230750 }, { "epoch": 2.56, "learning_rate": 7.41521983051348e-06, "loss": 0.5958, "step": 230755 }, { "epoch": 2.56, "learning_rate": 7.414297103374968e-06, "loss": 0.5914, "step": 230760 }, { "epoch": 2.56, "learning_rate": 7.4133743762364554e-06, "loss": 0.5556, "step": 230765 }, { "epoch": 2.56, "learning_rate": 7.412451649097943e-06, "loss": 0.6014, "step": 230770 }, { "epoch": 2.56, "learning_rate": 7.41152892195943e-06, "loss": 0.5709, "step": 230775 }, { "epoch": 2.56, "learning_rate": 7.4106061948209164e-06, "loss": 0.5538, "step": 230780 }, { "epoch": 2.56, "learning_rate": 7.409683467682404e-06, "loss": 0.562, "step": 230785 }, { "epoch": 2.56, "learning_rate": 7.408760740543892e-06, "loss": 0.6154, "step": 230790 }, { "epoch": 2.56, "learning_rate": 7.40783801340538e-06, "loss": 0.6, "step": 230795 }, { "epoch": 2.56, "learning_rate": 7.406915286266868e-06, "loss": 0.5942, "step": 230800 }, { "epoch": 2.56, "learning_rate": 7.405992559128355e-06, "loss": 0.5777, "step": 230805 }, { "epoch": 2.56, "learning_rate": 7.405069831989843e-06, "loss": 0.5564, "step": 230810 }, { "epoch": 2.56, "learning_rate": 7.4041471048513305e-06, "loss": 0.6357, "step": 230815 }, { "epoch": 2.56, "learning_rate": 7.403224377712818e-06, "loss": 0.591, "step": 230820 }, { "epoch": 2.56, "learning_rate": 7.402301650574306e-06, "loss": 0.589, "step": 230825 }, { "epoch": 2.56, "learning_rate": 7.401378923435793e-06, "loss": 0.5412, "step": 230830 }, { "epoch": 2.56, "learning_rate": 7.400456196297281e-06, "loss": 0.5693, "step": 230835 }, { "epoch": 2.56, "learning_rate": 7.399533469158769e-06, "loss": 0.5931, "step": 230840 }, { "epoch": 2.56, "learning_rate": 7.398610742020257e-06, "loss": 0.5411, "step": 230845 }, { "epoch": 2.56, "learning_rate": 7.397688014881745e-06, "loss": 0.5879, "step": 230850 }, { "epoch": 2.56, "learning_rate": 7.396765287743231e-06, "loss": 0.5558, "step": 230855 }, { "epoch": 2.56, "learning_rate": 7.395842560604719e-06, "loss": 0.592, "step": 230860 }, { "epoch": 2.56, "learning_rate": 7.394919833466206e-06, "loss": 0.5572, "step": 230865 }, { "epoch": 2.56, "learning_rate": 7.3939971063276935e-06, "loss": 0.5738, "step": 230870 }, { "epoch": 2.56, "learning_rate": 7.393074379189181e-06, "loss": 0.5871, "step": 230875 }, { "epoch": 2.56, "learning_rate": 7.392151652050669e-06, "loss": 0.5704, "step": 230880 }, { "epoch": 2.56, "learning_rate": 7.391228924912157e-06, "loss": 0.5783, "step": 230885 }, { "epoch": 2.56, "learning_rate": 7.390306197773644e-06, "loss": 0.6032, "step": 230890 }, { "epoch": 2.56, "learning_rate": 7.389383470635132e-06, "loss": 0.5703, "step": 230895 }, { "epoch": 2.56, "learning_rate": 7.38846074349662e-06, "loss": 0.5931, "step": 230900 }, { "epoch": 2.56, "learning_rate": 7.3875380163581076e-06, "loss": 0.5416, "step": 230905 }, { "epoch": 2.56, "learning_rate": 7.3866152892195954e-06, "loss": 0.6009, "step": 230910 }, { "epoch": 2.56, "learning_rate": 7.3856925620810824e-06, "loss": 0.5328, "step": 230915 }, { "epoch": 2.56, "learning_rate": 7.38476983494257e-06, "loss": 0.5805, "step": 230920 }, { "epoch": 2.56, "learning_rate": 7.383847107804058e-06, "loss": 0.6016, "step": 230925 }, { "epoch": 2.56, "learning_rate": 7.382924380665544e-06, "loss": 0.631, "step": 230930 }, { "epoch": 2.56, "learning_rate": 7.382001653527032e-06, "loss": 0.5784, "step": 230935 }, { "epoch": 2.56, "learning_rate": 7.38107892638852e-06, "loss": 0.5572, "step": 230940 }, { "epoch": 2.56, "learning_rate": 7.380156199250007e-06, "loss": 0.5757, "step": 230945 }, { "epoch": 2.56, "learning_rate": 7.379233472111495e-06, "loss": 0.5925, "step": 230950 }, { "epoch": 2.56, "learning_rate": 7.378310744972983e-06, "loss": 0.5722, "step": 230955 }, { "epoch": 2.56, "learning_rate": 7.3773880178344705e-06, "loss": 0.5885, "step": 230960 }, { "epoch": 2.56, "learning_rate": 7.376465290695958e-06, "loss": 0.5892, "step": 230965 }, { "epoch": 2.56, "learning_rate": 7.375542563557445e-06, "loss": 0.5772, "step": 230970 }, { "epoch": 2.56, "learning_rate": 7.374619836418933e-06, "loss": 0.6182, "step": 230975 }, { "epoch": 2.56, "learning_rate": 7.373697109280421e-06, "loss": 0.5526, "step": 230980 }, { "epoch": 2.56, "learning_rate": 7.372774382141909e-06, "loss": 0.567, "step": 230985 }, { "epoch": 2.56, "learning_rate": 7.371851655003397e-06, "loss": 0.5839, "step": 230990 }, { "epoch": 2.56, "learning_rate": 7.370928927864884e-06, "loss": 0.5347, "step": 230995 }, { "epoch": 2.56, "learning_rate": 7.370006200726372e-06, "loss": 0.6389, "step": 231000 }, { "epoch": 2.56, "eval_loss": 0.5827787518501282, "eval_runtime": 69.3727, "eval_samples_per_second": 28.83, "eval_steps_per_second": 14.415, "step": 231000 }, { "epoch": 2.56, "learning_rate": 7.369083473587858e-06, "loss": 0.5829, "step": 231005 }, { "epoch": 2.56, "learning_rate": 7.368160746449346e-06, "loss": 0.5672, "step": 231010 }, { "epoch": 2.56, "learning_rate": 7.3672380193108335e-06, "loss": 0.5749, "step": 231015 }, { "epoch": 2.56, "learning_rate": 7.366315292172321e-06, "loss": 0.5691, "step": 231020 }, { "epoch": 2.56, "learning_rate": 7.365392565033809e-06, "loss": 0.5879, "step": 231025 }, { "epoch": 2.56, "learning_rate": 7.364469837895296e-06, "loss": 0.5324, "step": 231030 }, { "epoch": 2.56, "learning_rate": 7.363547110756784e-06, "loss": 0.5202, "step": 231035 }, { "epoch": 2.56, "learning_rate": 7.362624383618272e-06, "loss": 0.5477, "step": 231040 }, { "epoch": 2.56, "learning_rate": 7.36170165647976e-06, "loss": 0.6147, "step": 231045 }, { "epoch": 2.56, "learning_rate": 7.360778929341247e-06, "loss": 0.5465, "step": 231050 }, { "epoch": 2.56, "learning_rate": 7.3598562022027346e-06, "loss": 0.5454, "step": 231055 }, { "epoch": 2.56, "learning_rate": 7.358933475064222e-06, "loss": 0.6049, "step": 231060 }, { "epoch": 2.56, "learning_rate": 7.35801074792571e-06, "loss": 0.5675, "step": 231065 }, { "epoch": 2.56, "learning_rate": 7.357088020787198e-06, "loss": 0.6179, "step": 231070 }, { "epoch": 2.56, "learning_rate": 7.356165293648685e-06, "loss": 0.616, "step": 231075 }, { "epoch": 2.56, "learning_rate": 7.355242566510173e-06, "loss": 0.6122, "step": 231080 }, { "epoch": 2.56, "learning_rate": 7.354319839371659e-06, "loss": 0.5875, "step": 231085 }, { "epoch": 2.56, "learning_rate": 7.353397112233147e-06, "loss": 0.6082, "step": 231090 }, { "epoch": 2.56, "learning_rate": 7.352474385094635e-06, "loss": 0.596, "step": 231095 }, { "epoch": 2.56, "learning_rate": 7.351551657956123e-06, "loss": 0.6053, "step": 231100 }, { "epoch": 2.56, "learning_rate": 7.3506289308176105e-06, "loss": 0.621, "step": 231105 }, { "epoch": 2.56, "learning_rate": 7.3497062036790975e-06, "loss": 0.6354, "step": 231110 }, { "epoch": 2.56, "learning_rate": 7.348783476540585e-06, "loss": 0.5704, "step": 231115 }, { "epoch": 2.56, "learning_rate": 7.347860749402073e-06, "loss": 0.6645, "step": 231120 }, { "epoch": 2.56, "learning_rate": 7.346938022263561e-06, "loss": 0.5844, "step": 231125 }, { "epoch": 2.56, "learning_rate": 7.346015295125049e-06, "loss": 0.6339, "step": 231130 }, { "epoch": 2.56, "learning_rate": 7.345092567986536e-06, "loss": 0.6135, "step": 231135 }, { "epoch": 2.56, "learning_rate": 7.344169840848024e-06, "loss": 0.6005, "step": 231140 }, { "epoch": 2.56, "learning_rate": 7.343247113709512e-06, "loss": 0.6124, "step": 231145 }, { "epoch": 2.56, "learning_rate": 7.3423243865709995e-06, "loss": 0.5424, "step": 231150 }, { "epoch": 2.56, "learning_rate": 7.341401659432487e-06, "loss": 0.5482, "step": 231155 }, { "epoch": 2.56, "learning_rate": 7.3404789322939735e-06, "loss": 0.604, "step": 231160 }, { "epoch": 2.56, "learning_rate": 7.339556205155461e-06, "loss": 0.5385, "step": 231165 }, { "epoch": 2.56, "learning_rate": 7.338633478016948e-06, "loss": 0.636, "step": 231170 }, { "epoch": 2.56, "learning_rate": 7.337710750878436e-06, "loss": 0.6004, "step": 231175 }, { "epoch": 2.56, "learning_rate": 7.336788023739924e-06, "loss": 0.5425, "step": 231180 }, { "epoch": 2.56, "learning_rate": 7.335865296601412e-06, "loss": 0.5514, "step": 231185 }, { "epoch": 2.56, "learning_rate": 7.334942569462899e-06, "loss": 0.5698, "step": 231190 }, { "epoch": 2.56, "learning_rate": 7.334019842324387e-06, "loss": 0.5823, "step": 231195 }, { "epoch": 2.56, "learning_rate": 7.3330971151858746e-06, "loss": 0.5928, "step": 231200 }, { "epoch": 2.56, "learning_rate": 7.332174388047362e-06, "loss": 0.5345, "step": 231205 }, { "epoch": 2.56, "learning_rate": 7.33125166090885e-06, "loss": 0.6039, "step": 231210 }, { "epoch": 2.56, "learning_rate": 7.330328933770337e-06, "loss": 0.5985, "step": 231215 }, { "epoch": 2.56, "learning_rate": 7.329406206631825e-06, "loss": 0.5616, "step": 231220 }, { "epoch": 2.56, "learning_rate": 7.328483479493313e-06, "loss": 0.5648, "step": 231225 }, { "epoch": 2.56, "learning_rate": 7.327560752354801e-06, "loss": 0.5964, "step": 231230 }, { "epoch": 2.56, "learning_rate": 7.326638025216287e-06, "loss": 0.5491, "step": 231235 }, { "epoch": 2.56, "learning_rate": 7.325715298077775e-06, "loss": 0.5774, "step": 231240 }, { "epoch": 2.56, "learning_rate": 7.324792570939263e-06, "loss": 0.5216, "step": 231245 }, { "epoch": 2.56, "learning_rate": 7.32386984380075e-06, "loss": 0.6159, "step": 231250 }, { "epoch": 2.56, "learning_rate": 7.3229471166622375e-06, "loss": 0.5805, "step": 231255 }, { "epoch": 2.56, "learning_rate": 7.322024389523725e-06, "loss": 0.5203, "step": 231260 }, { "epoch": 2.56, "learning_rate": 7.321101662385213e-06, "loss": 0.6048, "step": 231265 }, { "epoch": 2.56, "learning_rate": 7.320178935246701e-06, "loss": 0.5847, "step": 231270 }, { "epoch": 2.56, "learning_rate": 7.319256208108188e-06, "loss": 0.604, "step": 231275 }, { "epoch": 2.56, "learning_rate": 7.318333480969676e-06, "loss": 0.6136, "step": 231280 }, { "epoch": 2.56, "learning_rate": 7.317410753831164e-06, "loss": 0.6004, "step": 231285 }, { "epoch": 2.56, "learning_rate": 7.316488026692652e-06, "loss": 0.5562, "step": 231290 }, { "epoch": 2.56, "learning_rate": 7.3155652995541394e-06, "loss": 0.5767, "step": 231295 }, { "epoch": 2.56, "learning_rate": 7.3146425724156264e-06, "loss": 0.5777, "step": 231300 }, { "epoch": 2.56, "learning_rate": 7.313719845277114e-06, "loss": 0.5901, "step": 231305 }, { "epoch": 2.56, "learning_rate": 7.3127971181386004e-06, "loss": 0.5811, "step": 231310 }, { "epoch": 2.56, "learning_rate": 7.311874391000088e-06, "loss": 0.5872, "step": 231315 }, { "epoch": 2.56, "learning_rate": 7.310951663861576e-06, "loss": 0.5945, "step": 231320 }, { "epoch": 2.56, "learning_rate": 7.310028936723064e-06, "loss": 0.5242, "step": 231325 }, { "epoch": 2.56, "learning_rate": 7.309106209584551e-06, "loss": 0.6194, "step": 231330 }, { "epoch": 2.56, "learning_rate": 7.308183482446039e-06, "loss": 0.6162, "step": 231335 }, { "epoch": 2.56, "learning_rate": 7.307260755307527e-06, "loss": 0.559, "step": 231340 }, { "epoch": 2.56, "learning_rate": 7.3063380281690145e-06, "loss": 0.5408, "step": 231345 }, { "epoch": 2.56, "learning_rate": 7.305415301030502e-06, "loss": 0.6456, "step": 231350 }, { "epoch": 2.56, "learning_rate": 7.304492573891989e-06, "loss": 0.5867, "step": 231355 }, { "epoch": 2.56, "learning_rate": 7.303569846753477e-06, "loss": 0.5657, "step": 231360 }, { "epoch": 2.56, "learning_rate": 7.302647119614965e-06, "loss": 0.5644, "step": 231365 }, { "epoch": 2.56, "learning_rate": 7.301724392476453e-06, "loss": 0.5767, "step": 231370 }, { "epoch": 2.56, "learning_rate": 7.300801665337941e-06, "loss": 0.5922, "step": 231375 }, { "epoch": 2.56, "learning_rate": 7.299878938199428e-06, "loss": 0.5405, "step": 231380 }, { "epoch": 2.56, "learning_rate": 7.298956211060915e-06, "loss": 0.6185, "step": 231385 }, { "epoch": 2.56, "learning_rate": 7.298033483922402e-06, "loss": 0.5949, "step": 231390 }, { "epoch": 2.56, "learning_rate": 7.29711075678389e-06, "loss": 0.554, "step": 231395 }, { "epoch": 2.56, "learning_rate": 7.2961880296453775e-06, "loss": 0.5604, "step": 231400 }, { "epoch": 2.56, "learning_rate": 7.295265302506865e-06, "loss": 0.6521, "step": 231405 }, { "epoch": 2.56, "learning_rate": 7.294342575368353e-06, "loss": 0.6074, "step": 231410 }, { "epoch": 2.56, "learning_rate": 7.29341984822984e-06, "loss": 0.5985, "step": 231415 }, { "epoch": 2.56, "learning_rate": 7.292497121091328e-06, "loss": 0.6894, "step": 231420 }, { "epoch": 2.56, "learning_rate": 7.291574393952816e-06, "loss": 0.6222, "step": 231425 }, { "epoch": 2.56, "learning_rate": 7.290651666814304e-06, "loss": 0.5929, "step": 231430 }, { "epoch": 2.56, "learning_rate": 7.289728939675792e-06, "loss": 0.5829, "step": 231435 }, { "epoch": 2.56, "learning_rate": 7.288806212537279e-06, "loss": 0.5769, "step": 231440 }, { "epoch": 2.56, "learning_rate": 7.2878834853987664e-06, "loss": 0.5654, "step": 231445 }, { "epoch": 2.56, "learning_rate": 7.286960758260254e-06, "loss": 0.6009, "step": 231450 }, { "epoch": 2.56, "learning_rate": 7.286038031121742e-06, "loss": 0.5975, "step": 231455 }, { "epoch": 2.56, "learning_rate": 7.285115303983228e-06, "loss": 0.5495, "step": 231460 }, { "epoch": 2.56, "learning_rate": 7.284192576844716e-06, "loss": 0.6242, "step": 231465 }, { "epoch": 2.56, "learning_rate": 7.283269849706203e-06, "loss": 0.5823, "step": 231470 }, { "epoch": 2.56, "learning_rate": 7.282347122567691e-06, "loss": 0.552, "step": 231475 }, { "epoch": 2.56, "learning_rate": 7.281424395429179e-06, "loss": 0.5779, "step": 231480 }, { "epoch": 2.56, "learning_rate": 7.280501668290667e-06, "loss": 0.5209, "step": 231485 }, { "epoch": 2.56, "learning_rate": 7.2795789411521545e-06, "loss": 0.5405, "step": 231490 }, { "epoch": 2.56, "learning_rate": 7.2786562140136415e-06, "loss": 0.5752, "step": 231495 }, { "epoch": 2.56, "learning_rate": 7.277733486875129e-06, "loss": 0.6522, "step": 231500 }, { "epoch": 2.56, "learning_rate": 7.276810759736617e-06, "loss": 0.5744, "step": 231505 }, { "epoch": 2.56, "learning_rate": 7.275888032598105e-06, "loss": 0.5965, "step": 231510 }, { "epoch": 2.56, "learning_rate": 7.274965305459593e-06, "loss": 0.579, "step": 231515 }, { "epoch": 2.56, "learning_rate": 7.27404257832108e-06, "loss": 0.5273, "step": 231520 }, { "epoch": 2.56, "learning_rate": 7.273119851182568e-06, "loss": 0.5699, "step": 231525 }, { "epoch": 2.56, "learning_rate": 7.272197124044056e-06, "loss": 0.5492, "step": 231530 }, { "epoch": 2.56, "learning_rate": 7.2712743969055435e-06, "loss": 0.5959, "step": 231535 }, { "epoch": 2.56, "learning_rate": 7.27035166976703e-06, "loss": 0.5586, "step": 231540 }, { "epoch": 2.56, "learning_rate": 7.2694289426285175e-06, "loss": 0.6252, "step": 231545 }, { "epoch": 2.56, "learning_rate": 7.268506215490005e-06, "loss": 0.582, "step": 231550 }, { "epoch": 2.56, "learning_rate": 7.267583488351492e-06, "loss": 0.6048, "step": 231555 }, { "epoch": 2.56, "learning_rate": 7.26666076121298e-06, "loss": 0.567, "step": 231560 }, { "epoch": 2.56, "learning_rate": 7.265738034074468e-06, "loss": 0.5575, "step": 231565 }, { "epoch": 2.56, "learning_rate": 7.264815306935956e-06, "loss": 0.5953, "step": 231570 }, { "epoch": 2.56, "learning_rate": 7.263892579797444e-06, "loss": 0.5817, "step": 231575 }, { "epoch": 2.56, "learning_rate": 7.262969852658931e-06, "loss": 0.5702, "step": 231580 }, { "epoch": 2.56, "learning_rate": 7.2620471255204186e-06, "loss": 0.5645, "step": 231585 }, { "epoch": 2.56, "learning_rate": 7.261124398381906e-06, "loss": 0.564, "step": 231590 }, { "epoch": 2.56, "learning_rate": 7.260201671243394e-06, "loss": 0.5962, "step": 231595 }, { "epoch": 2.56, "learning_rate": 7.259278944104881e-06, "loss": 0.5649, "step": 231600 }, { "epoch": 2.56, "learning_rate": 7.258356216966369e-06, "loss": 0.6143, "step": 231605 }, { "epoch": 2.56, "learning_rate": 7.257433489827857e-06, "loss": 0.5474, "step": 231610 }, { "epoch": 2.56, "learning_rate": 7.256510762689343e-06, "loss": 0.5669, "step": 231615 }, { "epoch": 2.56, "learning_rate": 7.255588035550831e-06, "loss": 0.5985, "step": 231620 }, { "epoch": 2.56, "learning_rate": 7.254665308412319e-06, "loss": 0.6051, "step": 231625 }, { "epoch": 2.56, "learning_rate": 7.253742581273807e-06, "loss": 0.6059, "step": 231630 }, { "epoch": 2.56, "learning_rate": 7.252819854135294e-06, "loss": 0.5776, "step": 231635 }, { "epoch": 2.56, "learning_rate": 7.2518971269967815e-06, "loss": 0.5785, "step": 231640 }, { "epoch": 2.56, "learning_rate": 7.250974399858269e-06, "loss": 0.6079, "step": 231645 }, { "epoch": 2.56, "learning_rate": 7.250051672719757e-06, "loss": 0.6566, "step": 231650 }, { "epoch": 2.57, "learning_rate": 7.249128945581245e-06, "loss": 0.5683, "step": 231655 }, { "epoch": 2.57, "learning_rate": 7.248206218442732e-06, "loss": 0.594, "step": 231660 }, { "epoch": 2.57, "learning_rate": 7.24728349130422e-06, "loss": 0.5407, "step": 231665 }, { "epoch": 2.57, "learning_rate": 7.246360764165708e-06, "loss": 0.5579, "step": 231670 }, { "epoch": 2.57, "learning_rate": 7.245438037027196e-06, "loss": 0.6433, "step": 231675 }, { "epoch": 2.57, "learning_rate": 7.2445153098886835e-06, "loss": 0.593, "step": 231680 }, { "epoch": 2.57, "learning_rate": 7.2435925827501705e-06, "loss": 0.5402, "step": 231685 }, { "epoch": 2.57, "learning_rate": 7.2426698556116575e-06, "loss": 0.5836, "step": 231690 }, { "epoch": 2.57, "learning_rate": 7.2417471284731445e-06, "loss": 0.6002, "step": 231695 }, { "epoch": 2.57, "learning_rate": 7.240824401334632e-06, "loss": 0.5832, "step": 231700 }, { "epoch": 2.57, "learning_rate": 7.23990167419612e-06, "loss": 0.604, "step": 231705 }, { "epoch": 2.57, "learning_rate": 7.238978947057608e-06, "loss": 0.6575, "step": 231710 }, { "epoch": 2.57, "learning_rate": 7.238056219919095e-06, "loss": 0.5852, "step": 231715 }, { "epoch": 2.57, "learning_rate": 7.237133492780583e-06, "loss": 0.5451, "step": 231720 }, { "epoch": 2.57, "learning_rate": 7.236210765642071e-06, "loss": 0.5457, "step": 231725 }, { "epoch": 2.57, "learning_rate": 7.2352880385035586e-06, "loss": 0.5857, "step": 231730 }, { "epoch": 2.57, "learning_rate": 7.234365311365046e-06, "loss": 0.5833, "step": 231735 }, { "epoch": 2.57, "learning_rate": 7.233442584226533e-06, "loss": 0.6122, "step": 231740 }, { "epoch": 2.57, "learning_rate": 7.232519857088021e-06, "loss": 0.6051, "step": 231745 }, { "epoch": 2.57, "learning_rate": 7.231597129949509e-06, "loss": 0.5745, "step": 231750 }, { "epoch": 2.57, "learning_rate": 7.230674402810997e-06, "loss": 0.5598, "step": 231755 }, { "epoch": 2.57, "learning_rate": 7.229751675672485e-06, "loss": 0.5622, "step": 231760 }, { "epoch": 2.57, "learning_rate": 7.228828948533971e-06, "loss": 0.6037, "step": 231765 }, { "epoch": 2.57, "learning_rate": 7.227906221395459e-06, "loss": 0.5929, "step": 231770 }, { "epoch": 2.57, "learning_rate": 7.226983494256946e-06, "loss": 0.5893, "step": 231775 }, { "epoch": 2.57, "learning_rate": 7.226060767118434e-06, "loss": 0.5325, "step": 231780 }, { "epoch": 2.57, "learning_rate": 7.2251380399799215e-06, "loss": 0.5585, "step": 231785 }, { "epoch": 2.57, "learning_rate": 7.224215312841409e-06, "loss": 0.5279, "step": 231790 }, { "epoch": 2.57, "learning_rate": 7.223292585702897e-06, "loss": 0.5637, "step": 231795 }, { "epoch": 2.57, "learning_rate": 7.222369858564384e-06, "loss": 0.5655, "step": 231800 }, { "epoch": 2.57, "learning_rate": 7.221447131425872e-06, "loss": 0.5949, "step": 231805 }, { "epoch": 2.57, "learning_rate": 7.22052440428736e-06, "loss": 0.5868, "step": 231810 }, { "epoch": 2.57, "learning_rate": 7.219601677148848e-06, "loss": 0.5417, "step": 231815 }, { "epoch": 2.57, "learning_rate": 7.218678950010336e-06, "loss": 0.5543, "step": 231820 }, { "epoch": 2.57, "learning_rate": 7.217756222871823e-06, "loss": 0.5802, "step": 231825 }, { "epoch": 2.57, "learning_rate": 7.2168334957333105e-06, "loss": 0.6278, "step": 231830 }, { "epoch": 2.57, "learning_rate": 7.215910768594798e-06, "loss": 0.6143, "step": 231835 }, { "epoch": 2.57, "learning_rate": 7.2149880414562845e-06, "loss": 0.6639, "step": 231840 }, { "epoch": 2.57, "learning_rate": 7.214065314317772e-06, "loss": 0.5854, "step": 231845 }, { "epoch": 2.57, "learning_rate": 7.21314258717926e-06, "loss": 0.6129, "step": 231850 }, { "epoch": 2.57, "learning_rate": 7.212219860040747e-06, "loss": 0.5818, "step": 231855 }, { "epoch": 2.57, "learning_rate": 7.211297132902235e-06, "loss": 0.5407, "step": 231860 }, { "epoch": 2.57, "learning_rate": 7.210374405763723e-06, "loss": 0.5574, "step": 231865 }, { "epoch": 2.57, "learning_rate": 7.209451678625211e-06, "loss": 0.6514, "step": 231870 }, { "epoch": 2.57, "learning_rate": 7.2085289514866985e-06, "loss": 0.6002, "step": 231875 }, { "epoch": 2.57, "learning_rate": 7.2076062243481856e-06, "loss": 0.5902, "step": 231880 }, { "epoch": 2.57, "learning_rate": 7.206683497209673e-06, "loss": 0.61, "step": 231885 }, { "epoch": 2.57, "learning_rate": 7.205760770071161e-06, "loss": 0.5702, "step": 231890 }, { "epoch": 2.57, "learning_rate": 7.204838042932649e-06, "loss": 0.5818, "step": 231895 }, { "epoch": 2.57, "learning_rate": 7.203915315794137e-06, "loss": 0.5916, "step": 231900 }, { "epoch": 2.57, "learning_rate": 7.202992588655624e-06, "loss": 0.6278, "step": 231905 }, { "epoch": 2.57, "learning_rate": 7.202069861517112e-06, "loss": 0.5832, "step": 231910 }, { "epoch": 2.57, "learning_rate": 7.2011471343786e-06, "loss": 0.5845, "step": 231915 }, { "epoch": 2.57, "learning_rate": 7.200224407240086e-06, "loss": 0.5624, "step": 231920 }, { "epoch": 2.57, "learning_rate": 7.199301680101574e-06, "loss": 0.5887, "step": 231925 }, { "epoch": 2.57, "learning_rate": 7.1983789529630615e-06, "loss": 0.6023, "step": 231930 }, { "epoch": 2.57, "learning_rate": 7.197456225824549e-06, "loss": 0.596, "step": 231935 }, { "epoch": 2.57, "learning_rate": 7.196533498686036e-06, "loss": 0.5867, "step": 231940 }, { "epoch": 2.57, "learning_rate": 7.195610771547524e-06, "loss": 0.5947, "step": 231945 }, { "epoch": 2.57, "learning_rate": 7.194688044409012e-06, "loss": 0.5512, "step": 231950 }, { "epoch": 2.57, "learning_rate": 7.1937653172705e-06, "loss": 0.5507, "step": 231955 }, { "epoch": 2.57, "learning_rate": 7.192842590131988e-06, "loss": 0.562, "step": 231960 }, { "epoch": 2.57, "learning_rate": 7.191919862993475e-06, "loss": 0.605, "step": 231965 }, { "epoch": 2.57, "learning_rate": 7.190997135854963e-06, "loss": 0.6241, "step": 231970 }, { "epoch": 2.57, "learning_rate": 7.1900744087164504e-06, "loss": 0.5563, "step": 231975 }, { "epoch": 2.57, "learning_rate": 7.189151681577938e-06, "loss": 0.5802, "step": 231980 }, { "epoch": 2.57, "learning_rate": 7.188228954439426e-06, "loss": 0.5906, "step": 231985 }, { "epoch": 2.57, "learning_rate": 7.187306227300913e-06, "loss": 0.5868, "step": 231990 }, { "epoch": 2.57, "learning_rate": 7.186383500162399e-06, "loss": 0.5312, "step": 231995 }, { "epoch": 2.57, "learning_rate": 7.185460773023887e-06, "loss": 0.5782, "step": 232000 }, { "epoch": 2.57, "eval_loss": 0.5542840361595154, "eval_runtime": 69.8217, "eval_samples_per_second": 28.644, "eval_steps_per_second": 14.322, "step": 232000 }, { "epoch": 2.57, "learning_rate": 7.184538045885375e-06, "loss": 0.5128, "step": 232005 }, { "epoch": 2.57, "learning_rate": 7.183615318746863e-06, "loss": 0.5811, "step": 232010 }, { "epoch": 2.57, "learning_rate": 7.182692591608351e-06, "loss": 0.5923, "step": 232015 }, { "epoch": 2.57, "learning_rate": 7.181769864469838e-06, "loss": 0.5508, "step": 232020 }, { "epoch": 2.57, "learning_rate": 7.1808471373313255e-06, "loss": 0.5723, "step": 232025 }, { "epoch": 2.57, "learning_rate": 7.179924410192813e-06, "loss": 0.5958, "step": 232030 }, { "epoch": 2.57, "learning_rate": 7.179001683054301e-06, "loss": 0.5942, "step": 232035 }, { "epoch": 2.57, "learning_rate": 7.178078955915789e-06, "loss": 0.6185, "step": 232040 }, { "epoch": 2.57, "learning_rate": 7.177156228777276e-06, "loss": 0.5872, "step": 232045 }, { "epoch": 2.57, "learning_rate": 7.176233501638764e-06, "loss": 0.605, "step": 232050 }, { "epoch": 2.57, "learning_rate": 7.175310774500252e-06, "loss": 0.569, "step": 232055 }, { "epoch": 2.57, "learning_rate": 7.17438804736174e-06, "loss": 0.5741, "step": 232060 }, { "epoch": 2.57, "learning_rate": 7.1734653202232275e-06, "loss": 0.5304, "step": 232065 }, { "epoch": 2.57, "learning_rate": 7.172542593084714e-06, "loss": 0.5903, "step": 232070 }, { "epoch": 2.57, "learning_rate": 7.1716198659462015e-06, "loss": 0.5804, "step": 232075 }, { "epoch": 2.57, "learning_rate": 7.1706971388076885e-06, "loss": 0.6029, "step": 232080 }, { "epoch": 2.57, "learning_rate": 7.169774411669176e-06, "loss": 0.5788, "step": 232085 }, { "epoch": 2.57, "learning_rate": 7.168851684530664e-06, "loss": 0.6336, "step": 232090 }, { "epoch": 2.57, "learning_rate": 7.167928957392152e-06, "loss": 0.5763, "step": 232095 }, { "epoch": 2.57, "learning_rate": 7.16700623025364e-06, "loss": 0.5009, "step": 232100 }, { "epoch": 2.57, "learning_rate": 7.166083503115127e-06, "loss": 0.5964, "step": 232105 }, { "epoch": 2.57, "learning_rate": 7.165160775976615e-06, "loss": 0.5723, "step": 232110 }, { "epoch": 2.57, "learning_rate": 7.164238048838103e-06, "loss": 0.6483, "step": 232115 }, { "epoch": 2.57, "learning_rate": 7.1633153216995904e-06, "loss": 0.5738, "step": 232120 }, { "epoch": 2.57, "learning_rate": 7.162392594561078e-06, "loss": 0.5808, "step": 232125 }, { "epoch": 2.57, "learning_rate": 7.161469867422565e-06, "loss": 0.5132, "step": 232130 }, { "epoch": 2.57, "learning_rate": 7.160547140284053e-06, "loss": 0.5626, "step": 232135 }, { "epoch": 2.57, "learning_rate": 7.159624413145541e-06, "loss": 0.6071, "step": 232140 }, { "epoch": 2.57, "learning_rate": 7.158701686007027e-06, "loss": 0.6353, "step": 232145 }, { "epoch": 2.57, "learning_rate": 7.157778958868515e-06, "loss": 0.6038, "step": 232150 }, { "epoch": 2.57, "learning_rate": 7.156856231730003e-06, "loss": 0.5643, "step": 232155 }, { "epoch": 2.57, "learning_rate": 7.15593350459149e-06, "loss": 0.5986, "step": 232160 }, { "epoch": 2.57, "learning_rate": 7.155010777452978e-06, "loss": 0.5957, "step": 232165 }, { "epoch": 2.57, "learning_rate": 7.1540880503144655e-06, "loss": 0.5294, "step": 232170 }, { "epoch": 2.57, "learning_rate": 7.153165323175953e-06, "loss": 0.6058, "step": 232175 }, { "epoch": 2.57, "learning_rate": 7.152242596037441e-06, "loss": 0.5736, "step": 232180 }, { "epoch": 2.57, "learning_rate": 7.151319868898928e-06, "loss": 0.6063, "step": 232185 }, { "epoch": 2.57, "learning_rate": 7.150397141760416e-06, "loss": 0.534, "step": 232190 }, { "epoch": 2.57, "learning_rate": 7.149474414621904e-06, "loss": 0.6109, "step": 232195 }, { "epoch": 2.57, "learning_rate": 7.148551687483392e-06, "loss": 0.5334, "step": 232200 }, { "epoch": 2.57, "learning_rate": 7.14762896034488e-06, "loss": 0.5624, "step": 232205 }, { "epoch": 2.57, "learning_rate": 7.146706233206367e-06, "loss": 0.5754, "step": 232210 }, { "epoch": 2.57, "learning_rate": 7.1457835060678545e-06, "loss": 0.5707, "step": 232215 }, { "epoch": 2.57, "learning_rate": 7.144860778929341e-06, "loss": 0.6071, "step": 232220 }, { "epoch": 2.57, "learning_rate": 7.1439380517908285e-06, "loss": 0.5792, "step": 232225 }, { "epoch": 2.57, "learning_rate": 7.143015324652316e-06, "loss": 0.5498, "step": 232230 }, { "epoch": 2.57, "learning_rate": 7.142092597513804e-06, "loss": 0.5774, "step": 232235 }, { "epoch": 2.57, "learning_rate": 7.141169870375292e-06, "loss": 0.6478, "step": 232240 }, { "epoch": 2.57, "learning_rate": 7.140247143236779e-06, "loss": 0.5464, "step": 232245 }, { "epoch": 2.57, "learning_rate": 7.139324416098267e-06, "loss": 0.5945, "step": 232250 }, { "epoch": 2.57, "learning_rate": 7.138401688959755e-06, "loss": 0.5869, "step": 232255 }, { "epoch": 2.57, "learning_rate": 7.1374789618212426e-06, "loss": 0.5642, "step": 232260 }, { "epoch": 2.57, "learning_rate": 7.1365562346827296e-06, "loss": 0.5946, "step": 232265 }, { "epoch": 2.57, "learning_rate": 7.135633507544217e-06, "loss": 0.5854, "step": 232270 }, { "epoch": 2.57, "learning_rate": 7.134710780405705e-06, "loss": 0.5596, "step": 232275 }, { "epoch": 2.57, "learning_rate": 7.133788053267193e-06, "loss": 0.5861, "step": 232280 }, { "epoch": 2.57, "learning_rate": 7.132865326128681e-06, "loss": 0.5792, "step": 232285 }, { "epoch": 2.57, "learning_rate": 7.131942598990168e-06, "loss": 0.5528, "step": 232290 }, { "epoch": 2.57, "learning_rate": 7.131019871851655e-06, "loss": 0.5174, "step": 232295 }, { "epoch": 2.57, "learning_rate": 7.130097144713142e-06, "loss": 0.608, "step": 232300 }, { "epoch": 2.57, "learning_rate": 7.12917441757463e-06, "loss": 0.6359, "step": 232305 }, { "epoch": 2.57, "learning_rate": 7.128251690436118e-06, "loss": 0.606, "step": 232310 }, { "epoch": 2.57, "learning_rate": 7.1273289632976055e-06, "loss": 0.6193, "step": 232315 }, { "epoch": 2.57, "learning_rate": 7.126406236159093e-06, "loss": 0.5829, "step": 232320 }, { "epoch": 2.57, "learning_rate": 7.12548350902058e-06, "loss": 0.5586, "step": 232325 }, { "epoch": 2.57, "learning_rate": 7.124560781882068e-06, "loss": 0.5639, "step": 232330 }, { "epoch": 2.57, "learning_rate": 7.123638054743556e-06, "loss": 0.5879, "step": 232335 }, { "epoch": 2.57, "learning_rate": 7.122715327605044e-06, "loss": 0.5584, "step": 232340 }, { "epoch": 2.57, "learning_rate": 7.121792600466532e-06, "loss": 0.614, "step": 232345 }, { "epoch": 2.57, "learning_rate": 7.120869873328019e-06, "loss": 0.5645, "step": 232350 }, { "epoch": 2.57, "learning_rate": 7.119947146189507e-06, "loss": 0.5959, "step": 232355 }, { "epoch": 2.57, "learning_rate": 7.1190244190509945e-06, "loss": 0.5877, "step": 232360 }, { "epoch": 2.57, "learning_rate": 7.118101691912482e-06, "loss": 0.5671, "step": 232365 }, { "epoch": 2.57, "learning_rate": 7.11717896477397e-06, "loss": 0.6124, "step": 232370 }, { "epoch": 2.57, "learning_rate": 7.116256237635456e-06, "loss": 0.6011, "step": 232375 }, { "epoch": 2.57, "learning_rate": 7.115333510496944e-06, "loss": 0.5765, "step": 232380 }, { "epoch": 2.57, "learning_rate": 7.114410783358431e-06, "loss": 0.6012, "step": 232385 }, { "epoch": 2.57, "learning_rate": 7.113488056219919e-06, "loss": 0.5953, "step": 232390 }, { "epoch": 2.57, "learning_rate": 7.112565329081407e-06, "loss": 0.6118, "step": 232395 }, { "epoch": 2.57, "learning_rate": 7.111642601942895e-06, "loss": 0.5947, "step": 232400 }, { "epoch": 2.57, "learning_rate": 7.110719874804382e-06, "loss": 0.5271, "step": 232405 }, { "epoch": 2.57, "learning_rate": 7.1097971476658696e-06, "loss": 0.5805, "step": 232410 }, { "epoch": 2.57, "learning_rate": 7.108874420527357e-06, "loss": 0.5582, "step": 232415 }, { "epoch": 2.57, "learning_rate": 7.107951693388845e-06, "loss": 0.6151, "step": 232420 }, { "epoch": 2.57, "learning_rate": 7.107028966250333e-06, "loss": 0.5822, "step": 232425 }, { "epoch": 2.57, "learning_rate": 7.10610623911182e-06, "loss": 0.5596, "step": 232430 }, { "epoch": 2.57, "learning_rate": 7.105183511973308e-06, "loss": 0.6094, "step": 232435 }, { "epoch": 2.57, "learning_rate": 7.104260784834796e-06, "loss": 0.5546, "step": 232440 }, { "epoch": 2.57, "learning_rate": 7.103338057696284e-06, "loss": 0.5779, "step": 232445 }, { "epoch": 2.57, "learning_rate": 7.10241533055777e-06, "loss": 0.5741, "step": 232450 }, { "epoch": 2.57, "learning_rate": 7.101492603419258e-06, "loss": 0.549, "step": 232455 }, { "epoch": 2.57, "learning_rate": 7.1005698762807455e-06, "loss": 0.6358, "step": 232460 }, { "epoch": 2.57, "learning_rate": 7.0996471491422325e-06, "loss": 0.5535, "step": 232465 }, { "epoch": 2.57, "learning_rate": 7.09872442200372e-06, "loss": 0.5267, "step": 232470 }, { "epoch": 2.57, "learning_rate": 7.097801694865208e-06, "loss": 0.5796, "step": 232475 }, { "epoch": 2.57, "learning_rate": 7.096878967726696e-06, "loss": 0.5369, "step": 232480 }, { "epoch": 2.57, "learning_rate": 7.095956240588184e-06, "loss": 0.5819, "step": 232485 }, { "epoch": 2.57, "learning_rate": 7.095033513449671e-06, "loss": 0.6007, "step": 232490 }, { "epoch": 2.57, "learning_rate": 7.094110786311159e-06, "loss": 0.6017, "step": 232495 }, { "epoch": 2.57, "learning_rate": 7.093188059172647e-06, "loss": 0.6327, "step": 232500 }, { "epoch": 2.57, "learning_rate": 7.0922653320341344e-06, "loss": 0.6267, "step": 232505 }, { "epoch": 2.57, "learning_rate": 7.091342604895622e-06, "loss": 0.6318, "step": 232510 }, { "epoch": 2.57, "learning_rate": 7.090419877757109e-06, "loss": 0.5604, "step": 232515 }, { "epoch": 2.57, "learning_rate": 7.089497150618597e-06, "loss": 0.5623, "step": 232520 }, { "epoch": 2.57, "learning_rate": 7.088574423480083e-06, "loss": 0.5682, "step": 232525 }, { "epoch": 2.57, "learning_rate": 7.087651696341571e-06, "loss": 0.6295, "step": 232530 }, { "epoch": 2.57, "learning_rate": 7.086728969203059e-06, "loss": 0.5903, "step": 232535 }, { "epoch": 2.57, "learning_rate": 7.085806242064547e-06, "loss": 0.6417, "step": 232540 }, { "epoch": 2.57, "learning_rate": 7.084883514926034e-06, "loss": 0.599, "step": 232545 }, { "epoch": 2.57, "learning_rate": 7.083960787787522e-06, "loss": 0.5931, "step": 232550 }, { "epoch": 2.58, "learning_rate": 7.0830380606490095e-06, "loss": 0.553, "step": 232555 }, { "epoch": 2.58, "learning_rate": 7.082115333510497e-06, "loss": 0.5689, "step": 232560 }, { "epoch": 2.58, "learning_rate": 7.081192606371985e-06, "loss": 0.5533, "step": 232565 }, { "epoch": 2.58, "learning_rate": 7.080269879233472e-06, "loss": 0.6495, "step": 232570 }, { "epoch": 2.58, "learning_rate": 7.07934715209496e-06, "loss": 0.5646, "step": 232575 }, { "epoch": 2.58, "learning_rate": 7.078424424956448e-06, "loss": 0.6022, "step": 232580 }, { "epoch": 2.58, "learning_rate": 7.077501697817936e-06, "loss": 0.5986, "step": 232585 }, { "epoch": 2.58, "learning_rate": 7.076578970679424e-06, "loss": 0.5534, "step": 232590 }, { "epoch": 2.58, "learning_rate": 7.075656243540911e-06, "loss": 0.5872, "step": 232595 }, { "epoch": 2.58, "learning_rate": 7.074733516402398e-06, "loss": 0.5689, "step": 232600 }, { "epoch": 2.58, "learning_rate": 7.073810789263885e-06, "loss": 0.5408, "step": 232605 }, { "epoch": 2.58, "learning_rate": 7.0728880621253725e-06, "loss": 0.5593, "step": 232610 }, { "epoch": 2.58, "learning_rate": 7.07196533498686e-06, "loss": 0.5729, "step": 232615 }, { "epoch": 2.58, "learning_rate": 7.071042607848348e-06, "loss": 0.6324, "step": 232620 }, { "epoch": 2.58, "learning_rate": 7.070119880709836e-06, "loss": 0.6019, "step": 232625 }, { "epoch": 2.58, "learning_rate": 7.069197153571323e-06, "loss": 0.5417, "step": 232630 }, { "epoch": 2.58, "learning_rate": 7.068274426432811e-06, "loss": 0.6354, "step": 232635 }, { "epoch": 2.58, "learning_rate": 7.067351699294299e-06, "loss": 0.5631, "step": 232640 }, { "epoch": 2.58, "learning_rate": 7.066428972155787e-06, "loss": 0.5994, "step": 232645 }, { "epoch": 2.58, "learning_rate": 7.0655062450172744e-06, "loss": 0.596, "step": 232650 }, { "epoch": 2.58, "learning_rate": 7.0645835178787614e-06, "loss": 0.5371, "step": 232655 }, { "epoch": 2.58, "learning_rate": 7.063660790740249e-06, "loss": 0.571, "step": 232660 }, { "epoch": 2.58, "learning_rate": 7.062738063601737e-06, "loss": 0.6278, "step": 232665 }, { "epoch": 2.58, "learning_rate": 7.061815336463225e-06, "loss": 0.6147, "step": 232670 }, { "epoch": 2.58, "learning_rate": 7.060892609324711e-06, "loss": 0.5845, "step": 232675 }, { "epoch": 2.58, "learning_rate": 7.059969882186199e-06, "loss": 0.5594, "step": 232680 }, { "epoch": 2.58, "learning_rate": 7.059047155047686e-06, "loss": 0.5972, "step": 232685 }, { "epoch": 2.58, "learning_rate": 7.058124427909174e-06, "loss": 0.5871, "step": 232690 }, { "epoch": 2.58, "learning_rate": 7.057201700770662e-06, "loss": 0.5741, "step": 232695 }, { "epoch": 2.58, "learning_rate": 7.0562789736321495e-06, "loss": 0.6047, "step": 232700 }, { "epoch": 2.58, "learning_rate": 7.055356246493637e-06, "loss": 0.5465, "step": 232705 }, { "epoch": 2.58, "learning_rate": 7.054433519355124e-06, "loss": 0.5836, "step": 232710 }, { "epoch": 2.58, "learning_rate": 7.053510792216612e-06, "loss": 0.5531, "step": 232715 }, { "epoch": 2.58, "learning_rate": 7.0525880650781e-06, "loss": 0.6312, "step": 232720 }, { "epoch": 2.58, "learning_rate": 7.051665337939588e-06, "loss": 0.593, "step": 232725 }, { "epoch": 2.58, "learning_rate": 7.050742610801076e-06, "loss": 0.5916, "step": 232730 }, { "epoch": 2.58, "learning_rate": 7.049819883662563e-06, "loss": 0.5876, "step": 232735 }, { "epoch": 2.58, "learning_rate": 7.048897156524051e-06, "loss": 0.5625, "step": 232740 }, { "epoch": 2.58, "learning_rate": 7.0479744293855385e-06, "loss": 0.5785, "step": 232745 }, { "epoch": 2.58, "learning_rate": 7.047051702247026e-06, "loss": 0.6071, "step": 232750 }, { "epoch": 2.58, "learning_rate": 7.0461289751085125e-06, "loss": 0.5874, "step": 232755 }, { "epoch": 2.58, "learning_rate": 7.04520624797e-06, "loss": 0.6236, "step": 232760 }, { "epoch": 2.58, "learning_rate": 7.044283520831488e-06, "loss": 0.5493, "step": 232765 }, { "epoch": 2.58, "learning_rate": 7.043360793692975e-06, "loss": 0.617, "step": 232770 }, { "epoch": 2.58, "learning_rate": 7.042438066554463e-06, "loss": 0.5744, "step": 232775 }, { "epoch": 2.58, "learning_rate": 7.041515339415951e-06, "loss": 0.5705, "step": 232780 }, { "epoch": 2.58, "learning_rate": 7.040592612277439e-06, "loss": 0.6141, "step": 232785 }, { "epoch": 2.58, "learning_rate": 7.039669885138927e-06, "loss": 0.5586, "step": 232790 }, { "epoch": 2.58, "learning_rate": 7.038747158000414e-06, "loss": 0.547, "step": 232795 }, { "epoch": 2.58, "learning_rate": 7.0378244308619014e-06, "loss": 0.6427, "step": 232800 }, { "epoch": 2.58, "learning_rate": 7.036901703723389e-06, "loss": 0.6102, "step": 232805 }, { "epoch": 2.58, "learning_rate": 7.035978976584877e-06, "loss": 0.5735, "step": 232810 }, { "epoch": 2.58, "learning_rate": 7.035056249446364e-06, "loss": 0.5692, "step": 232815 }, { "epoch": 2.58, "learning_rate": 7.034133522307852e-06, "loss": 0.552, "step": 232820 }, { "epoch": 2.58, "learning_rate": 7.03321079516934e-06, "loss": 0.5482, "step": 232825 }, { "epoch": 2.58, "learning_rate": 7.032288068030826e-06, "loss": 0.5267, "step": 232830 }, { "epoch": 2.58, "learning_rate": 7.031365340892314e-06, "loss": 0.6392, "step": 232835 }, { "epoch": 2.58, "learning_rate": 7.030442613753802e-06, "loss": 0.6201, "step": 232840 }, { "epoch": 2.58, "learning_rate": 7.0295198866152895e-06, "loss": 0.5259, "step": 232845 }, { "epoch": 2.58, "learning_rate": 7.0285971594767765e-06, "loss": 0.5764, "step": 232850 }, { "epoch": 2.58, "learning_rate": 7.027674432338264e-06, "loss": 0.5974, "step": 232855 }, { "epoch": 2.58, "learning_rate": 7.026751705199752e-06, "loss": 0.5509, "step": 232860 }, { "epoch": 2.58, "learning_rate": 7.02582897806124e-06, "loss": 0.5491, "step": 232865 }, { "epoch": 2.58, "learning_rate": 7.024906250922728e-06, "loss": 0.5798, "step": 232870 }, { "epoch": 2.58, "learning_rate": 7.023983523784215e-06, "loss": 0.5942, "step": 232875 }, { "epoch": 2.58, "learning_rate": 7.023060796645703e-06, "loss": 0.5899, "step": 232880 }, { "epoch": 2.58, "learning_rate": 7.022138069507191e-06, "loss": 0.6204, "step": 232885 }, { "epoch": 2.58, "learning_rate": 7.0212153423686785e-06, "loss": 0.6501, "step": 232890 }, { "epoch": 2.58, "learning_rate": 7.020292615230166e-06, "loss": 0.6349, "step": 232895 }, { "epoch": 2.58, "learning_rate": 7.019369888091653e-06, "loss": 0.5687, "step": 232900 }, { "epoch": 2.58, "learning_rate": 7.01844716095314e-06, "loss": 0.5885, "step": 232905 }, { "epoch": 2.58, "learning_rate": 7.017524433814627e-06, "loss": 0.6008, "step": 232910 }, { "epoch": 2.58, "learning_rate": 7.016601706676115e-06, "loss": 0.5664, "step": 232915 }, { "epoch": 2.58, "learning_rate": 7.015678979537603e-06, "loss": 0.5206, "step": 232920 }, { "epoch": 2.58, "learning_rate": 7.014756252399091e-06, "loss": 0.5678, "step": 232925 }, { "epoch": 2.58, "learning_rate": 7.013833525260579e-06, "loss": 0.5676, "step": 232930 }, { "epoch": 2.58, "learning_rate": 7.012910798122066e-06, "loss": 0.5636, "step": 232935 }, { "epoch": 2.58, "learning_rate": 7.0119880709835536e-06, "loss": 0.5338, "step": 232940 }, { "epoch": 2.58, "learning_rate": 7.011065343845041e-06, "loss": 0.5926, "step": 232945 }, { "epoch": 2.58, "learning_rate": 7.010142616706529e-06, "loss": 0.6031, "step": 232950 }, { "epoch": 2.58, "learning_rate": 7.009219889568016e-06, "loss": 0.555, "step": 232955 }, { "epoch": 2.58, "learning_rate": 7.008297162429504e-06, "loss": 0.5743, "step": 232960 }, { "epoch": 2.58, "learning_rate": 7.007374435290992e-06, "loss": 0.5913, "step": 232965 }, { "epoch": 2.58, "learning_rate": 7.00645170815248e-06, "loss": 0.5419, "step": 232970 }, { "epoch": 2.58, "learning_rate": 7.005528981013968e-06, "loss": 0.5834, "step": 232975 }, { "epoch": 2.58, "learning_rate": 7.004606253875454e-06, "loss": 0.5984, "step": 232980 }, { "epoch": 2.58, "learning_rate": 7.003683526736942e-06, "loss": 0.5612, "step": 232985 }, { "epoch": 2.58, "learning_rate": 7.002760799598429e-06, "loss": 0.6461, "step": 232990 }, { "epoch": 2.58, "learning_rate": 7.0018380724599165e-06, "loss": 0.5612, "step": 232995 }, { "epoch": 2.58, "learning_rate": 7.000915345321404e-06, "loss": 0.5871, "step": 233000 }, { "epoch": 2.58, "eval_loss": 0.5574735403060913, "eval_runtime": 70.1957, "eval_samples_per_second": 28.492, "eval_steps_per_second": 14.246, "step": 233000 }, { "epoch": 2.58, "learning_rate": 6.999992618182892e-06, "loss": 0.5518, "step": 233005 }, { "epoch": 2.58, "learning_rate": 6.99906989104438e-06, "loss": 0.595, "step": 233010 }, { "epoch": 2.58, "learning_rate": 6.998147163905867e-06, "loss": 0.5891, "step": 233015 }, { "epoch": 2.58, "learning_rate": 6.997224436767355e-06, "loss": 0.5645, "step": 233020 }, { "epoch": 2.58, "learning_rate": 6.996301709628843e-06, "loss": 0.5515, "step": 233025 }, { "epoch": 2.58, "learning_rate": 6.995378982490331e-06, "loss": 0.5957, "step": 233030 }, { "epoch": 2.58, "learning_rate": 6.9944562553518185e-06, "loss": 0.6139, "step": 233035 }, { "epoch": 2.58, "learning_rate": 6.9935335282133055e-06, "loss": 0.6073, "step": 233040 }, { "epoch": 2.58, "learning_rate": 6.992610801074793e-06, "loss": 0.5924, "step": 233045 }, { "epoch": 2.58, "learning_rate": 6.991688073936281e-06, "loss": 0.5481, "step": 233050 }, { "epoch": 2.58, "learning_rate": 6.990765346797767e-06, "loss": 0.5637, "step": 233055 }, { "epoch": 2.58, "learning_rate": 6.989842619659255e-06, "loss": 0.5877, "step": 233060 }, { "epoch": 2.58, "learning_rate": 6.988919892520743e-06, "loss": 0.5878, "step": 233065 }, { "epoch": 2.58, "learning_rate": 6.98799716538223e-06, "loss": 0.5451, "step": 233070 }, { "epoch": 2.58, "learning_rate": 6.987074438243718e-06, "loss": 0.5602, "step": 233075 }, { "epoch": 2.58, "learning_rate": 6.986151711105206e-06, "loss": 0.594, "step": 233080 }, { "epoch": 2.58, "learning_rate": 6.9852289839666936e-06, "loss": 0.594, "step": 233085 }, { "epoch": 2.58, "learning_rate": 6.984306256828181e-06, "loss": 0.6375, "step": 233090 }, { "epoch": 2.58, "learning_rate": 6.983383529689668e-06, "loss": 0.5662, "step": 233095 }, { "epoch": 2.58, "learning_rate": 6.982460802551156e-06, "loss": 0.6152, "step": 233100 }, { "epoch": 2.58, "learning_rate": 6.981538075412644e-06, "loss": 0.575, "step": 233105 }, { "epoch": 2.58, "learning_rate": 6.980615348274132e-06, "loss": 0.604, "step": 233110 }, { "epoch": 2.58, "learning_rate": 6.97969262113562e-06, "loss": 0.6173, "step": 233115 }, { "epoch": 2.58, "learning_rate": 6.978769893997107e-06, "loss": 0.5793, "step": 233120 }, { "epoch": 2.58, "learning_rate": 6.977847166858595e-06, "loss": 0.5933, "step": 233125 }, { "epoch": 2.58, "learning_rate": 6.976924439720081e-06, "loss": 0.5541, "step": 233130 }, { "epoch": 2.58, "learning_rate": 6.976001712581569e-06, "loss": 0.5766, "step": 233135 }, { "epoch": 2.58, "learning_rate": 6.9750789854430565e-06, "loss": 0.5917, "step": 233140 }, { "epoch": 2.58, "learning_rate": 6.974156258304544e-06, "loss": 0.5935, "step": 233145 }, { "epoch": 2.58, "learning_rate": 6.973233531166032e-06, "loss": 0.5994, "step": 233150 }, { "epoch": 2.58, "learning_rate": 6.972310804027519e-06, "loss": 0.5672, "step": 233155 }, { "epoch": 2.58, "learning_rate": 6.971388076889007e-06, "loss": 0.5959, "step": 233160 }, { "epoch": 2.58, "learning_rate": 6.970465349750495e-06, "loss": 0.5829, "step": 233165 }, { "epoch": 2.58, "learning_rate": 6.969542622611983e-06, "loss": 0.609, "step": 233170 }, { "epoch": 2.58, "learning_rate": 6.968619895473471e-06, "loss": 0.5657, "step": 233175 }, { "epoch": 2.58, "learning_rate": 6.967697168334958e-06, "loss": 0.566, "step": 233180 }, { "epoch": 2.58, "learning_rate": 6.9667744411964454e-06, "loss": 0.6247, "step": 233185 }, { "epoch": 2.58, "learning_rate": 6.965851714057933e-06, "loss": 0.5896, "step": 233190 }, { "epoch": 2.58, "learning_rate": 6.964928986919421e-06, "loss": 0.5687, "step": 233195 }, { "epoch": 2.58, "learning_rate": 6.964006259780909e-06, "loss": 0.5915, "step": 233200 }, { "epoch": 2.58, "learning_rate": 6.963083532642396e-06, "loss": 0.592, "step": 233205 }, { "epoch": 2.58, "learning_rate": 6.962160805503882e-06, "loss": 0.5425, "step": 233210 }, { "epoch": 2.58, "learning_rate": 6.96123807836537e-06, "loss": 0.6585, "step": 233215 }, { "epoch": 2.58, "learning_rate": 6.960315351226858e-06, "loss": 0.5376, "step": 233220 }, { "epoch": 2.58, "learning_rate": 6.959392624088346e-06, "loss": 0.6509, "step": 233225 }, { "epoch": 2.58, "learning_rate": 6.9584698969498335e-06, "loss": 0.5739, "step": 233230 }, { "epoch": 2.58, "learning_rate": 6.9575471698113205e-06, "loss": 0.5479, "step": 233235 }, { "epoch": 2.58, "learning_rate": 6.956624442672808e-06, "loss": 0.6001, "step": 233240 }, { "epoch": 2.58, "learning_rate": 6.955701715534296e-06, "loss": 0.5755, "step": 233245 }, { "epoch": 2.58, "learning_rate": 6.954778988395784e-06, "loss": 0.5919, "step": 233250 }, { "epoch": 2.58, "learning_rate": 6.953856261257272e-06, "loss": 0.5869, "step": 233255 }, { "epoch": 2.58, "learning_rate": 6.952933534118759e-06, "loss": 0.5589, "step": 233260 }, { "epoch": 2.58, "learning_rate": 6.952010806980247e-06, "loss": 0.5607, "step": 233265 }, { "epoch": 2.58, "learning_rate": 6.951088079841735e-06, "loss": 0.5348, "step": 233270 }, { "epoch": 2.58, "learning_rate": 6.9501653527032225e-06, "loss": 0.546, "step": 233275 }, { "epoch": 2.58, "learning_rate": 6.94924262556471e-06, "loss": 0.6581, "step": 233280 }, { "epoch": 2.58, "learning_rate": 6.9483198984261965e-06, "loss": 0.5242, "step": 233285 }, { "epoch": 2.58, "learning_rate": 6.947397171287684e-06, "loss": 0.5514, "step": 233290 }, { "epoch": 2.58, "learning_rate": 6.946474444149171e-06, "loss": 0.5813, "step": 233295 }, { "epoch": 2.58, "learning_rate": 6.945551717010659e-06, "loss": 0.6523, "step": 233300 }, { "epoch": 2.58, "learning_rate": 6.944628989872147e-06, "loss": 0.6013, "step": 233305 }, { "epoch": 2.58, "learning_rate": 6.943706262733635e-06, "loss": 0.6074, "step": 233310 }, { "epoch": 2.58, "learning_rate": 6.942783535595123e-06, "loss": 0.5796, "step": 233315 }, { "epoch": 2.58, "learning_rate": 6.94186080845661e-06, "loss": 0.5915, "step": 233320 }, { "epoch": 2.58, "learning_rate": 6.940938081318098e-06, "loss": 0.5291, "step": 233325 }, { "epoch": 2.58, "learning_rate": 6.9400153541795854e-06, "loss": 0.6106, "step": 233330 }, { "epoch": 2.58, "learning_rate": 6.939092627041073e-06, "loss": 0.5888, "step": 233335 }, { "epoch": 2.58, "learning_rate": 6.938169899902561e-06, "loss": 0.5493, "step": 233340 }, { "epoch": 2.58, "learning_rate": 6.937247172764048e-06, "loss": 0.6038, "step": 233345 }, { "epoch": 2.58, "learning_rate": 6.936324445625536e-06, "loss": 0.5749, "step": 233350 }, { "epoch": 2.58, "learning_rate": 6.935401718487024e-06, "loss": 0.6356, "step": 233355 }, { "epoch": 2.58, "learning_rate": 6.93447899134851e-06, "loss": 0.5889, "step": 233360 }, { "epoch": 2.58, "learning_rate": 6.933556264209998e-06, "loss": 0.548, "step": 233365 }, { "epoch": 2.58, "learning_rate": 6.932633537071486e-06, "loss": 0.6295, "step": 233370 }, { "epoch": 2.58, "learning_rate": 6.931710809932973e-06, "loss": 0.5832, "step": 233375 }, { "epoch": 2.58, "learning_rate": 6.9307880827944605e-06, "loss": 0.6059, "step": 233380 }, { "epoch": 2.58, "learning_rate": 6.929865355655948e-06, "loss": 0.6181, "step": 233385 }, { "epoch": 2.58, "learning_rate": 6.928942628517436e-06, "loss": 0.5726, "step": 233390 }, { "epoch": 2.58, "learning_rate": 6.928019901378924e-06, "loss": 0.5846, "step": 233395 }, { "epoch": 2.58, "learning_rate": 6.927097174240411e-06, "loss": 0.5662, "step": 233400 }, { "epoch": 2.58, "learning_rate": 6.926174447101899e-06, "loss": 0.5354, "step": 233405 }, { "epoch": 2.58, "learning_rate": 6.925251719963387e-06, "loss": 0.5792, "step": 233410 }, { "epoch": 2.58, "learning_rate": 6.924328992824875e-06, "loss": 0.5541, "step": 233415 }, { "epoch": 2.58, "learning_rate": 6.9234062656863625e-06, "loss": 0.6776, "step": 233420 }, { "epoch": 2.58, "learning_rate": 6.9224835385478495e-06, "loss": 0.5794, "step": 233425 }, { "epoch": 2.58, "learning_rate": 6.921560811409337e-06, "loss": 0.638, "step": 233430 }, { "epoch": 2.58, "learning_rate": 6.9206380842708235e-06, "loss": 0.636, "step": 233435 }, { "epoch": 2.58, "learning_rate": 6.919715357132311e-06, "loss": 0.5816, "step": 233440 }, { "epoch": 2.58, "learning_rate": 6.918792629993799e-06, "loss": 0.6034, "step": 233445 }, { "epoch": 2.58, "learning_rate": 6.917869902855287e-06, "loss": 0.5786, "step": 233450 }, { "epoch": 2.58, "learning_rate": 6.916947175716775e-06, "loss": 0.576, "step": 233455 }, { "epoch": 2.59, "learning_rate": 6.916024448578262e-06, "loss": 0.6346, "step": 233460 }, { "epoch": 2.59, "learning_rate": 6.91510172143975e-06, "loss": 0.5869, "step": 233465 }, { "epoch": 2.59, "learning_rate": 6.9141789943012376e-06, "loss": 0.6036, "step": 233470 }, { "epoch": 2.59, "learning_rate": 6.913256267162725e-06, "loss": 0.5571, "step": 233475 }, { "epoch": 2.59, "learning_rate": 6.912333540024213e-06, "loss": 0.571, "step": 233480 }, { "epoch": 2.59, "learning_rate": 6.9114108128857e-06, "loss": 0.593, "step": 233485 }, { "epoch": 2.59, "learning_rate": 6.910488085747188e-06, "loss": 0.5803, "step": 233490 }, { "epoch": 2.59, "learning_rate": 6.909565358608676e-06, "loss": 0.6063, "step": 233495 }, { "epoch": 2.59, "learning_rate": 6.908642631470164e-06, "loss": 0.582, "step": 233500 }, { "epoch": 2.59, "learning_rate": 6.907719904331651e-06, "loss": 0.5697, "step": 233505 }, { "epoch": 2.59, "learning_rate": 6.906797177193138e-06, "loss": 0.5678, "step": 233510 }, { "epoch": 2.59, "learning_rate": 6.905874450054625e-06, "loss": 0.5879, "step": 233515 }, { "epoch": 2.59, "learning_rate": 6.904951722916113e-06, "loss": 0.6561, "step": 233520 }, { "epoch": 2.59, "learning_rate": 6.9040289957776005e-06, "loss": 0.5969, "step": 233525 }, { "epoch": 2.59, "learning_rate": 6.903106268639088e-06, "loss": 0.5934, "step": 233530 }, { "epoch": 2.59, "learning_rate": 6.902183541500576e-06, "loss": 0.5943, "step": 233535 }, { "epoch": 2.59, "learning_rate": 6.901260814362063e-06, "loss": 0.5606, "step": 233540 }, { "epoch": 2.59, "learning_rate": 6.900338087223551e-06, "loss": 0.5868, "step": 233545 }, { "epoch": 2.59, "learning_rate": 6.899415360085039e-06, "loss": 0.5113, "step": 233550 }, { "epoch": 2.59, "learning_rate": 6.898492632946527e-06, "loss": 0.6076, "step": 233555 }, { "epoch": 2.59, "learning_rate": 6.897569905808015e-06, "loss": 0.5567, "step": 233560 }, { "epoch": 2.59, "learning_rate": 6.896647178669502e-06, "loss": 0.6329, "step": 233565 }, { "epoch": 2.59, "learning_rate": 6.8957244515309895e-06, "loss": 0.6497, "step": 233570 }, { "epoch": 2.59, "learning_rate": 6.894801724392477e-06, "loss": 0.5786, "step": 233575 }, { "epoch": 2.59, "learning_rate": 6.893878997253965e-06, "loss": 0.581, "step": 233580 }, { "epoch": 2.59, "learning_rate": 6.892956270115453e-06, "loss": 0.5835, "step": 233585 }, { "epoch": 2.59, "learning_rate": 6.892033542976939e-06, "loss": 0.619, "step": 233590 }, { "epoch": 2.59, "learning_rate": 6.891110815838427e-06, "loss": 0.6137, "step": 233595 }, { "epoch": 2.59, "learning_rate": 6.890188088699914e-06, "loss": 0.607, "step": 233600 }, { "epoch": 2.59, "learning_rate": 6.889265361561402e-06, "loss": 0.5736, "step": 233605 }, { "epoch": 2.59, "learning_rate": 6.88834263442289e-06, "loss": 0.6133, "step": 233610 }, { "epoch": 2.59, "learning_rate": 6.8874199072843776e-06, "loss": 0.5559, "step": 233615 }, { "epoch": 2.59, "learning_rate": 6.8864971801458646e-06, "loss": 0.5755, "step": 233620 }, { "epoch": 2.59, "learning_rate": 6.885574453007352e-06, "loss": 0.6173, "step": 233625 }, { "epoch": 2.59, "learning_rate": 6.88465172586884e-06, "loss": 0.6159, "step": 233630 }, { "epoch": 2.59, "learning_rate": 6.883728998730328e-06, "loss": 0.6471, "step": 233635 }, { "epoch": 2.59, "learning_rate": 6.882806271591816e-06, "loss": 0.5504, "step": 233640 }, { "epoch": 2.59, "learning_rate": 6.881883544453303e-06, "loss": 0.5672, "step": 233645 }, { "epoch": 2.59, "learning_rate": 6.880960817314791e-06, "loss": 0.5771, "step": 233650 }, { "epoch": 2.59, "learning_rate": 6.880038090176279e-06, "loss": 0.6128, "step": 233655 }, { "epoch": 2.59, "learning_rate": 6.8791153630377665e-06, "loss": 0.6428, "step": 233660 }, { "epoch": 2.59, "learning_rate": 6.878192635899253e-06, "loss": 0.5648, "step": 233665 }, { "epoch": 2.59, "learning_rate": 6.8772699087607405e-06, "loss": 0.5439, "step": 233670 }, { "epoch": 2.59, "learning_rate": 6.876347181622228e-06, "loss": 0.6616, "step": 233675 }, { "epoch": 2.59, "learning_rate": 6.875424454483715e-06, "loss": 0.5756, "step": 233680 }, { "epoch": 2.59, "learning_rate": 6.874501727345203e-06, "loss": 0.5769, "step": 233685 }, { "epoch": 2.59, "learning_rate": 6.873579000206691e-06, "loss": 0.587, "step": 233690 }, { "epoch": 2.59, "learning_rate": 6.872656273068179e-06, "loss": 0.5794, "step": 233695 }, { "epoch": 2.59, "learning_rate": 6.871733545929667e-06, "loss": 0.612, "step": 233700 }, { "epoch": 2.59, "learning_rate": 6.870810818791154e-06, "loss": 0.6248, "step": 233705 }, { "epoch": 2.59, "learning_rate": 6.869888091652642e-06, "loss": 0.5513, "step": 233710 }, { "epoch": 2.59, "learning_rate": 6.8689653645141295e-06, "loss": 0.5715, "step": 233715 }, { "epoch": 2.59, "learning_rate": 6.868042637375617e-06, "loss": 0.5478, "step": 233720 }, { "epoch": 2.59, "learning_rate": 6.867119910237105e-06, "loss": 0.6006, "step": 233725 }, { "epoch": 2.59, "learning_rate": 6.866197183098592e-06, "loss": 0.6373, "step": 233730 }, { "epoch": 2.59, "learning_rate": 6.86527445596008e-06, "loss": 0.5812, "step": 233735 }, { "epoch": 2.59, "learning_rate": 6.864351728821566e-06, "loss": 0.6096, "step": 233740 }, { "epoch": 2.59, "learning_rate": 6.863429001683054e-06, "loss": 0.634, "step": 233745 }, { "epoch": 2.59, "learning_rate": 6.862506274544542e-06, "loss": 0.5979, "step": 233750 }, { "epoch": 2.59, "learning_rate": 6.86158354740603e-06, "loss": 0.6096, "step": 233755 }, { "epoch": 2.59, "learning_rate": 6.860660820267517e-06, "loss": 0.5815, "step": 233760 }, { "epoch": 2.59, "learning_rate": 6.8597380931290046e-06, "loss": 0.6709, "step": 233765 }, { "epoch": 2.59, "learning_rate": 6.858815365990492e-06, "loss": 0.5342, "step": 233770 }, { "epoch": 2.59, "learning_rate": 6.85789263885198e-06, "loss": 0.6293, "step": 233775 }, { "epoch": 2.59, "learning_rate": 6.856969911713468e-06, "loss": 0.6205, "step": 233780 }, { "epoch": 2.59, "learning_rate": 6.856047184574955e-06, "loss": 0.6315, "step": 233785 }, { "epoch": 2.59, "learning_rate": 6.855124457436443e-06, "loss": 0.5378, "step": 233790 }, { "epoch": 2.59, "learning_rate": 6.854201730297931e-06, "loss": 0.5677, "step": 233795 }, { "epoch": 2.59, "learning_rate": 6.853279003159419e-06, "loss": 0.6365, "step": 233800 }, { "epoch": 2.59, "learning_rate": 6.8523562760209065e-06, "loss": 0.5665, "step": 233805 }, { "epoch": 2.59, "learning_rate": 6.8514335488823935e-06, "loss": 0.6134, "step": 233810 }, { "epoch": 2.59, "learning_rate": 6.8505108217438805e-06, "loss": 0.6031, "step": 233815 }, { "epoch": 2.59, "learning_rate": 6.8495880946053675e-06, "loss": 0.5701, "step": 233820 }, { "epoch": 2.59, "learning_rate": 6.848665367466855e-06, "loss": 0.5974, "step": 233825 }, { "epoch": 2.59, "learning_rate": 6.847742640328343e-06, "loss": 0.5652, "step": 233830 }, { "epoch": 2.59, "learning_rate": 6.846819913189831e-06, "loss": 0.5949, "step": 233835 }, { "epoch": 2.59, "learning_rate": 6.845897186051319e-06, "loss": 0.6329, "step": 233840 }, { "epoch": 2.59, "learning_rate": 6.844974458912806e-06, "loss": 0.5749, "step": 233845 }, { "epoch": 2.59, "learning_rate": 6.844051731774294e-06, "loss": 0.6171, "step": 233850 }, { "epoch": 2.59, "learning_rate": 6.843129004635782e-06, "loss": 0.572, "step": 233855 }, { "epoch": 2.59, "learning_rate": 6.8422062774972694e-06, "loss": 0.5824, "step": 233860 }, { "epoch": 2.59, "learning_rate": 6.841283550358757e-06, "loss": 0.6025, "step": 233865 }, { "epoch": 2.59, "learning_rate": 6.840360823220244e-06, "loss": 0.6035, "step": 233870 }, { "epoch": 2.59, "learning_rate": 6.839438096081732e-06, "loss": 0.5412, "step": 233875 }, { "epoch": 2.59, "learning_rate": 6.83851536894322e-06, "loss": 0.5342, "step": 233880 }, { "epoch": 2.59, "learning_rate": 6.837592641804708e-06, "loss": 0.6025, "step": 233885 }, { "epoch": 2.59, "learning_rate": 6.836669914666194e-06, "loss": 0.601, "step": 233890 }, { "epoch": 2.59, "learning_rate": 6.835747187527682e-06, "loss": 0.5909, "step": 233895 }, { "epoch": 2.59, "learning_rate": 6.834824460389169e-06, "loss": 0.6262, "step": 233900 }, { "epoch": 2.59, "learning_rate": 6.833901733250657e-06, "loss": 0.5458, "step": 233905 }, { "epoch": 2.59, "learning_rate": 6.8329790061121445e-06, "loss": 0.5568, "step": 233910 }, { "epoch": 2.59, "learning_rate": 6.832056278973632e-06, "loss": 0.5456, "step": 233915 }, { "epoch": 2.59, "learning_rate": 6.83113355183512e-06, "loss": 0.5523, "step": 233920 }, { "epoch": 2.59, "learning_rate": 6.830210824696607e-06, "loss": 0.5859, "step": 233925 }, { "epoch": 2.59, "learning_rate": 6.829288097558095e-06, "loss": 0.5722, "step": 233930 }, { "epoch": 2.59, "learning_rate": 6.828365370419583e-06, "loss": 0.5707, "step": 233935 }, { "epoch": 2.59, "learning_rate": 6.827442643281071e-06, "loss": 0.5857, "step": 233940 }, { "epoch": 2.59, "learning_rate": 6.826519916142559e-06, "loss": 0.5559, "step": 233945 }, { "epoch": 2.59, "learning_rate": 6.825597189004046e-06, "loss": 0.6484, "step": 233950 }, { "epoch": 2.59, "learning_rate": 6.8246744618655335e-06, "loss": 0.5861, "step": 233955 }, { "epoch": 2.59, "learning_rate": 6.823751734727021e-06, "loss": 0.5751, "step": 233960 }, { "epoch": 2.59, "learning_rate": 6.8228290075885075e-06, "loss": 0.5484, "step": 233965 }, { "epoch": 2.59, "learning_rate": 6.821906280449995e-06, "loss": 0.5836, "step": 233970 }, { "epoch": 2.59, "learning_rate": 6.820983553311483e-06, "loss": 0.5498, "step": 233975 }, { "epoch": 2.59, "learning_rate": 6.820060826172971e-06, "loss": 0.5889, "step": 233980 }, { "epoch": 2.59, "learning_rate": 6.819138099034458e-06, "loss": 0.5726, "step": 233985 }, { "epoch": 2.59, "learning_rate": 6.818215371895946e-06, "loss": 0.5715, "step": 233990 }, { "epoch": 2.59, "learning_rate": 6.817292644757434e-06, "loss": 0.6141, "step": 233995 }, { "epoch": 2.59, "learning_rate": 6.816369917618922e-06, "loss": 0.5593, "step": 234000 }, { "epoch": 2.59, "eval_loss": 0.562467098236084, "eval_runtime": 69.9027, "eval_samples_per_second": 28.611, "eval_steps_per_second": 14.306, "step": 234000 }, { "epoch": 2.59, "learning_rate": 6.8154471904804094e-06, "loss": 0.5872, "step": 234005 }, { "epoch": 2.59, "learning_rate": 6.8145244633418964e-06, "loss": 0.5674, "step": 234010 }, { "epoch": 2.59, "learning_rate": 6.813601736203384e-06, "loss": 0.582, "step": 234015 }, { "epoch": 2.59, "learning_rate": 6.812679009064872e-06, "loss": 0.5429, "step": 234020 }, { "epoch": 2.59, "learning_rate": 6.81175628192636e-06, "loss": 0.6111, "step": 234025 }, { "epoch": 2.59, "learning_rate": 6.810833554787848e-06, "loss": 0.538, "step": 234030 }, { "epoch": 2.59, "learning_rate": 6.809910827649335e-06, "loss": 0.5416, "step": 234035 }, { "epoch": 2.59, "learning_rate": 6.808988100510823e-06, "loss": 0.5616, "step": 234040 }, { "epoch": 2.59, "learning_rate": 6.808065373372309e-06, "loss": 0.5977, "step": 234045 }, { "epoch": 2.59, "learning_rate": 6.807142646233797e-06, "loss": 0.5734, "step": 234050 }, { "epoch": 2.59, "learning_rate": 6.8062199190952845e-06, "loss": 0.5287, "step": 234055 }, { "epoch": 2.59, "learning_rate": 6.805297191956772e-06, "loss": 0.5702, "step": 234060 }, { "epoch": 2.59, "learning_rate": 6.804374464818259e-06, "loss": 0.5821, "step": 234065 }, { "epoch": 2.59, "learning_rate": 6.803451737679747e-06, "loss": 0.5712, "step": 234070 }, { "epoch": 2.59, "learning_rate": 6.802529010541235e-06, "loss": 0.5526, "step": 234075 }, { "epoch": 2.59, "learning_rate": 6.801606283402723e-06, "loss": 0.5468, "step": 234080 }, { "epoch": 2.59, "learning_rate": 6.800683556264211e-06, "loss": 0.6433, "step": 234085 }, { "epoch": 2.59, "learning_rate": 6.799760829125698e-06, "loss": 0.5326, "step": 234090 }, { "epoch": 2.59, "learning_rate": 6.798838101987186e-06, "loss": 0.6153, "step": 234095 }, { "epoch": 2.59, "learning_rate": 6.7979153748486735e-06, "loss": 0.6714, "step": 234100 }, { "epoch": 2.59, "learning_rate": 6.796992647710161e-06, "loss": 0.5635, "step": 234105 }, { "epoch": 2.59, "learning_rate": 6.796069920571649e-06, "loss": 0.5976, "step": 234110 }, { "epoch": 2.59, "learning_rate": 6.795147193433136e-06, "loss": 0.5876, "step": 234115 }, { "epoch": 2.59, "learning_rate": 6.794224466294623e-06, "loss": 0.5724, "step": 234120 }, { "epoch": 2.59, "learning_rate": 6.79330173915611e-06, "loss": 0.5871, "step": 234125 }, { "epoch": 2.59, "learning_rate": 6.792379012017598e-06, "loss": 0.6428, "step": 234130 }, { "epoch": 2.59, "learning_rate": 6.791456284879086e-06, "loss": 0.5529, "step": 234135 }, { "epoch": 2.59, "learning_rate": 6.790533557740574e-06, "loss": 0.5475, "step": 234140 }, { "epoch": 2.59, "learning_rate": 6.7896108306020616e-06, "loss": 0.5999, "step": 234145 }, { "epoch": 2.59, "learning_rate": 6.7886881034635486e-06, "loss": 0.5969, "step": 234150 }, { "epoch": 2.59, "learning_rate": 6.787765376325036e-06, "loss": 0.5557, "step": 234155 }, { "epoch": 2.59, "learning_rate": 6.786842649186524e-06, "loss": 0.5663, "step": 234160 }, { "epoch": 2.59, "learning_rate": 6.785919922048012e-06, "loss": 0.6208, "step": 234165 }, { "epoch": 2.59, "learning_rate": 6.784997194909499e-06, "loss": 0.6205, "step": 234170 }, { "epoch": 2.59, "learning_rate": 6.784074467770987e-06, "loss": 0.5744, "step": 234175 }, { "epoch": 2.59, "learning_rate": 6.783151740632475e-06, "loss": 0.606, "step": 234180 }, { "epoch": 2.59, "learning_rate": 6.782229013493963e-06, "loss": 0.5797, "step": 234185 }, { "epoch": 2.59, "learning_rate": 6.7813062863554505e-06, "loss": 0.5897, "step": 234190 }, { "epoch": 2.59, "learning_rate": 6.780383559216937e-06, "loss": 0.5631, "step": 234195 }, { "epoch": 2.59, "learning_rate": 6.7794608320784245e-06, "loss": 0.5598, "step": 234200 }, { "epoch": 2.59, "learning_rate": 6.7785381049399115e-06, "loss": 0.5975, "step": 234205 }, { "epoch": 2.59, "learning_rate": 6.777615377801399e-06, "loss": 0.5419, "step": 234210 }, { "epoch": 2.59, "learning_rate": 6.776692650662887e-06, "loss": 0.6362, "step": 234215 }, { "epoch": 2.59, "learning_rate": 6.775769923524375e-06, "loss": 0.5867, "step": 234220 }, { "epoch": 2.59, "learning_rate": 6.774847196385863e-06, "loss": 0.5809, "step": 234225 }, { "epoch": 2.59, "learning_rate": 6.77392446924735e-06, "loss": 0.5479, "step": 234230 }, { "epoch": 2.59, "learning_rate": 6.773001742108838e-06, "loss": 0.6436, "step": 234235 }, { "epoch": 2.59, "learning_rate": 6.772079014970326e-06, "loss": 0.625, "step": 234240 }, { "epoch": 2.59, "learning_rate": 6.7711562878318135e-06, "loss": 0.5869, "step": 234245 }, { "epoch": 2.59, "learning_rate": 6.770233560693301e-06, "loss": 0.5316, "step": 234250 }, { "epoch": 2.59, "learning_rate": 6.769310833554788e-06, "loss": 0.6735, "step": 234255 }, { "epoch": 2.59, "learning_rate": 6.768388106416276e-06, "loss": 0.5465, "step": 234260 }, { "epoch": 2.59, "learning_rate": 6.767465379277764e-06, "loss": 0.5634, "step": 234265 }, { "epoch": 2.59, "learning_rate": 6.76654265213925e-06, "loss": 0.6322, "step": 234270 }, { "epoch": 2.59, "learning_rate": 6.765619925000738e-06, "loss": 0.5902, "step": 234275 }, { "epoch": 2.59, "learning_rate": 6.764697197862226e-06, "loss": 0.6109, "step": 234280 }, { "epoch": 2.59, "learning_rate": 6.763774470723713e-06, "loss": 0.6098, "step": 234285 }, { "epoch": 2.59, "learning_rate": 6.762851743585201e-06, "loss": 0.5466, "step": 234290 }, { "epoch": 2.59, "learning_rate": 6.7619290164466886e-06, "loss": 0.5974, "step": 234295 }, { "epoch": 2.59, "learning_rate": 6.761006289308176e-06, "loss": 0.6022, "step": 234300 }, { "epoch": 2.59, "learning_rate": 6.760083562169664e-06, "loss": 0.586, "step": 234305 }, { "epoch": 2.59, "learning_rate": 6.759160835031151e-06, "loss": 0.5899, "step": 234310 }, { "epoch": 2.59, "learning_rate": 6.758238107892639e-06, "loss": 0.6285, "step": 234315 }, { "epoch": 2.59, "learning_rate": 6.757315380754127e-06, "loss": 0.6176, "step": 234320 }, { "epoch": 2.59, "learning_rate": 6.756392653615615e-06, "loss": 0.6043, "step": 234325 }, { "epoch": 2.59, "learning_rate": 6.755469926477103e-06, "loss": 0.618, "step": 234330 }, { "epoch": 2.59, "learning_rate": 6.75454719933859e-06, "loss": 0.5913, "step": 234335 }, { "epoch": 2.59, "learning_rate": 6.7536244722000775e-06, "loss": 0.5474, "step": 234340 }, { "epoch": 2.59, "learning_rate": 6.752701745061564e-06, "loss": 0.5491, "step": 234345 }, { "epoch": 2.59, "learning_rate": 6.7517790179230515e-06, "loss": 0.5546, "step": 234350 }, { "epoch": 2.59, "learning_rate": 6.750856290784539e-06, "loss": 0.574, "step": 234355 }, { "epoch": 2.6, "learning_rate": 6.749933563646027e-06, "loss": 0.5932, "step": 234360 }, { "epoch": 2.6, "learning_rate": 6.749010836507515e-06, "loss": 0.6459, "step": 234365 }, { "epoch": 2.6, "learning_rate": 6.748088109369002e-06, "loss": 0.5779, "step": 234370 }, { "epoch": 2.6, "learning_rate": 6.74716538223049e-06, "loss": 0.6008, "step": 234375 }, { "epoch": 2.6, "learning_rate": 6.746242655091978e-06, "loss": 0.5845, "step": 234380 }, { "epoch": 2.6, "learning_rate": 6.745319927953466e-06, "loss": 0.5649, "step": 234385 }, { "epoch": 2.6, "learning_rate": 6.7443972008149534e-06, "loss": 0.6064, "step": 234390 }, { "epoch": 2.6, "learning_rate": 6.7434744736764405e-06, "loss": 0.6353, "step": 234395 }, { "epoch": 2.6, "learning_rate": 6.742551746537928e-06, "loss": 0.5766, "step": 234400 }, { "epoch": 2.6, "learning_rate": 6.741629019399416e-06, "loss": 0.5709, "step": 234405 }, { "epoch": 2.6, "learning_rate": 6.740706292260904e-06, "loss": 0.5748, "step": 234410 }, { "epoch": 2.6, "learning_rate": 6.739783565122392e-06, "loss": 0.6096, "step": 234415 }, { "epoch": 2.6, "learning_rate": 6.738860837983879e-06, "loss": 0.5764, "step": 234420 }, { "epoch": 2.6, "learning_rate": 6.737938110845365e-06, "loss": 0.5837, "step": 234425 }, { "epoch": 2.6, "learning_rate": 6.737015383706853e-06, "loss": 0.5615, "step": 234430 }, { "epoch": 2.6, "learning_rate": 6.736092656568341e-06, "loss": 0.5266, "step": 234435 }, { "epoch": 2.6, "learning_rate": 6.7351699294298285e-06, "loss": 0.5563, "step": 234440 }, { "epoch": 2.6, "learning_rate": 6.734247202291316e-06, "loss": 0.5842, "step": 234445 }, { "epoch": 2.6, "learning_rate": 6.733324475152803e-06, "loss": 0.5536, "step": 234450 }, { "epoch": 2.6, "learning_rate": 6.732401748014291e-06, "loss": 0.6316, "step": 234455 }, { "epoch": 2.6, "learning_rate": 6.731479020875779e-06, "loss": 0.5858, "step": 234460 }, { "epoch": 2.6, "learning_rate": 6.730556293737267e-06, "loss": 0.6028, "step": 234465 }, { "epoch": 2.6, "learning_rate": 6.729633566598755e-06, "loss": 0.5429, "step": 234470 }, { "epoch": 2.6, "learning_rate": 6.728710839460242e-06, "loss": 0.5805, "step": 234475 }, { "epoch": 2.6, "learning_rate": 6.72778811232173e-06, "loss": 0.588, "step": 234480 }, { "epoch": 2.6, "learning_rate": 6.7268653851832175e-06, "loss": 0.5548, "step": 234485 }, { "epoch": 2.6, "learning_rate": 6.725942658044705e-06, "loss": 0.5765, "step": 234490 }, { "epoch": 2.6, "learning_rate": 6.725019930906193e-06, "loss": 0.6347, "step": 234495 }, { "epoch": 2.6, "learning_rate": 6.724097203767679e-06, "loss": 0.6056, "step": 234500 }, { "epoch": 2.6, "learning_rate": 6.723174476629167e-06, "loss": 0.5989, "step": 234505 }, { "epoch": 2.6, "learning_rate": 6.722251749490654e-06, "loss": 0.5773, "step": 234510 }, { "epoch": 2.6, "learning_rate": 6.721329022352142e-06, "loss": 0.5667, "step": 234515 }, { "epoch": 2.6, "learning_rate": 6.72040629521363e-06, "loss": 0.5746, "step": 234520 }, { "epoch": 2.6, "learning_rate": 6.719483568075118e-06, "loss": 0.5882, "step": 234525 }, { "epoch": 2.6, "learning_rate": 6.718560840936606e-06, "loss": 0.5966, "step": 234530 }, { "epoch": 2.6, "learning_rate": 6.717638113798093e-06, "loss": 0.5768, "step": 234535 }, { "epoch": 2.6, "learning_rate": 6.7167153866595804e-06, "loss": 0.5682, "step": 234540 }, { "epoch": 2.6, "learning_rate": 6.715792659521068e-06, "loss": 0.544, "step": 234545 }, { "epoch": 2.6, "learning_rate": 6.714869932382556e-06, "loss": 0.5475, "step": 234550 }, { "epoch": 2.6, "learning_rate": 6.713947205244044e-06, "loss": 0.572, "step": 234555 }, { "epoch": 2.6, "learning_rate": 6.713024478105531e-06, "loss": 0.6037, "step": 234560 }, { "epoch": 2.6, "learning_rate": 6.712101750967019e-06, "loss": 0.5976, "step": 234565 }, { "epoch": 2.6, "learning_rate": 6.711179023828507e-06, "loss": 0.5794, "step": 234570 }, { "epoch": 2.6, "learning_rate": 6.710256296689993e-06, "loss": 0.5624, "step": 234575 }, { "epoch": 2.6, "learning_rate": 6.709333569551481e-06, "loss": 0.5459, "step": 234580 }, { "epoch": 2.6, "learning_rate": 6.7084108424129685e-06, "loss": 0.5475, "step": 234585 }, { "epoch": 2.6, "learning_rate": 6.7074881152744555e-06, "loss": 0.5742, "step": 234590 }, { "epoch": 2.6, "learning_rate": 6.706565388135943e-06, "loss": 0.5393, "step": 234595 }, { "epoch": 2.6, "learning_rate": 6.705642660997431e-06, "loss": 0.588, "step": 234600 }, { "epoch": 2.6, "learning_rate": 6.704719933858919e-06, "loss": 0.6033, "step": 234605 }, { "epoch": 2.6, "learning_rate": 6.703797206720407e-06, "loss": 0.6032, "step": 234610 }, { "epoch": 2.6, "learning_rate": 6.702874479581894e-06, "loss": 0.6199, "step": 234615 }, { "epoch": 2.6, "learning_rate": 6.701951752443382e-06, "loss": 0.6416, "step": 234620 }, { "epoch": 2.6, "learning_rate": 6.70102902530487e-06, "loss": 0.5701, "step": 234625 }, { "epoch": 2.6, "learning_rate": 6.7001062981663575e-06, "loss": 0.622, "step": 234630 }, { "epoch": 2.6, "learning_rate": 6.699183571027845e-06, "loss": 0.5537, "step": 234635 }, { "epoch": 2.6, "learning_rate": 6.698260843889332e-06, "loss": 0.5673, "step": 234640 }, { "epoch": 2.6, "learning_rate": 6.69733811675082e-06, "loss": 0.5667, "step": 234645 }, { "epoch": 2.6, "learning_rate": 6.696415389612306e-06, "loss": 0.5566, "step": 234650 }, { "epoch": 2.6, "learning_rate": 6.695492662473794e-06, "loss": 0.5747, "step": 234655 }, { "epoch": 2.6, "learning_rate": 6.694569935335282e-06, "loss": 0.5951, "step": 234660 }, { "epoch": 2.6, "learning_rate": 6.69364720819677e-06, "loss": 0.6181, "step": 234665 }, { "epoch": 2.6, "learning_rate": 6.692724481058258e-06, "loss": 0.5836, "step": 234670 }, { "epoch": 2.6, "learning_rate": 6.691801753919745e-06, "loss": 0.6055, "step": 234675 }, { "epoch": 2.6, "learning_rate": 6.690879026781233e-06, "loss": 0.5351, "step": 234680 }, { "epoch": 2.6, "learning_rate": 6.6899562996427204e-06, "loss": 0.6047, "step": 234685 }, { "epoch": 2.6, "learning_rate": 6.689033572504208e-06, "loss": 0.5941, "step": 234690 }, { "epoch": 2.6, "learning_rate": 6.688110845365696e-06, "loss": 0.5761, "step": 234695 }, { "epoch": 2.6, "learning_rate": 6.687188118227183e-06, "loss": 0.5958, "step": 234700 }, { "epoch": 2.6, "learning_rate": 6.686265391088671e-06, "loss": 0.5672, "step": 234705 }, { "epoch": 2.6, "learning_rate": 6.685342663950159e-06, "loss": 0.5468, "step": 234710 }, { "epoch": 2.6, "learning_rate": 6.684419936811647e-06, "loss": 0.6005, "step": 234715 }, { "epoch": 2.6, "learning_rate": 6.683497209673134e-06, "loss": 0.5833, "step": 234720 }, { "epoch": 2.6, "learning_rate": 6.682574482534621e-06, "loss": 0.5896, "step": 234725 }, { "epoch": 2.6, "learning_rate": 6.681651755396108e-06, "loss": 0.5698, "step": 234730 }, { "epoch": 2.6, "learning_rate": 6.6807290282575955e-06, "loss": 0.5968, "step": 234735 }, { "epoch": 2.6, "learning_rate": 6.679806301119083e-06, "loss": 0.5983, "step": 234740 }, { "epoch": 2.6, "learning_rate": 6.678883573980571e-06, "loss": 0.5524, "step": 234745 }, { "epoch": 2.6, "learning_rate": 6.677960846842059e-06, "loss": 0.5916, "step": 234750 }, { "epoch": 2.6, "learning_rate": 6.677038119703546e-06, "loss": 0.5932, "step": 234755 }, { "epoch": 2.6, "learning_rate": 6.676115392565034e-06, "loss": 0.5646, "step": 234760 }, { "epoch": 2.6, "learning_rate": 6.675192665426522e-06, "loss": 0.6198, "step": 234765 }, { "epoch": 2.6, "learning_rate": 6.67426993828801e-06, "loss": 0.5903, "step": 234770 }, { "epoch": 2.6, "learning_rate": 6.6733472111494975e-06, "loss": 0.5899, "step": 234775 }, { "epoch": 2.6, "learning_rate": 6.6724244840109845e-06, "loss": 0.5911, "step": 234780 }, { "epoch": 2.6, "learning_rate": 6.671501756872472e-06, "loss": 0.5153, "step": 234785 }, { "epoch": 2.6, "learning_rate": 6.67057902973396e-06, "loss": 0.5577, "step": 234790 }, { "epoch": 2.6, "learning_rate": 6.669656302595448e-06, "loss": 0.5847, "step": 234795 }, { "epoch": 2.6, "learning_rate": 6.668733575456934e-06, "loss": 0.5407, "step": 234800 }, { "epoch": 2.6, "learning_rate": 6.667810848318422e-06, "loss": 0.5892, "step": 234805 }, { "epoch": 2.6, "learning_rate": 6.66688812117991e-06, "loss": 0.6036, "step": 234810 }, { "epoch": 2.6, "learning_rate": 6.665965394041397e-06, "loss": 0.5381, "step": 234815 }, { "epoch": 2.6, "learning_rate": 6.665042666902885e-06, "loss": 0.5522, "step": 234820 }, { "epoch": 2.6, "learning_rate": 6.6641199397643726e-06, "loss": 0.5537, "step": 234825 }, { "epoch": 2.6, "learning_rate": 6.66319721262586e-06, "loss": 0.5764, "step": 234830 }, { "epoch": 2.6, "learning_rate": 6.662274485487347e-06, "loss": 0.5975, "step": 234835 }, { "epoch": 2.6, "learning_rate": 6.661351758348835e-06, "loss": 0.551, "step": 234840 }, { "epoch": 2.6, "learning_rate": 6.660429031210323e-06, "loss": 0.556, "step": 234845 }, { "epoch": 2.6, "learning_rate": 6.659506304071811e-06, "loss": 0.6347, "step": 234850 }, { "epoch": 2.6, "learning_rate": 6.658583576933299e-06, "loss": 0.5915, "step": 234855 }, { "epoch": 2.6, "learning_rate": 6.657660849794786e-06, "loss": 0.6093, "step": 234860 }, { "epoch": 2.6, "learning_rate": 6.656738122656274e-06, "loss": 0.6039, "step": 234865 }, { "epoch": 2.6, "learning_rate": 6.6558153955177615e-06, "loss": 0.5831, "step": 234870 }, { "epoch": 2.6, "learning_rate": 6.654892668379249e-06, "loss": 0.5582, "step": 234875 }, { "epoch": 2.6, "learning_rate": 6.6539699412407355e-06, "loss": 0.5399, "step": 234880 }, { "epoch": 2.6, "learning_rate": 6.653047214102223e-06, "loss": 0.6118, "step": 234885 }, { "epoch": 2.6, "learning_rate": 6.652124486963711e-06, "loss": 0.582, "step": 234890 }, { "epoch": 2.6, "learning_rate": 6.651201759825198e-06, "loss": 0.6025, "step": 234895 }, { "epoch": 2.6, "learning_rate": 6.650279032686686e-06, "loss": 0.5639, "step": 234900 }, { "epoch": 2.6, "learning_rate": 6.649356305548174e-06, "loss": 0.5643, "step": 234905 }, { "epoch": 2.6, "learning_rate": 6.648433578409662e-06, "loss": 0.5841, "step": 234910 }, { "epoch": 2.6, "learning_rate": 6.64751085127115e-06, "loss": 0.615, "step": 234915 }, { "epoch": 2.6, "learning_rate": 6.646588124132637e-06, "loss": 0.6279, "step": 234920 }, { "epoch": 2.6, "learning_rate": 6.6456653969941245e-06, "loss": 0.6, "step": 234925 }, { "epoch": 2.6, "learning_rate": 6.644742669855612e-06, "loss": 0.5695, "step": 234930 }, { "epoch": 2.6, "learning_rate": 6.6438199427171e-06, "loss": 0.5729, "step": 234935 }, { "epoch": 2.6, "learning_rate": 6.642897215578588e-06, "loss": 0.6279, "step": 234940 }, { "epoch": 2.6, "learning_rate": 6.641974488440075e-06, "loss": 0.6337, "step": 234945 }, { "epoch": 2.6, "learning_rate": 6.641051761301563e-06, "loss": 0.5522, "step": 234950 }, { "epoch": 2.6, "learning_rate": 6.640129034163049e-06, "loss": 0.5799, "step": 234955 }, { "epoch": 2.6, "learning_rate": 6.639206307024537e-06, "loss": 0.6099, "step": 234960 }, { "epoch": 2.6, "learning_rate": 6.638283579886025e-06, "loss": 0.6288, "step": 234965 }, { "epoch": 2.6, "learning_rate": 6.6373608527475126e-06, "loss": 0.5427, "step": 234970 }, { "epoch": 2.6, "learning_rate": 6.6364381256089996e-06, "loss": 0.5672, "step": 234975 }, { "epoch": 2.6, "learning_rate": 6.635515398470487e-06, "loss": 0.5574, "step": 234980 }, { "epoch": 2.6, "learning_rate": 6.634592671331975e-06, "loss": 0.5452, "step": 234985 }, { "epoch": 2.6, "learning_rate": 6.633669944193463e-06, "loss": 0.5796, "step": 234990 }, { "epoch": 2.6, "learning_rate": 6.632747217054951e-06, "loss": 0.6302, "step": 234995 }, { "epoch": 2.6, "learning_rate": 6.631824489916438e-06, "loss": 0.6167, "step": 235000 }, { "epoch": 2.6, "eval_loss": 0.5450388789176941, "eval_runtime": 69.8447, "eval_samples_per_second": 28.635, "eval_steps_per_second": 14.317, "step": 235000 }, { "epoch": 2.6, "learning_rate": 6.630901762777926e-06, "loss": 0.5543, "step": 235005 }, { "epoch": 2.6, "learning_rate": 6.629979035639414e-06, "loss": 0.6214, "step": 235010 }, { "epoch": 2.6, "learning_rate": 6.6290563085009015e-06, "loss": 0.6017, "step": 235015 }, { "epoch": 2.6, "learning_rate": 6.628133581362389e-06, "loss": 0.5267, "step": 235020 }, { "epoch": 2.6, "learning_rate": 6.627210854223876e-06, "loss": 0.5845, "step": 235025 }, { "epoch": 2.6, "learning_rate": 6.626288127085363e-06, "loss": 0.5629, "step": 235030 }, { "epoch": 2.6, "learning_rate": 6.62536539994685e-06, "loss": 0.5596, "step": 235035 }, { "epoch": 2.6, "learning_rate": 6.624442672808338e-06, "loss": 0.5807, "step": 235040 }, { "epoch": 2.6, "learning_rate": 6.623519945669826e-06, "loss": 0.5927, "step": 235045 }, { "epoch": 2.6, "learning_rate": 6.622597218531314e-06, "loss": 0.5436, "step": 235050 }, { "epoch": 2.6, "learning_rate": 6.621674491392802e-06, "loss": 0.6025, "step": 235055 }, { "epoch": 2.6, "learning_rate": 6.620751764254289e-06, "loss": 0.6066, "step": 235060 }, { "epoch": 2.6, "learning_rate": 6.619829037115777e-06, "loss": 0.5455, "step": 235065 }, { "epoch": 2.6, "learning_rate": 6.6189063099772644e-06, "loss": 0.5701, "step": 235070 }, { "epoch": 2.6, "learning_rate": 6.617983582838752e-06, "loss": 0.5438, "step": 235075 }, { "epoch": 2.6, "learning_rate": 6.61706085570024e-06, "loss": 0.5875, "step": 235080 }, { "epoch": 2.6, "learning_rate": 6.616138128561727e-06, "loss": 0.561, "step": 235085 }, { "epoch": 2.6, "learning_rate": 6.615215401423215e-06, "loss": 0.5902, "step": 235090 }, { "epoch": 2.6, "learning_rate": 6.614292674284703e-06, "loss": 0.5527, "step": 235095 }, { "epoch": 2.6, "learning_rate": 6.613369947146191e-06, "loss": 0.573, "step": 235100 }, { "epoch": 2.6, "learning_rate": 6.612447220007677e-06, "loss": 0.5969, "step": 235105 }, { "epoch": 2.6, "learning_rate": 6.611524492869165e-06, "loss": 0.6297, "step": 235110 }, { "epoch": 2.6, "learning_rate": 6.610601765730652e-06, "loss": 0.6096, "step": 235115 }, { "epoch": 2.6, "learning_rate": 6.6096790385921395e-06, "loss": 0.5218, "step": 235120 }, { "epoch": 2.6, "learning_rate": 6.608756311453627e-06, "loss": 0.541, "step": 235125 }, { "epoch": 2.6, "learning_rate": 6.607833584315115e-06, "loss": 0.5663, "step": 235130 }, { "epoch": 2.6, "learning_rate": 6.606910857176603e-06, "loss": 0.587, "step": 235135 }, { "epoch": 2.6, "learning_rate": 6.60598813003809e-06, "loss": 0.5942, "step": 235140 }, { "epoch": 2.6, "learning_rate": 6.605065402899578e-06, "loss": 0.5902, "step": 235145 }, { "epoch": 2.6, "learning_rate": 6.604142675761066e-06, "loss": 0.5644, "step": 235150 }, { "epoch": 2.6, "learning_rate": 6.603219948622554e-06, "loss": 0.557, "step": 235155 }, { "epoch": 2.6, "learning_rate": 6.6022972214840415e-06, "loss": 0.5751, "step": 235160 }, { "epoch": 2.6, "learning_rate": 6.6013744943455285e-06, "loss": 0.5685, "step": 235165 }, { "epoch": 2.6, "learning_rate": 6.600451767207016e-06, "loss": 0.531, "step": 235170 }, { "epoch": 2.6, "learning_rate": 6.599529040068504e-06, "loss": 0.6304, "step": 235175 }, { "epoch": 2.6, "learning_rate": 6.59860631292999e-06, "loss": 0.5989, "step": 235180 }, { "epoch": 2.6, "learning_rate": 6.597683585791478e-06, "loss": 0.634, "step": 235185 }, { "epoch": 2.6, "learning_rate": 6.596760858652966e-06, "loss": 0.6162, "step": 235190 }, { "epoch": 2.6, "learning_rate": 6.595838131514454e-06, "loss": 0.5772, "step": 235195 }, { "epoch": 2.6, "learning_rate": 6.594915404375941e-06, "loss": 0.5781, "step": 235200 }, { "epoch": 2.6, "learning_rate": 6.593992677237429e-06, "loss": 0.6333, "step": 235205 }, { "epoch": 2.6, "learning_rate": 6.593069950098917e-06, "loss": 0.578, "step": 235210 }, { "epoch": 2.6, "learning_rate": 6.5921472229604044e-06, "loss": 0.5911, "step": 235215 }, { "epoch": 2.6, "learning_rate": 6.591224495821892e-06, "loss": 0.5657, "step": 235220 }, { "epoch": 2.6, "learning_rate": 6.590301768683379e-06, "loss": 0.6066, "step": 235225 }, { "epoch": 2.6, "learning_rate": 6.589379041544867e-06, "loss": 0.5977, "step": 235230 }, { "epoch": 2.6, "learning_rate": 6.588456314406355e-06, "loss": 0.6255, "step": 235235 }, { "epoch": 2.6, "learning_rate": 6.587533587267843e-06, "loss": 0.5448, "step": 235240 }, { "epoch": 2.6, "learning_rate": 6.586610860129331e-06, "loss": 0.5852, "step": 235245 }, { "epoch": 2.6, "learning_rate": 6.585688132990818e-06, "loss": 0.5567, "step": 235250 }, { "epoch": 2.6, "learning_rate": 6.584765405852304e-06, "loss": 0.5744, "step": 235255 }, { "epoch": 2.6, "learning_rate": 6.583842678713792e-06, "loss": 0.5801, "step": 235260 }, { "epoch": 2.61, "learning_rate": 6.5829199515752795e-06, "loss": 0.607, "step": 235265 }, { "epoch": 2.61, "learning_rate": 6.581997224436767e-06, "loss": 0.5929, "step": 235270 }, { "epoch": 2.61, "learning_rate": 6.581074497298255e-06, "loss": 0.5664, "step": 235275 }, { "epoch": 2.61, "learning_rate": 6.580151770159742e-06, "loss": 0.5787, "step": 235280 }, { "epoch": 2.61, "learning_rate": 6.57922904302123e-06, "loss": 0.5701, "step": 235285 }, { "epoch": 2.61, "learning_rate": 6.578306315882718e-06, "loss": 0.5643, "step": 235290 }, { "epoch": 2.61, "learning_rate": 6.577383588744206e-06, "loss": 0.6347, "step": 235295 }, { "epoch": 2.61, "learning_rate": 6.576460861605694e-06, "loss": 0.6361, "step": 235300 }, { "epoch": 2.61, "learning_rate": 6.575538134467181e-06, "loss": 0.5575, "step": 235305 }, { "epoch": 2.61, "learning_rate": 6.5746154073286685e-06, "loss": 0.5768, "step": 235310 }, { "epoch": 2.61, "learning_rate": 6.573692680190156e-06, "loss": 0.648, "step": 235315 }, { "epoch": 2.61, "learning_rate": 6.572769953051644e-06, "loss": 0.5283, "step": 235320 }, { "epoch": 2.61, "learning_rate": 6.571847225913132e-06, "loss": 0.5908, "step": 235325 }, { "epoch": 2.61, "learning_rate": 6.570924498774619e-06, "loss": 0.5823, "step": 235330 }, { "epoch": 2.61, "learning_rate": 6.570001771636106e-06, "loss": 0.5891, "step": 235335 }, { "epoch": 2.61, "learning_rate": 6.569079044497593e-06, "loss": 0.6216, "step": 235340 }, { "epoch": 2.61, "learning_rate": 6.568156317359081e-06, "loss": 0.6221, "step": 235345 }, { "epoch": 2.61, "learning_rate": 6.567233590220569e-06, "loss": 0.5925, "step": 235350 }, { "epoch": 2.61, "learning_rate": 6.5663108630820566e-06, "loss": 0.5722, "step": 235355 }, { "epoch": 2.61, "learning_rate": 6.5653881359435444e-06, "loss": 0.5786, "step": 235360 }, { "epoch": 2.61, "learning_rate": 6.5644654088050314e-06, "loss": 0.6016, "step": 235365 }, { "epoch": 2.61, "learning_rate": 6.563542681666519e-06, "loss": 0.5756, "step": 235370 }, { "epoch": 2.61, "learning_rate": 6.562619954528007e-06, "loss": 0.6619, "step": 235375 }, { "epoch": 2.61, "learning_rate": 6.561697227389495e-06, "loss": 0.5026, "step": 235380 }, { "epoch": 2.61, "learning_rate": 6.560774500250982e-06, "loss": 0.6089, "step": 235385 }, { "epoch": 2.61, "learning_rate": 6.55985177311247e-06, "loss": 0.5389, "step": 235390 }, { "epoch": 2.61, "learning_rate": 6.558929045973958e-06, "loss": 0.568, "step": 235395 }, { "epoch": 2.61, "learning_rate": 6.5580063188354455e-06, "loss": 0.6205, "step": 235400 }, { "epoch": 2.61, "learning_rate": 6.557083591696933e-06, "loss": 0.5705, "step": 235405 }, { "epoch": 2.61, "learning_rate": 6.5561608645584195e-06, "loss": 0.5427, "step": 235410 }, { "epoch": 2.61, "learning_rate": 6.555238137419907e-06, "loss": 0.6035, "step": 235415 }, { "epoch": 2.61, "learning_rate": 6.554315410281394e-06, "loss": 0.5797, "step": 235420 }, { "epoch": 2.61, "learning_rate": 6.553392683142882e-06, "loss": 0.5386, "step": 235425 }, { "epoch": 2.61, "learning_rate": 6.55246995600437e-06, "loss": 0.5346, "step": 235430 }, { "epoch": 2.61, "learning_rate": 6.551547228865858e-06, "loss": 0.5579, "step": 235435 }, { "epoch": 2.61, "learning_rate": 6.550624501727346e-06, "loss": 0.5489, "step": 235440 }, { "epoch": 2.61, "learning_rate": 6.549701774588833e-06, "loss": 0.5856, "step": 235445 }, { "epoch": 2.61, "learning_rate": 6.548779047450321e-06, "loss": 0.5903, "step": 235450 }, { "epoch": 2.61, "learning_rate": 6.5478563203118085e-06, "loss": 0.6255, "step": 235455 }, { "epoch": 2.61, "learning_rate": 6.546933593173296e-06, "loss": 0.5572, "step": 235460 }, { "epoch": 2.61, "learning_rate": 6.546010866034784e-06, "loss": 0.5468, "step": 235465 }, { "epoch": 2.61, "learning_rate": 6.545088138896271e-06, "loss": 0.5505, "step": 235470 }, { "epoch": 2.61, "learning_rate": 6.544165411757759e-06, "loss": 0.5845, "step": 235475 }, { "epoch": 2.61, "learning_rate": 6.543242684619247e-06, "loss": 0.6063, "step": 235480 }, { "epoch": 2.61, "learning_rate": 6.542319957480733e-06, "loss": 0.5854, "step": 235485 }, { "epoch": 2.61, "learning_rate": 6.541397230342221e-06, "loss": 0.5251, "step": 235490 }, { "epoch": 2.61, "learning_rate": 6.540474503203709e-06, "loss": 0.591, "step": 235495 }, { "epoch": 2.61, "learning_rate": 6.5395517760651966e-06, "loss": 0.5618, "step": 235500 }, { "epoch": 2.61, "learning_rate": 6.5386290489266836e-06, "loss": 0.5641, "step": 235505 }, { "epoch": 2.61, "learning_rate": 6.537706321788171e-06, "loss": 0.5929, "step": 235510 }, { "epoch": 2.61, "learning_rate": 6.536783594649659e-06, "loss": 0.5814, "step": 235515 }, { "epoch": 2.61, "learning_rate": 6.535860867511147e-06, "loss": 0.5934, "step": 235520 }, { "epoch": 2.61, "learning_rate": 6.534938140372634e-06, "loss": 0.5902, "step": 235525 }, { "epoch": 2.61, "learning_rate": 6.534015413234122e-06, "loss": 0.5763, "step": 235530 }, { "epoch": 2.61, "learning_rate": 6.53309268609561e-06, "loss": 0.6003, "step": 235535 }, { "epoch": 2.61, "learning_rate": 6.532169958957098e-06, "loss": 0.6365, "step": 235540 }, { "epoch": 2.61, "learning_rate": 6.5312472318185855e-06, "loss": 0.5432, "step": 235545 }, { "epoch": 2.61, "learning_rate": 6.5303245046800725e-06, "loss": 0.5895, "step": 235550 }, { "epoch": 2.61, "learning_rate": 6.52940177754156e-06, "loss": 0.6089, "step": 235555 }, { "epoch": 2.61, "learning_rate": 6.5284790504030465e-06, "loss": 0.5846, "step": 235560 }, { "epoch": 2.61, "learning_rate": 6.527556323264534e-06, "loss": 0.613, "step": 235565 }, { "epoch": 2.61, "learning_rate": 6.526633596126022e-06, "loss": 0.5642, "step": 235570 }, { "epoch": 2.61, "learning_rate": 6.52571086898751e-06, "loss": 0.5915, "step": 235575 }, { "epoch": 2.61, "learning_rate": 6.524788141848998e-06, "loss": 0.4973, "step": 235580 }, { "epoch": 2.61, "learning_rate": 6.523865414710485e-06, "loss": 0.5266, "step": 235585 }, { "epoch": 2.61, "learning_rate": 6.522942687571973e-06, "loss": 0.5793, "step": 235590 }, { "epoch": 2.61, "learning_rate": 6.522019960433461e-06, "loss": 0.5953, "step": 235595 }, { "epoch": 2.61, "learning_rate": 6.5210972332949485e-06, "loss": 0.5652, "step": 235600 }, { "epoch": 2.61, "learning_rate": 6.520174506156436e-06, "loss": 0.5305, "step": 235605 }, { "epoch": 2.61, "learning_rate": 6.519251779017923e-06, "loss": 0.5777, "step": 235610 }, { "epoch": 2.61, "learning_rate": 6.518329051879411e-06, "loss": 0.6066, "step": 235615 }, { "epoch": 2.61, "learning_rate": 6.517406324740899e-06, "loss": 0.5631, "step": 235620 }, { "epoch": 2.61, "learning_rate": 6.516483597602387e-06, "loss": 0.5733, "step": 235625 }, { "epoch": 2.61, "learning_rate": 6.515560870463875e-06, "loss": 0.5829, "step": 235630 }, { "epoch": 2.61, "learning_rate": 6.514638143325361e-06, "loss": 0.6033, "step": 235635 }, { "epoch": 2.61, "learning_rate": 6.513715416186848e-06, "loss": 0.5801, "step": 235640 }, { "epoch": 2.61, "learning_rate": 6.512792689048336e-06, "loss": 0.5473, "step": 235645 }, { "epoch": 2.61, "learning_rate": 6.5118699619098236e-06, "loss": 0.5318, "step": 235650 }, { "epoch": 2.61, "learning_rate": 6.510947234771311e-06, "loss": 0.5541, "step": 235655 }, { "epoch": 2.61, "learning_rate": 6.510024507632799e-06, "loss": 0.5355, "step": 235660 }, { "epoch": 2.61, "learning_rate": 6.509101780494286e-06, "loss": 0.5847, "step": 235665 }, { "epoch": 2.61, "learning_rate": 6.508179053355774e-06, "loss": 0.5975, "step": 235670 }, { "epoch": 2.61, "learning_rate": 6.507256326217262e-06, "loss": 0.5246, "step": 235675 }, { "epoch": 2.61, "learning_rate": 6.50633359907875e-06, "loss": 0.5292, "step": 235680 }, { "epoch": 2.61, "learning_rate": 6.505410871940238e-06, "loss": 0.6017, "step": 235685 }, { "epoch": 2.61, "learning_rate": 6.504488144801725e-06, "loss": 0.5972, "step": 235690 }, { "epoch": 2.61, "learning_rate": 6.5035654176632125e-06, "loss": 0.5605, "step": 235695 }, { "epoch": 2.61, "learning_rate": 6.5026426905247e-06, "loss": 0.5702, "step": 235700 }, { "epoch": 2.61, "learning_rate": 6.501719963386188e-06, "loss": 0.6535, "step": 235705 }, { "epoch": 2.61, "learning_rate": 6.500797236247676e-06, "loss": 0.5867, "step": 235710 }, { "epoch": 2.61, "learning_rate": 6.499874509109162e-06, "loss": 0.5499, "step": 235715 }, { "epoch": 2.61, "learning_rate": 6.49895178197065e-06, "loss": 0.582, "step": 235720 }, { "epoch": 2.61, "learning_rate": 6.498029054832137e-06, "loss": 0.5753, "step": 235725 }, { "epoch": 2.61, "learning_rate": 6.497106327693625e-06, "loss": 0.5815, "step": 235730 }, { "epoch": 2.61, "learning_rate": 6.496183600555113e-06, "loss": 0.5569, "step": 235735 }, { "epoch": 2.61, "learning_rate": 6.495260873416601e-06, "loss": 0.618, "step": 235740 }, { "epoch": 2.61, "learning_rate": 6.4943381462780884e-06, "loss": 0.6386, "step": 235745 }, { "epoch": 2.61, "learning_rate": 6.4934154191395754e-06, "loss": 0.5706, "step": 235750 }, { "epoch": 2.61, "learning_rate": 6.492492692001063e-06, "loss": 0.5689, "step": 235755 }, { "epoch": 2.61, "learning_rate": 6.491569964862551e-06, "loss": 0.5562, "step": 235760 }, { "epoch": 2.61, "learning_rate": 6.490647237724039e-06, "loss": 0.5598, "step": 235765 }, { "epoch": 2.61, "learning_rate": 6.489724510585527e-06, "loss": 0.6162, "step": 235770 }, { "epoch": 2.61, "learning_rate": 6.488801783447014e-06, "loss": 0.6062, "step": 235775 }, { "epoch": 2.61, "learning_rate": 6.487879056308502e-06, "loss": 0.6443, "step": 235780 }, { "epoch": 2.61, "learning_rate": 6.4869563291699895e-06, "loss": 0.6146, "step": 235785 }, { "epoch": 2.61, "learning_rate": 6.486033602031476e-06, "loss": 0.5831, "step": 235790 }, { "epoch": 2.61, "learning_rate": 6.4851108748929635e-06, "loss": 0.532, "step": 235795 }, { "epoch": 2.61, "learning_rate": 6.484188147754451e-06, "loss": 0.5818, "step": 235800 }, { "epoch": 2.61, "learning_rate": 6.483265420615938e-06, "loss": 0.5477, "step": 235805 }, { "epoch": 2.61, "learning_rate": 6.482342693477426e-06, "loss": 0.5964, "step": 235810 }, { "epoch": 2.61, "learning_rate": 6.481419966338914e-06, "loss": 0.5334, "step": 235815 }, { "epoch": 2.61, "learning_rate": 6.480497239200402e-06, "loss": 0.5978, "step": 235820 }, { "epoch": 2.61, "learning_rate": 6.47957451206189e-06, "loss": 0.5989, "step": 235825 }, { "epoch": 2.61, "learning_rate": 6.478651784923377e-06, "loss": 0.5596, "step": 235830 }, { "epoch": 2.61, "learning_rate": 6.477729057784865e-06, "loss": 0.6209, "step": 235835 }, { "epoch": 2.61, "learning_rate": 6.4768063306463525e-06, "loss": 0.5367, "step": 235840 }, { "epoch": 2.61, "learning_rate": 6.47588360350784e-06, "loss": 0.6363, "step": 235845 }, { "epoch": 2.61, "learning_rate": 6.474960876369328e-06, "loss": 0.5906, "step": 235850 }, { "epoch": 2.61, "learning_rate": 6.474038149230815e-06, "loss": 0.6135, "step": 235855 }, { "epoch": 2.61, "learning_rate": 6.473115422092303e-06, "loss": 0.6119, "step": 235860 }, { "epoch": 2.61, "learning_rate": 6.472192694953789e-06, "loss": 0.5699, "step": 235865 }, { "epoch": 2.61, "learning_rate": 6.471269967815277e-06, "loss": 0.6292, "step": 235870 }, { "epoch": 2.61, "learning_rate": 6.470347240676765e-06, "loss": 0.6023, "step": 235875 }, { "epoch": 2.61, "learning_rate": 6.469424513538253e-06, "loss": 0.5442, "step": 235880 }, { "epoch": 2.61, "learning_rate": 6.468501786399741e-06, "loss": 0.6215, "step": 235885 }, { "epoch": 2.61, "learning_rate": 6.467579059261228e-06, "loss": 0.5973, "step": 235890 }, { "epoch": 2.61, "learning_rate": 6.4666563321227154e-06, "loss": 0.5425, "step": 235895 }, { "epoch": 2.61, "learning_rate": 6.465733604984203e-06, "loss": 0.5604, "step": 235900 }, { "epoch": 2.61, "learning_rate": 6.464810877845691e-06, "loss": 0.6132, "step": 235905 }, { "epoch": 2.61, "learning_rate": 6.463888150707179e-06, "loss": 0.5825, "step": 235910 }, { "epoch": 2.61, "learning_rate": 6.462965423568666e-06, "loss": 0.6025, "step": 235915 }, { "epoch": 2.61, "learning_rate": 6.462042696430154e-06, "loss": 0.6288, "step": 235920 }, { "epoch": 2.61, "learning_rate": 6.461119969291642e-06, "loss": 0.6185, "step": 235925 }, { "epoch": 2.61, "learning_rate": 6.4601972421531295e-06, "loss": 0.5968, "step": 235930 }, { "epoch": 2.61, "learning_rate": 6.4592745150146165e-06, "loss": 0.5945, "step": 235935 }, { "epoch": 2.61, "learning_rate": 6.4583517878761035e-06, "loss": 0.5151, "step": 235940 }, { "epoch": 2.61, "learning_rate": 6.4574290607375905e-06, "loss": 0.583, "step": 235945 }, { "epoch": 2.61, "learning_rate": 6.456506333599078e-06, "loss": 0.6302, "step": 235950 }, { "epoch": 2.61, "learning_rate": 6.455583606460566e-06, "loss": 0.6151, "step": 235955 }, { "epoch": 2.61, "learning_rate": 6.454660879322054e-06, "loss": 0.6267, "step": 235960 }, { "epoch": 2.61, "learning_rate": 6.453738152183542e-06, "loss": 0.6158, "step": 235965 }, { "epoch": 2.61, "learning_rate": 6.452815425045029e-06, "loss": 0.6016, "step": 235970 }, { "epoch": 2.61, "learning_rate": 6.451892697906517e-06, "loss": 0.5845, "step": 235975 }, { "epoch": 2.61, "learning_rate": 6.450969970768005e-06, "loss": 0.5611, "step": 235980 }, { "epoch": 2.61, "learning_rate": 6.4500472436294925e-06, "loss": 0.5614, "step": 235985 }, { "epoch": 2.61, "learning_rate": 6.44912451649098e-06, "loss": 0.6281, "step": 235990 }, { "epoch": 2.61, "learning_rate": 6.448201789352467e-06, "loss": 0.585, "step": 235995 }, { "epoch": 2.61, "learning_rate": 6.447279062213955e-06, "loss": 0.5828, "step": 236000 }, { "epoch": 2.61, "eval_loss": 0.5627485513687134, "eval_runtime": 69.9734, "eval_samples_per_second": 28.582, "eval_steps_per_second": 14.291, "step": 236000 }, { "epoch": 2.61, "learning_rate": 6.446356335075443e-06, "loss": 0.5737, "step": 236005 }, { "epoch": 2.61, "learning_rate": 6.445433607936931e-06, "loss": 0.5722, "step": 236010 }, { "epoch": 2.61, "learning_rate": 6.444510880798417e-06, "loss": 0.5556, "step": 236015 }, { "epoch": 2.61, "learning_rate": 6.443588153659905e-06, "loss": 0.5642, "step": 236020 }, { "epoch": 2.61, "learning_rate": 6.442665426521393e-06, "loss": 0.5708, "step": 236025 }, { "epoch": 2.61, "learning_rate": 6.44174269938288e-06, "loss": 0.5964, "step": 236030 }, { "epoch": 2.61, "learning_rate": 6.4408199722443676e-06, "loss": 0.5867, "step": 236035 }, { "epoch": 2.61, "learning_rate": 6.439897245105855e-06, "loss": 0.5921, "step": 236040 }, { "epoch": 2.61, "learning_rate": 6.438974517967343e-06, "loss": 0.5707, "step": 236045 }, { "epoch": 2.61, "learning_rate": 6.438051790828831e-06, "loss": 0.5633, "step": 236050 }, { "epoch": 2.61, "learning_rate": 6.437129063690318e-06, "loss": 0.5606, "step": 236055 }, { "epoch": 2.61, "learning_rate": 6.436206336551806e-06, "loss": 0.5762, "step": 236060 }, { "epoch": 2.61, "learning_rate": 6.435283609413294e-06, "loss": 0.6164, "step": 236065 }, { "epoch": 2.61, "learning_rate": 6.434360882274782e-06, "loss": 0.5872, "step": 236070 }, { "epoch": 2.61, "learning_rate": 6.433438155136269e-06, "loss": 0.6225, "step": 236075 }, { "epoch": 2.61, "learning_rate": 6.4325154279977565e-06, "loss": 0.5698, "step": 236080 }, { "epoch": 2.61, "learning_rate": 6.431592700859244e-06, "loss": 0.6385, "step": 236085 }, { "epoch": 2.61, "learning_rate": 6.4306699737207305e-06, "loss": 0.5491, "step": 236090 }, { "epoch": 2.61, "learning_rate": 6.429747246582218e-06, "loss": 0.5925, "step": 236095 }, { "epoch": 2.61, "learning_rate": 6.428824519443706e-06, "loss": 0.6053, "step": 236100 }, { "epoch": 2.61, "learning_rate": 6.427901792305194e-06, "loss": 0.592, "step": 236105 }, { "epoch": 2.61, "learning_rate": 6.426979065166681e-06, "loss": 0.6376, "step": 236110 }, { "epoch": 2.61, "learning_rate": 6.426056338028169e-06, "loss": 0.534, "step": 236115 }, { "epoch": 2.61, "learning_rate": 6.425133610889657e-06, "loss": 0.5907, "step": 236120 }, { "epoch": 2.61, "learning_rate": 6.424210883751145e-06, "loss": 0.6331, "step": 236125 }, { "epoch": 2.61, "learning_rate": 6.4232881566126325e-06, "loss": 0.5704, "step": 236130 }, { "epoch": 2.61, "learning_rate": 6.4223654294741195e-06, "loss": 0.5794, "step": 236135 }, { "epoch": 2.61, "learning_rate": 6.421442702335607e-06, "loss": 0.5343, "step": 236140 }, { "epoch": 2.61, "learning_rate": 6.420519975197095e-06, "loss": 0.5815, "step": 236145 }, { "epoch": 2.61, "learning_rate": 6.419597248058583e-06, "loss": 0.574, "step": 236150 }, { "epoch": 2.61, "learning_rate": 6.418674520920071e-06, "loss": 0.5763, "step": 236155 }, { "epoch": 2.61, "learning_rate": 6.417751793781558e-06, "loss": 0.5563, "step": 236160 }, { "epoch": 2.61, "learning_rate": 6.416829066643046e-06, "loss": 0.5864, "step": 236165 }, { "epoch": 2.62, "learning_rate": 6.415906339504532e-06, "loss": 0.6082, "step": 236170 }, { "epoch": 2.62, "learning_rate": 6.41498361236602e-06, "loss": 0.5528, "step": 236175 }, { "epoch": 2.62, "learning_rate": 6.4140608852275076e-06, "loss": 0.5606, "step": 236180 }, { "epoch": 2.62, "learning_rate": 6.413138158088995e-06, "loss": 0.6249, "step": 236185 }, { "epoch": 2.62, "learning_rate": 6.412215430950482e-06, "loss": 0.5368, "step": 236190 }, { "epoch": 2.62, "learning_rate": 6.41129270381197e-06, "loss": 0.5836, "step": 236195 }, { "epoch": 2.62, "learning_rate": 6.410369976673458e-06, "loss": 0.6087, "step": 236200 }, { "epoch": 2.62, "learning_rate": 6.409447249534946e-06, "loss": 0.6008, "step": 236205 }, { "epoch": 2.62, "learning_rate": 6.408524522396434e-06, "loss": 0.6263, "step": 236210 }, { "epoch": 2.62, "learning_rate": 6.407601795257921e-06, "loss": 0.6015, "step": 236215 }, { "epoch": 2.62, "learning_rate": 6.406679068119409e-06, "loss": 0.5824, "step": 236220 }, { "epoch": 2.62, "learning_rate": 6.4057563409808965e-06, "loss": 0.6011, "step": 236225 }, { "epoch": 2.62, "learning_rate": 6.404833613842384e-06, "loss": 0.6144, "step": 236230 }, { "epoch": 2.62, "learning_rate": 6.403910886703872e-06, "loss": 0.5284, "step": 236235 }, { "epoch": 2.62, "learning_rate": 6.402988159565359e-06, "loss": 0.544, "step": 236240 }, { "epoch": 2.62, "learning_rate": 6.402065432426846e-06, "loss": 0.5322, "step": 236245 }, { "epoch": 2.62, "learning_rate": 6.401142705288333e-06, "loss": 0.6001, "step": 236250 }, { "epoch": 2.62, "learning_rate": 6.400219978149821e-06, "loss": 0.5638, "step": 236255 }, { "epoch": 2.62, "learning_rate": 6.399297251011309e-06, "loss": 0.5669, "step": 236260 }, { "epoch": 2.62, "learning_rate": 6.398374523872797e-06, "loss": 0.6294, "step": 236265 }, { "epoch": 2.62, "learning_rate": 6.397451796734285e-06, "loss": 0.622, "step": 236270 }, { "epoch": 2.62, "learning_rate": 6.396529069595772e-06, "loss": 0.5925, "step": 236275 }, { "epoch": 2.62, "learning_rate": 6.3956063424572595e-06, "loss": 0.5913, "step": 236280 }, { "epoch": 2.62, "learning_rate": 6.394683615318747e-06, "loss": 0.5784, "step": 236285 }, { "epoch": 2.62, "learning_rate": 6.393760888180235e-06, "loss": 0.5003, "step": 236290 }, { "epoch": 2.62, "learning_rate": 6.392838161041723e-06, "loss": 0.6019, "step": 236295 }, { "epoch": 2.62, "learning_rate": 6.39191543390321e-06, "loss": 0.5844, "step": 236300 }, { "epoch": 2.62, "learning_rate": 6.390992706764698e-06, "loss": 0.5907, "step": 236305 }, { "epoch": 2.62, "learning_rate": 6.390069979626186e-06, "loss": 0.5903, "step": 236310 }, { "epoch": 2.62, "learning_rate": 6.3891472524876735e-06, "loss": 0.5783, "step": 236315 }, { "epoch": 2.62, "learning_rate": 6.38822452534916e-06, "loss": 0.6236, "step": 236320 }, { "epoch": 2.62, "learning_rate": 6.3873017982106475e-06, "loss": 0.5611, "step": 236325 }, { "epoch": 2.62, "learning_rate": 6.3863790710721346e-06, "loss": 0.5518, "step": 236330 }, { "epoch": 2.62, "learning_rate": 6.385456343933622e-06, "loss": 0.5514, "step": 236335 }, { "epoch": 2.62, "learning_rate": 6.38453361679511e-06, "loss": 0.5177, "step": 236340 }, { "epoch": 2.62, "learning_rate": 6.383610889656598e-06, "loss": 0.6071, "step": 236345 }, { "epoch": 2.62, "learning_rate": 6.382688162518086e-06, "loss": 0.5621, "step": 236350 }, { "epoch": 2.62, "learning_rate": 6.381765435379573e-06, "loss": 0.5291, "step": 236355 }, { "epoch": 2.62, "learning_rate": 6.380842708241061e-06, "loss": 0.6138, "step": 236360 }, { "epoch": 2.62, "learning_rate": 6.379919981102549e-06, "loss": 0.6288, "step": 236365 }, { "epoch": 2.62, "learning_rate": 6.3789972539640365e-06, "loss": 0.5518, "step": 236370 }, { "epoch": 2.62, "learning_rate": 6.378074526825524e-06, "loss": 0.5615, "step": 236375 }, { "epoch": 2.62, "learning_rate": 6.377151799687011e-06, "loss": 0.5738, "step": 236380 }, { "epoch": 2.62, "learning_rate": 6.376229072548499e-06, "loss": 0.5977, "step": 236385 }, { "epoch": 2.62, "learning_rate": 6.375306345409987e-06, "loss": 0.6285, "step": 236390 }, { "epoch": 2.62, "learning_rate": 6.374383618271473e-06, "loss": 0.6079, "step": 236395 }, { "epoch": 2.62, "learning_rate": 6.373460891132961e-06, "loss": 0.6052, "step": 236400 }, { "epoch": 2.62, "learning_rate": 6.372538163994449e-06, "loss": 0.5606, "step": 236405 }, { "epoch": 2.62, "learning_rate": 6.371615436855937e-06, "loss": 0.5835, "step": 236410 }, { "epoch": 2.62, "learning_rate": 6.370692709717424e-06, "loss": 0.5992, "step": 236415 }, { "epoch": 2.62, "learning_rate": 6.369769982578912e-06, "loss": 0.6127, "step": 236420 }, { "epoch": 2.62, "learning_rate": 6.3688472554403994e-06, "loss": 0.5622, "step": 236425 }, { "epoch": 2.62, "learning_rate": 6.367924528301887e-06, "loss": 0.5919, "step": 236430 }, { "epoch": 2.62, "learning_rate": 6.367001801163375e-06, "loss": 0.576, "step": 236435 }, { "epoch": 2.62, "learning_rate": 6.366079074024862e-06, "loss": 0.5949, "step": 236440 }, { "epoch": 2.62, "learning_rate": 6.36515634688635e-06, "loss": 0.5624, "step": 236445 }, { "epoch": 2.62, "learning_rate": 6.364233619747838e-06, "loss": 0.5595, "step": 236450 }, { "epoch": 2.62, "learning_rate": 6.363310892609326e-06, "loss": 0.593, "step": 236455 }, { "epoch": 2.62, "learning_rate": 6.3623881654708135e-06, "loss": 0.5094, "step": 236460 }, { "epoch": 2.62, "learning_rate": 6.3614654383323005e-06, "loss": 0.6037, "step": 236465 }, { "epoch": 2.62, "learning_rate": 6.360542711193787e-06, "loss": 0.5321, "step": 236470 }, { "epoch": 2.62, "learning_rate": 6.3596199840552745e-06, "loss": 0.5873, "step": 236475 }, { "epoch": 2.62, "learning_rate": 6.358697256916762e-06, "loss": 0.6003, "step": 236480 }, { "epoch": 2.62, "learning_rate": 6.35777452977825e-06, "loss": 0.5939, "step": 236485 }, { "epoch": 2.62, "learning_rate": 6.356851802639738e-06, "loss": 0.5624, "step": 236490 }, { "epoch": 2.62, "learning_rate": 6.355929075501225e-06, "loss": 0.6183, "step": 236495 }, { "epoch": 2.62, "learning_rate": 6.355006348362713e-06, "loss": 0.5299, "step": 236500 }, { "epoch": 2.62, "learning_rate": 6.354083621224201e-06, "loss": 0.5874, "step": 236505 }, { "epoch": 2.62, "learning_rate": 6.353160894085689e-06, "loss": 0.5501, "step": 236510 }, { "epoch": 2.62, "learning_rate": 6.3522381669471765e-06, "loss": 0.5852, "step": 236515 }, { "epoch": 2.62, "learning_rate": 6.3513154398086635e-06, "loss": 0.5882, "step": 236520 }, { "epoch": 2.62, "learning_rate": 6.350392712670151e-06, "loss": 0.5756, "step": 236525 }, { "epoch": 2.62, "learning_rate": 6.349469985531639e-06, "loss": 0.5853, "step": 236530 }, { "epoch": 2.62, "learning_rate": 6.348547258393127e-06, "loss": 0.5969, "step": 236535 }, { "epoch": 2.62, "learning_rate": 6.347624531254615e-06, "loss": 0.5571, "step": 236540 }, { "epoch": 2.62, "learning_rate": 6.346701804116102e-06, "loss": 0.6091, "step": 236545 }, { "epoch": 2.62, "learning_rate": 6.345779076977589e-06, "loss": 0.5699, "step": 236550 }, { "epoch": 2.62, "learning_rate": 6.344856349839076e-06, "loss": 0.5569, "step": 236555 }, { "epoch": 2.62, "learning_rate": 6.343933622700564e-06, "loss": 0.6629, "step": 236560 }, { "epoch": 2.62, "learning_rate": 6.343010895562052e-06, "loss": 0.5364, "step": 236565 }, { "epoch": 2.62, "learning_rate": 6.3420881684235394e-06, "loss": 0.5944, "step": 236570 }, { "epoch": 2.62, "learning_rate": 6.341165441285027e-06, "loss": 0.5958, "step": 236575 }, { "epoch": 2.62, "learning_rate": 6.340242714146514e-06, "loss": 0.5589, "step": 236580 }, { "epoch": 2.62, "learning_rate": 6.339319987008002e-06, "loss": 0.6189, "step": 236585 }, { "epoch": 2.62, "learning_rate": 6.33839725986949e-06, "loss": 0.5725, "step": 236590 }, { "epoch": 2.62, "learning_rate": 6.337474532730978e-06, "loss": 0.608, "step": 236595 }, { "epoch": 2.62, "learning_rate": 6.336551805592466e-06, "loss": 0.5998, "step": 236600 }, { "epoch": 2.62, "learning_rate": 6.335629078453953e-06, "loss": 0.5769, "step": 236605 }, { "epoch": 2.62, "learning_rate": 6.3347063513154405e-06, "loss": 0.5416, "step": 236610 }, { "epoch": 2.62, "learning_rate": 6.333783624176928e-06, "loss": 0.6445, "step": 236615 }, { "epoch": 2.62, "learning_rate": 6.332860897038416e-06, "loss": 0.5726, "step": 236620 }, { "epoch": 2.62, "learning_rate": 6.331938169899902e-06, "loss": 0.5951, "step": 236625 }, { "epoch": 2.62, "learning_rate": 6.33101544276139e-06, "loss": 0.5421, "step": 236630 }, { "epoch": 2.62, "learning_rate": 6.330092715622877e-06, "loss": 0.5954, "step": 236635 }, { "epoch": 2.62, "learning_rate": 6.329169988484365e-06, "loss": 0.5469, "step": 236640 }, { "epoch": 2.62, "learning_rate": 6.328247261345853e-06, "loss": 0.5481, "step": 236645 }, { "epoch": 2.62, "learning_rate": 6.327324534207341e-06, "loss": 0.5861, "step": 236650 }, { "epoch": 2.62, "learning_rate": 6.326401807068829e-06, "loss": 0.5982, "step": 236655 }, { "epoch": 2.62, "learning_rate": 6.325479079930316e-06, "loss": 0.5769, "step": 236660 }, { "epoch": 2.62, "learning_rate": 6.3245563527918035e-06, "loss": 0.6069, "step": 236665 }, { "epoch": 2.62, "learning_rate": 6.323633625653291e-06, "loss": 0.5863, "step": 236670 }, { "epoch": 2.62, "learning_rate": 6.322710898514779e-06, "loss": 0.5977, "step": 236675 }, { "epoch": 2.62, "learning_rate": 6.321788171376267e-06, "loss": 0.5975, "step": 236680 }, { "epoch": 2.62, "learning_rate": 6.320865444237754e-06, "loss": 0.5757, "step": 236685 }, { "epoch": 2.62, "learning_rate": 6.319942717099242e-06, "loss": 0.6027, "step": 236690 }, { "epoch": 2.62, "learning_rate": 6.31901998996073e-06, "loss": 0.6062, "step": 236695 }, { "epoch": 2.62, "learning_rate": 6.318097262822216e-06, "loss": 0.6024, "step": 236700 }, { "epoch": 2.62, "learning_rate": 6.317174535683704e-06, "loss": 0.57, "step": 236705 }, { "epoch": 2.62, "learning_rate": 6.3162518085451916e-06, "loss": 0.5594, "step": 236710 }, { "epoch": 2.62, "learning_rate": 6.315329081406679e-06, "loss": 0.5891, "step": 236715 }, { "epoch": 2.62, "learning_rate": 6.314406354268166e-06, "loss": 0.5418, "step": 236720 }, { "epoch": 2.62, "learning_rate": 6.313483627129654e-06, "loss": 0.6123, "step": 236725 }, { "epoch": 2.62, "learning_rate": 6.312560899991142e-06, "loss": 0.5622, "step": 236730 }, { "epoch": 2.62, "learning_rate": 6.31163817285263e-06, "loss": 0.6233, "step": 236735 }, { "epoch": 2.62, "learning_rate": 6.310715445714117e-06, "loss": 0.6184, "step": 236740 }, { "epoch": 2.62, "learning_rate": 6.309792718575605e-06, "loss": 0.5495, "step": 236745 }, { "epoch": 2.62, "learning_rate": 6.308869991437093e-06, "loss": 0.6037, "step": 236750 }, { "epoch": 2.62, "learning_rate": 6.3079472642985805e-06, "loss": 0.5412, "step": 236755 }, { "epoch": 2.62, "learning_rate": 6.307024537160068e-06, "loss": 0.5916, "step": 236760 }, { "epoch": 2.62, "learning_rate": 6.306101810021555e-06, "loss": 0.5817, "step": 236765 }, { "epoch": 2.62, "learning_rate": 6.305179082883043e-06, "loss": 0.595, "step": 236770 }, { "epoch": 2.62, "learning_rate": 6.304256355744529e-06, "loss": 0.5517, "step": 236775 }, { "epoch": 2.62, "learning_rate": 6.303333628606017e-06, "loss": 0.5877, "step": 236780 }, { "epoch": 2.62, "learning_rate": 6.302410901467505e-06, "loss": 0.6136, "step": 236785 }, { "epoch": 2.62, "learning_rate": 6.301488174328993e-06, "loss": 0.6043, "step": 236790 }, { "epoch": 2.62, "learning_rate": 6.300565447190481e-06, "loss": 0.5553, "step": 236795 }, { "epoch": 2.62, "learning_rate": 6.299642720051968e-06, "loss": 0.553, "step": 236800 }, { "epoch": 2.62, "learning_rate": 6.298719992913456e-06, "loss": 0.6143, "step": 236805 }, { "epoch": 2.62, "learning_rate": 6.2977972657749435e-06, "loss": 0.6344, "step": 236810 }, { "epoch": 2.62, "learning_rate": 6.296874538636431e-06, "loss": 0.5784, "step": 236815 }, { "epoch": 2.62, "learning_rate": 6.295951811497919e-06, "loss": 0.5654, "step": 236820 }, { "epoch": 2.62, "learning_rate": 6.295029084359406e-06, "loss": 0.6273, "step": 236825 }, { "epoch": 2.62, "learning_rate": 6.294106357220894e-06, "loss": 0.5534, "step": 236830 }, { "epoch": 2.62, "learning_rate": 6.293183630082382e-06, "loss": 0.5604, "step": 236835 }, { "epoch": 2.62, "learning_rate": 6.29226090294387e-06, "loss": 0.5935, "step": 236840 }, { "epoch": 2.62, "learning_rate": 6.2913381758053576e-06, "loss": 0.595, "step": 236845 }, { "epoch": 2.62, "learning_rate": 6.290415448666844e-06, "loss": 0.604, "step": 236850 }, { "epoch": 2.62, "learning_rate": 6.289492721528331e-06, "loss": 0.5803, "step": 236855 }, { "epoch": 2.62, "learning_rate": 6.2885699943898186e-06, "loss": 0.5465, "step": 236860 }, { "epoch": 2.62, "learning_rate": 6.287647267251306e-06, "loss": 0.5567, "step": 236865 }, { "epoch": 2.62, "learning_rate": 6.286724540112794e-06, "loss": 0.564, "step": 236870 }, { "epoch": 2.62, "learning_rate": 6.285801812974282e-06, "loss": 0.5727, "step": 236875 }, { "epoch": 2.62, "learning_rate": 6.284879085835769e-06, "loss": 0.5693, "step": 236880 }, { "epoch": 2.62, "learning_rate": 6.283956358697257e-06, "loss": 0.5807, "step": 236885 }, { "epoch": 2.62, "learning_rate": 6.283033631558745e-06, "loss": 0.5795, "step": 236890 }, { "epoch": 2.62, "learning_rate": 6.282110904420233e-06, "loss": 0.5721, "step": 236895 }, { "epoch": 2.62, "learning_rate": 6.2811881772817205e-06, "loss": 0.6031, "step": 236900 }, { "epoch": 2.62, "learning_rate": 6.2802654501432075e-06, "loss": 0.5459, "step": 236905 }, { "epoch": 2.62, "learning_rate": 6.279342723004695e-06, "loss": 0.5735, "step": 236910 }, { "epoch": 2.62, "learning_rate": 6.278419995866183e-06, "loss": 0.5855, "step": 236915 }, { "epoch": 2.62, "learning_rate": 6.277497268727671e-06, "loss": 0.5696, "step": 236920 }, { "epoch": 2.62, "learning_rate": 6.276574541589157e-06, "loss": 0.5945, "step": 236925 }, { "epoch": 2.62, "learning_rate": 6.275651814450645e-06, "loss": 0.6011, "step": 236930 }, { "epoch": 2.62, "learning_rate": 6.274729087312133e-06, "loss": 0.5518, "step": 236935 }, { "epoch": 2.62, "learning_rate": 6.27380636017362e-06, "loss": 0.5744, "step": 236940 }, { "epoch": 2.62, "learning_rate": 6.272883633035108e-06, "loss": 0.5594, "step": 236945 }, { "epoch": 2.62, "learning_rate": 6.271960905896596e-06, "loss": 0.5087, "step": 236950 }, { "epoch": 2.62, "learning_rate": 6.2710381787580834e-06, "loss": 0.6012, "step": 236955 }, { "epoch": 2.62, "learning_rate": 6.270115451619571e-06, "loss": 0.5498, "step": 236960 }, { "epoch": 2.62, "learning_rate": 6.269192724481058e-06, "loss": 0.5818, "step": 236965 }, { "epoch": 2.62, "learning_rate": 6.268269997342546e-06, "loss": 0.5711, "step": 236970 }, { "epoch": 2.62, "learning_rate": 6.267347270204034e-06, "loss": 0.5914, "step": 236975 }, { "epoch": 2.62, "learning_rate": 6.266424543065522e-06, "loss": 0.5322, "step": 236980 }, { "epoch": 2.62, "learning_rate": 6.26550181592701e-06, "loss": 0.5892, "step": 236985 }, { "epoch": 2.62, "learning_rate": 6.264579088788497e-06, "loss": 0.5888, "step": 236990 }, { "epoch": 2.62, "learning_rate": 6.2636563616499845e-06, "loss": 0.5699, "step": 236995 }, { "epoch": 2.62, "learning_rate": 6.262733634511472e-06, "loss": 0.5411, "step": 237000 }, { "epoch": 2.62, "eval_loss": 0.5497919917106628, "eval_runtime": 69.8071, "eval_samples_per_second": 28.65, "eval_steps_per_second": 14.325, "step": 237000 }, { "epoch": 2.62, "learning_rate": 6.2618109073729585e-06, "loss": 0.5874, "step": 237005 }, { "epoch": 2.62, "learning_rate": 6.260888180234446e-06, "loss": 0.5842, "step": 237010 }, { "epoch": 2.62, "learning_rate": 6.259965453095934e-06, "loss": 0.5473, "step": 237015 }, { "epoch": 2.62, "learning_rate": 6.259042725957421e-06, "loss": 0.5855, "step": 237020 }, { "epoch": 2.62, "learning_rate": 6.258119998818909e-06, "loss": 0.5761, "step": 237025 }, { "epoch": 2.62, "learning_rate": 6.257197271680397e-06, "loss": 0.5536, "step": 237030 }, { "epoch": 2.62, "learning_rate": 6.256274544541885e-06, "loss": 0.5786, "step": 237035 }, { "epoch": 2.62, "learning_rate": 6.255351817403373e-06, "loss": 0.582, "step": 237040 }, { "epoch": 2.62, "learning_rate": 6.25442909026486e-06, "loss": 0.549, "step": 237045 }, { "epoch": 2.62, "learning_rate": 6.2535063631263475e-06, "loss": 0.5991, "step": 237050 }, { "epoch": 2.62, "learning_rate": 6.252583635987835e-06, "loss": 0.5584, "step": 237055 }, { "epoch": 2.62, "learning_rate": 6.251660908849323e-06, "loss": 0.5249, "step": 237060 }, { "epoch": 2.62, "learning_rate": 6.250738181710811e-06, "loss": 0.5939, "step": 237065 }, { "epoch": 2.63, "learning_rate": 6.249815454572298e-06, "loss": 0.6029, "step": 237070 }, { "epoch": 2.63, "learning_rate": 6.248892727433785e-06, "loss": 0.6004, "step": 237075 }, { "epoch": 2.63, "learning_rate": 6.247970000295273e-06, "loss": 0.598, "step": 237080 }, { "epoch": 2.63, "learning_rate": 6.247047273156761e-06, "loss": 0.6022, "step": 237085 }, { "epoch": 2.63, "learning_rate": 6.246124546018249e-06, "loss": 0.5856, "step": 237090 }, { "epoch": 2.63, "learning_rate": 6.2452018188797364e-06, "loss": 0.5868, "step": 237095 }, { "epoch": 2.63, "learning_rate": 6.2442790917412234e-06, "loss": 0.6366, "step": 237100 }, { "epoch": 2.63, "learning_rate": 6.2433563646027104e-06, "loss": 0.5678, "step": 237105 }, { "epoch": 2.63, "learning_rate": 6.242433637464198e-06, "loss": 0.5661, "step": 237110 }, { "epoch": 2.63, "learning_rate": 6.241510910325686e-06, "loss": 0.5815, "step": 237115 }, { "epoch": 2.63, "learning_rate": 6.240588183187174e-06, "loss": 0.5843, "step": 237120 }, { "epoch": 2.63, "learning_rate": 6.239665456048662e-06, "loss": 0.5297, "step": 237125 }, { "epoch": 2.63, "learning_rate": 6.238742728910149e-06, "loss": 0.5753, "step": 237130 }, { "epoch": 2.63, "learning_rate": 6.237820001771636e-06, "loss": 0.6043, "step": 237135 }, { "epoch": 2.63, "learning_rate": 6.236897274633124e-06, "loss": 0.5614, "step": 237140 }, { "epoch": 2.63, "learning_rate": 6.2359745474946115e-06, "loss": 0.5947, "step": 237145 }, { "epoch": 2.63, "learning_rate": 6.235051820356099e-06, "loss": 0.5645, "step": 237150 }, { "epoch": 2.63, "learning_rate": 6.234129093217586e-06, "loss": 0.5288, "step": 237155 }, { "epoch": 2.63, "learning_rate": 6.233206366079074e-06, "loss": 0.5926, "step": 237160 }, { "epoch": 2.63, "learning_rate": 6.232283638940562e-06, "loss": 0.611, "step": 237165 }, { "epoch": 2.63, "learning_rate": 6.23136091180205e-06, "loss": 0.5802, "step": 237170 }, { "epoch": 2.63, "learning_rate": 6.230438184663537e-06, "loss": 0.5701, "step": 237175 }, { "epoch": 2.63, "learning_rate": 6.229515457525025e-06, "loss": 0.5675, "step": 237180 }, { "epoch": 2.63, "learning_rate": 6.228592730386512e-06, "loss": 0.596, "step": 237185 }, { "epoch": 2.63, "learning_rate": 6.227670003248e-06, "loss": 0.5551, "step": 237190 }, { "epoch": 2.63, "learning_rate": 6.2267472761094875e-06, "loss": 0.5334, "step": 237195 }, { "epoch": 2.63, "learning_rate": 6.225824548970975e-06, "loss": 0.5985, "step": 237200 }, { "epoch": 2.63, "learning_rate": 6.224901821832463e-06, "loss": 0.5499, "step": 237205 }, { "epoch": 2.63, "learning_rate": 6.22397909469395e-06, "loss": 0.5509, "step": 237210 }, { "epoch": 2.63, "learning_rate": 6.223056367555437e-06, "loss": 0.5513, "step": 237215 }, { "epoch": 2.63, "learning_rate": 6.222133640416925e-06, "loss": 0.5525, "step": 237220 }, { "epoch": 2.63, "learning_rate": 6.221210913278413e-06, "loss": 0.5706, "step": 237225 }, { "epoch": 2.63, "learning_rate": 6.220288186139901e-06, "loss": 0.5472, "step": 237230 }, { "epoch": 2.63, "learning_rate": 6.2193654590013886e-06, "loss": 0.5872, "step": 237235 }, { "epoch": 2.63, "learning_rate": 6.2184427318628756e-06, "loss": 0.5977, "step": 237240 }, { "epoch": 2.63, "learning_rate": 6.2175200047243634e-06, "loss": 0.5468, "step": 237245 }, { "epoch": 2.63, "learning_rate": 6.2165972775858504e-06, "loss": 0.5603, "step": 237250 }, { "epoch": 2.63, "learning_rate": 6.215674550447338e-06, "loss": 0.5912, "step": 237255 }, { "epoch": 2.63, "learning_rate": 6.214751823308826e-06, "loss": 0.5415, "step": 237260 }, { "epoch": 2.63, "learning_rate": 6.213829096170314e-06, "loss": 0.5442, "step": 237265 }, { "epoch": 2.63, "learning_rate": 6.212906369031801e-06, "loss": 0.6072, "step": 237270 }, { "epoch": 2.63, "learning_rate": 6.211983641893289e-06, "loss": 0.6504, "step": 237275 }, { "epoch": 2.63, "learning_rate": 6.211060914754777e-06, "loss": 0.5768, "step": 237280 }, { "epoch": 2.63, "learning_rate": 6.2101381876162645e-06, "loss": 0.5701, "step": 237285 }, { "epoch": 2.63, "learning_rate": 6.2092154604777515e-06, "loss": 0.5438, "step": 237290 }, { "epoch": 2.63, "learning_rate": 6.2082927333392385e-06, "loss": 0.5711, "step": 237295 }, { "epoch": 2.63, "learning_rate": 6.207370006200726e-06, "loss": 0.6008, "step": 237300 }, { "epoch": 2.63, "learning_rate": 6.206447279062214e-06, "loss": 0.5761, "step": 237305 }, { "epoch": 2.63, "learning_rate": 6.205524551923702e-06, "loss": 0.5617, "step": 237310 }, { "epoch": 2.63, "learning_rate": 6.20460182478519e-06, "loss": 0.5558, "step": 237315 }, { "epoch": 2.63, "learning_rate": 6.203679097646677e-06, "loss": 0.5737, "step": 237320 }, { "epoch": 2.63, "learning_rate": 6.202756370508164e-06, "loss": 0.5635, "step": 237325 }, { "epoch": 2.63, "learning_rate": 6.201833643369652e-06, "loss": 0.6302, "step": 237330 }, { "epoch": 2.63, "learning_rate": 6.20091091623114e-06, "loss": 0.5874, "step": 237335 }, { "epoch": 2.63, "learning_rate": 6.1999881890926275e-06, "loss": 0.5394, "step": 237340 }, { "epoch": 2.63, "learning_rate": 6.199065461954115e-06, "loss": 0.5952, "step": 237345 }, { "epoch": 2.63, "learning_rate": 6.198142734815602e-06, "loss": 0.5619, "step": 237350 }, { "epoch": 2.63, "learning_rate": 6.19722000767709e-06, "loss": 0.5775, "step": 237355 }, { "epoch": 2.63, "learning_rate": 6.196297280538578e-06, "loss": 0.5937, "step": 237360 }, { "epoch": 2.63, "learning_rate": 6.195374553400065e-06, "loss": 0.5962, "step": 237365 }, { "epoch": 2.63, "learning_rate": 6.194451826261553e-06, "loss": 0.5482, "step": 237370 }, { "epoch": 2.63, "learning_rate": 6.193529099123041e-06, "loss": 0.5748, "step": 237375 }, { "epoch": 2.63, "learning_rate": 6.192606371984528e-06, "loss": 0.5803, "step": 237380 }, { "epoch": 2.63, "learning_rate": 6.1916836448460156e-06, "loss": 0.5443, "step": 237385 }, { "epoch": 2.63, "learning_rate": 6.190760917707503e-06, "loss": 0.6209, "step": 237390 }, { "epoch": 2.63, "learning_rate": 6.189838190568991e-06, "loss": 0.5997, "step": 237395 }, { "epoch": 2.63, "learning_rate": 6.188915463430478e-06, "loss": 0.6218, "step": 237400 }, { "epoch": 2.63, "learning_rate": 6.187992736291965e-06, "loss": 0.5141, "step": 237405 }, { "epoch": 2.63, "learning_rate": 6.187070009153453e-06, "loss": 0.5793, "step": 237410 }, { "epoch": 2.63, "learning_rate": 6.186147282014941e-06, "loss": 0.5436, "step": 237415 }, { "epoch": 2.63, "learning_rate": 6.185224554876429e-06, "loss": 0.5445, "step": 237420 }, { "epoch": 2.63, "learning_rate": 6.184301827737917e-06, "loss": 0.5424, "step": 237425 }, { "epoch": 2.63, "learning_rate": 6.183379100599404e-06, "loss": 0.5159, "step": 237430 }, { "epoch": 2.63, "learning_rate": 6.1824563734608915e-06, "loss": 0.5657, "step": 237435 }, { "epoch": 2.63, "learning_rate": 6.1815336463223785e-06, "loss": 0.5659, "step": 237440 }, { "epoch": 2.63, "learning_rate": 6.180610919183866e-06, "loss": 0.6337, "step": 237445 }, { "epoch": 2.63, "learning_rate": 6.179688192045354e-06, "loss": 0.5516, "step": 237450 }, { "epoch": 2.63, "learning_rate": 6.178765464906842e-06, "loss": 0.5654, "step": 237455 }, { "epoch": 2.63, "learning_rate": 6.177842737768329e-06, "loss": 0.5806, "step": 237460 }, { "epoch": 2.63, "learning_rate": 6.176920010629817e-06, "loss": 0.5612, "step": 237465 }, { "epoch": 2.63, "learning_rate": 6.175997283491305e-06, "loss": 0.5591, "step": 237470 }, { "epoch": 2.63, "learning_rate": 6.175074556352793e-06, "loss": 0.5634, "step": 237475 }, { "epoch": 2.63, "learning_rate": 6.17415182921428e-06, "loss": 0.5761, "step": 237480 }, { "epoch": 2.63, "learning_rate": 6.1732291020757675e-06, "loss": 0.6068, "step": 237485 }, { "epoch": 2.63, "learning_rate": 6.1723063749372545e-06, "loss": 0.6654, "step": 237490 }, { "epoch": 2.63, "learning_rate": 6.171383647798742e-06, "loss": 0.6054, "step": 237495 }, { "epoch": 2.63, "learning_rate": 6.17046092066023e-06, "loss": 0.5456, "step": 237500 }, { "epoch": 2.63, "learning_rate": 6.169538193521718e-06, "loss": 0.6178, "step": 237505 }, { "epoch": 2.63, "learning_rate": 6.168615466383206e-06, "loss": 0.5789, "step": 237510 }, { "epoch": 2.63, "learning_rate": 6.167692739244693e-06, "loss": 0.591, "step": 237515 }, { "epoch": 2.63, "learning_rate": 6.16677001210618e-06, "loss": 0.555, "step": 237520 }, { "epoch": 2.63, "learning_rate": 6.165847284967668e-06, "loss": 0.5222, "step": 237525 }, { "epoch": 2.63, "learning_rate": 6.1649245578291556e-06, "loss": 0.5769, "step": 237530 }, { "epoch": 2.63, "learning_rate": 6.164001830690643e-06, "loss": 0.6052, "step": 237535 }, { "epoch": 2.63, "learning_rate": 6.163079103552131e-06, "loss": 0.5728, "step": 237540 }, { "epoch": 2.63, "learning_rate": 6.162156376413618e-06, "loss": 0.6039, "step": 237545 }, { "epoch": 2.63, "learning_rate": 6.161233649275106e-06, "loss": 0.5198, "step": 237550 }, { "epoch": 2.63, "learning_rate": 6.160310922136593e-06, "loss": 0.5436, "step": 237555 }, { "epoch": 2.63, "learning_rate": 6.159388194998081e-06, "loss": 0.5538, "step": 237560 }, { "epoch": 2.63, "learning_rate": 6.158465467859569e-06, "loss": 0.6219, "step": 237565 }, { "epoch": 2.63, "learning_rate": 6.157542740721056e-06, "loss": 0.5617, "step": 237570 }, { "epoch": 2.63, "learning_rate": 6.156620013582544e-06, "loss": 0.5622, "step": 237575 }, { "epoch": 2.63, "learning_rate": 6.1556972864440315e-06, "loss": 0.5665, "step": 237580 }, { "epoch": 2.63, "learning_rate": 6.154774559305519e-06, "loss": 0.6158, "step": 237585 }, { "epoch": 2.63, "learning_rate": 6.153851832167006e-06, "loss": 0.631, "step": 237590 }, { "epoch": 2.63, "learning_rate": 6.152929105028494e-06, "loss": 0.5833, "step": 237595 }, { "epoch": 2.63, "learning_rate": 6.152006377889981e-06, "loss": 0.5816, "step": 237600 }, { "epoch": 2.63, "learning_rate": 6.151083650751469e-06, "loss": 0.5503, "step": 237605 }, { "epoch": 2.63, "learning_rate": 6.150160923612957e-06, "loss": 0.5811, "step": 237610 }, { "epoch": 2.63, "learning_rate": 6.149238196474445e-06, "loss": 0.5757, "step": 237615 }, { "epoch": 2.63, "learning_rate": 6.148315469335933e-06, "loss": 0.565, "step": 237620 }, { "epoch": 2.63, "learning_rate": 6.14739274219742e-06, "loss": 0.5828, "step": 237625 }, { "epoch": 2.63, "learning_rate": 6.146470015058907e-06, "loss": 0.5782, "step": 237630 }, { "epoch": 2.63, "learning_rate": 6.1455472879203944e-06, "loss": 0.5321, "step": 237635 }, { "epoch": 2.63, "learning_rate": 6.144624560781882e-06, "loss": 0.58, "step": 237640 }, { "epoch": 2.63, "learning_rate": 6.14370183364337e-06, "loss": 0.6007, "step": 237645 }, { "epoch": 2.63, "learning_rate": 6.142779106504858e-06, "loss": 0.5584, "step": 237650 }, { "epoch": 2.63, "learning_rate": 6.141856379366345e-06, "loss": 0.5488, "step": 237655 }, { "epoch": 2.63, "learning_rate": 6.140933652227833e-06, "loss": 0.5604, "step": 237660 }, { "epoch": 2.63, "learning_rate": 6.14001092508932e-06, "loss": 0.5736, "step": 237665 }, { "epoch": 2.63, "learning_rate": 6.139088197950808e-06, "loss": 0.6133, "step": 237670 }, { "epoch": 2.63, "learning_rate": 6.1381654708122955e-06, "loss": 0.5696, "step": 237675 }, { "epoch": 2.63, "learning_rate": 6.1372427436737825e-06, "loss": 0.6279, "step": 237680 }, { "epoch": 2.63, "learning_rate": 6.13632001653527e-06, "loss": 0.6121, "step": 237685 }, { "epoch": 2.63, "learning_rate": 6.135397289396758e-06, "loss": 0.5979, "step": 237690 }, { "epoch": 2.63, "learning_rate": 6.134474562258246e-06, "loss": 0.5787, "step": 237695 }, { "epoch": 2.63, "learning_rate": 6.133551835119734e-06, "loss": 0.5933, "step": 237700 }, { "epoch": 2.63, "learning_rate": 6.132629107981221e-06, "loss": 0.5979, "step": 237705 }, { "epoch": 2.63, "learning_rate": 6.131706380842708e-06, "loss": 0.5499, "step": 237710 }, { "epoch": 2.63, "learning_rate": 6.130783653704196e-06, "loss": 0.5845, "step": 237715 }, { "epoch": 2.63, "learning_rate": 6.129860926565684e-06, "loss": 0.5683, "step": 237720 }, { "epoch": 2.63, "learning_rate": 6.1289381994271715e-06, "loss": 0.6383, "step": 237725 }, { "epoch": 2.63, "learning_rate": 6.128015472288659e-06, "loss": 0.5832, "step": 237730 }, { "epoch": 2.63, "learning_rate": 6.127092745150146e-06, "loss": 0.5926, "step": 237735 }, { "epoch": 2.63, "learning_rate": 6.126170018011634e-06, "loss": 0.565, "step": 237740 }, { "epoch": 2.63, "learning_rate": 6.125247290873121e-06, "loss": 0.6061, "step": 237745 }, { "epoch": 2.63, "learning_rate": 6.124324563734609e-06, "loss": 0.603, "step": 237750 }, { "epoch": 2.63, "learning_rate": 6.123401836596097e-06, "loss": 0.5865, "step": 237755 }, { "epoch": 2.63, "learning_rate": 6.122479109457585e-06, "loss": 0.5715, "step": 237760 }, { "epoch": 2.63, "learning_rate": 6.121556382319072e-06, "loss": 0.5874, "step": 237765 }, { "epoch": 2.63, "learning_rate": 6.12063365518056e-06, "loss": 0.6263, "step": 237770 }, { "epoch": 2.63, "learning_rate": 6.1197109280420474e-06, "loss": 0.5576, "step": 237775 }, { "epoch": 2.63, "learning_rate": 6.1187882009035344e-06, "loss": 0.6046, "step": 237780 }, { "epoch": 2.63, "learning_rate": 6.117865473765022e-06, "loss": 0.5728, "step": 237785 }, { "epoch": 2.63, "learning_rate": 6.11694274662651e-06, "loss": 0.5541, "step": 237790 }, { "epoch": 2.63, "learning_rate": 6.116020019487997e-06, "loss": 0.5711, "step": 237795 }, { "epoch": 2.63, "learning_rate": 6.115097292349485e-06, "loss": 0.5773, "step": 237800 }, { "epoch": 2.63, "learning_rate": 6.114174565210973e-06, "loss": 0.6641, "step": 237805 }, { "epoch": 2.63, "learning_rate": 6.113251838072461e-06, "loss": 0.5432, "step": 237810 }, { "epoch": 2.63, "learning_rate": 6.1123291109339485e-06, "loss": 0.5782, "step": 237815 }, { "epoch": 2.63, "learning_rate": 6.111406383795435e-06, "loss": 0.591, "step": 237820 }, { "epoch": 2.63, "learning_rate": 6.1104836566569225e-06, "loss": 0.5925, "step": 237825 }, { "epoch": 2.63, "learning_rate": 6.10956092951841e-06, "loss": 0.5508, "step": 237830 }, { "epoch": 2.63, "learning_rate": 6.108638202379898e-06, "loss": 0.5863, "step": 237835 }, { "epoch": 2.63, "learning_rate": 6.107715475241386e-06, "loss": 0.5616, "step": 237840 }, { "epoch": 2.63, "learning_rate": 6.106792748102873e-06, "loss": 0.5556, "step": 237845 }, { "epoch": 2.63, "learning_rate": 6.105870020964361e-06, "loss": 0.5126, "step": 237850 }, { "epoch": 2.63, "learning_rate": 6.104947293825848e-06, "loss": 0.5845, "step": 237855 }, { "epoch": 2.63, "learning_rate": 6.104024566687336e-06, "loss": 0.5768, "step": 237860 }, { "epoch": 2.63, "learning_rate": 6.103101839548824e-06, "loss": 0.5591, "step": 237865 }, { "epoch": 2.63, "learning_rate": 6.1021791124103115e-06, "loss": 0.5835, "step": 237870 }, { "epoch": 2.63, "learning_rate": 6.1012563852717985e-06, "loss": 0.5548, "step": 237875 }, { "epoch": 2.63, "learning_rate": 6.100333658133286e-06, "loss": 0.5738, "step": 237880 }, { "epoch": 2.63, "learning_rate": 6.099410930994774e-06, "loss": 0.5804, "step": 237885 }, { "epoch": 2.63, "learning_rate": 6.098488203856262e-06, "loss": 0.5584, "step": 237890 }, { "epoch": 2.63, "learning_rate": 6.097565476717749e-06, "loss": 0.585, "step": 237895 }, { "epoch": 2.63, "learning_rate": 6.096642749579237e-06, "loss": 0.5979, "step": 237900 }, { "epoch": 2.63, "learning_rate": 6.095720022440724e-06, "loss": 0.5479, "step": 237905 }, { "epoch": 2.63, "learning_rate": 6.094797295302212e-06, "loss": 0.5565, "step": 237910 }, { "epoch": 2.63, "learning_rate": 6.0938745681636996e-06, "loss": 0.6146, "step": 237915 }, { "epoch": 2.63, "learning_rate": 6.092951841025187e-06, "loss": 0.6363, "step": 237920 }, { "epoch": 2.63, "learning_rate": 6.092029113886675e-06, "loss": 0.594, "step": 237925 }, { "epoch": 2.63, "learning_rate": 6.091106386748162e-06, "loss": 0.539, "step": 237930 }, { "epoch": 2.63, "learning_rate": 6.090183659609649e-06, "loss": 0.5907, "step": 237935 }, { "epoch": 2.63, "learning_rate": 6.089260932471137e-06, "loss": 0.637, "step": 237940 }, { "epoch": 2.63, "learning_rate": 6.088338205332625e-06, "loss": 0.582, "step": 237945 }, { "epoch": 2.63, "learning_rate": 6.087415478194113e-06, "loss": 0.5468, "step": 237950 }, { "epoch": 2.63, "learning_rate": 6.0864927510556e-06, "loss": 0.5353, "step": 237955 }, { "epoch": 2.63, "learning_rate": 6.085570023917088e-06, "loss": 0.5461, "step": 237960 }, { "epoch": 2.63, "learning_rate": 6.0846472967785755e-06, "loss": 0.5864, "step": 237965 }, { "epoch": 2.63, "learning_rate": 6.0837245696400625e-06, "loss": 0.5247, "step": 237970 }, { "epoch": 2.64, "learning_rate": 6.08280184250155e-06, "loss": 0.5987, "step": 237975 }, { "epoch": 2.64, "learning_rate": 6.081879115363038e-06, "loss": 0.622, "step": 237980 }, { "epoch": 2.64, "learning_rate": 6.080956388224525e-06, "loss": 0.5504, "step": 237985 }, { "epoch": 2.64, "learning_rate": 6.080033661086013e-06, "loss": 0.5564, "step": 237990 }, { "epoch": 2.64, "learning_rate": 6.079110933947501e-06, "loss": 0.5771, "step": 237995 }, { "epoch": 2.64, "learning_rate": 6.078188206808989e-06, "loss": 0.6168, "step": 238000 }, { "epoch": 2.64, "eval_loss": 0.5890692472457886, "eval_runtime": 69.8184, "eval_samples_per_second": 28.646, "eval_steps_per_second": 14.323, "step": 238000 }, { "epoch": 2.64, "learning_rate": 6.077265479670477e-06, "loss": 0.5573, "step": 238005 }, { "epoch": 2.64, "learning_rate": 6.076342752531964e-06, "loss": 0.5741, "step": 238010 }, { "epoch": 2.64, "learning_rate": 6.075420025393451e-06, "loss": 0.6642, "step": 238015 }, { "epoch": 2.64, "learning_rate": 6.0744972982549385e-06, "loss": 0.5758, "step": 238020 }, { "epoch": 2.64, "learning_rate": 6.073574571116426e-06, "loss": 0.617, "step": 238025 }, { "epoch": 2.64, "learning_rate": 6.072651843977914e-06, "loss": 0.6383, "step": 238030 }, { "epoch": 2.64, "learning_rate": 6.071729116839402e-06, "loss": 0.5345, "step": 238035 }, { "epoch": 2.64, "learning_rate": 6.070806389700889e-06, "loss": 0.5668, "step": 238040 }, { "epoch": 2.64, "learning_rate": 6.069883662562376e-06, "loss": 0.5781, "step": 238045 }, { "epoch": 2.64, "learning_rate": 6.068960935423864e-06, "loss": 0.594, "step": 238050 }, { "epoch": 2.64, "learning_rate": 6.068038208285352e-06, "loss": 0.5487, "step": 238055 }, { "epoch": 2.64, "learning_rate": 6.0671154811468396e-06, "loss": 0.6146, "step": 238060 }, { "epoch": 2.64, "learning_rate": 6.066192754008327e-06, "loss": 0.5768, "step": 238065 }, { "epoch": 2.64, "learning_rate": 6.065270026869814e-06, "loss": 0.6106, "step": 238070 }, { "epoch": 2.64, "learning_rate": 6.064347299731302e-06, "loss": 0.5741, "step": 238075 }, { "epoch": 2.64, "learning_rate": 6.06342457259279e-06, "loss": 0.5558, "step": 238080 }, { "epoch": 2.64, "learning_rate": 6.062501845454277e-06, "loss": 0.6182, "step": 238085 }, { "epoch": 2.64, "learning_rate": 6.061579118315765e-06, "loss": 0.5971, "step": 238090 }, { "epoch": 2.64, "learning_rate": 6.060656391177252e-06, "loss": 0.6247, "step": 238095 }, { "epoch": 2.64, "learning_rate": 6.05973366403874e-06, "loss": 0.5597, "step": 238100 }, { "epoch": 2.64, "learning_rate": 6.058810936900228e-06, "loss": 0.5247, "step": 238105 }, { "epoch": 2.64, "learning_rate": 6.0578882097617155e-06, "loss": 0.588, "step": 238110 }, { "epoch": 2.64, "learning_rate": 6.056965482623203e-06, "loss": 0.643, "step": 238115 }, { "epoch": 2.64, "learning_rate": 6.05604275548469e-06, "loss": 0.5222, "step": 238120 }, { "epoch": 2.64, "learning_rate": 6.055120028346177e-06, "loss": 0.5569, "step": 238125 }, { "epoch": 2.64, "learning_rate": 6.054197301207665e-06, "loss": 0.5263, "step": 238130 }, { "epoch": 2.64, "learning_rate": 6.053274574069153e-06, "loss": 0.5662, "step": 238135 }, { "epoch": 2.64, "learning_rate": 6.052351846930641e-06, "loss": 0.6096, "step": 238140 }, { "epoch": 2.64, "learning_rate": 6.051429119792129e-06, "loss": 0.613, "step": 238145 }, { "epoch": 2.64, "learning_rate": 6.050506392653616e-06, "loss": 0.623, "step": 238150 }, { "epoch": 2.64, "learning_rate": 6.049583665515104e-06, "loss": 0.5893, "step": 238155 }, { "epoch": 2.64, "learning_rate": 6.048660938376591e-06, "loss": 0.6111, "step": 238160 }, { "epoch": 2.64, "learning_rate": 6.0477382112380785e-06, "loss": 0.6192, "step": 238165 }, { "epoch": 2.64, "learning_rate": 6.046815484099566e-06, "loss": 0.6149, "step": 238170 }, { "epoch": 2.64, "learning_rate": 6.045892756961054e-06, "loss": 0.5969, "step": 238175 }, { "epoch": 2.64, "learning_rate": 6.044970029822541e-06, "loss": 0.6375, "step": 238180 }, { "epoch": 2.64, "learning_rate": 6.044047302684029e-06, "loss": 0.5496, "step": 238185 }, { "epoch": 2.64, "learning_rate": 6.043124575545517e-06, "loss": 0.6338, "step": 238190 }, { "epoch": 2.64, "learning_rate": 6.042201848407005e-06, "loss": 0.612, "step": 238195 }, { "epoch": 2.64, "learning_rate": 6.041279121268492e-06, "loss": 0.607, "step": 238200 }, { "epoch": 2.64, "learning_rate": 6.0403563941299795e-06, "loss": 0.5928, "step": 238205 }, { "epoch": 2.64, "learning_rate": 6.0394336669914666e-06, "loss": 0.5535, "step": 238210 }, { "epoch": 2.64, "learning_rate": 6.038510939852954e-06, "loss": 0.5785, "step": 238215 }, { "epoch": 2.64, "learning_rate": 6.037588212714442e-06, "loss": 0.6662, "step": 238220 }, { "epoch": 2.64, "learning_rate": 6.03666548557593e-06, "loss": 0.6477, "step": 238225 }, { "epoch": 2.64, "learning_rate": 6.035742758437417e-06, "loss": 0.6064, "step": 238230 }, { "epoch": 2.64, "learning_rate": 6.034820031298904e-06, "loss": 0.5527, "step": 238235 }, { "epoch": 2.64, "learning_rate": 6.033897304160392e-06, "loss": 0.5862, "step": 238240 }, { "epoch": 2.64, "learning_rate": 6.03297457702188e-06, "loss": 0.5797, "step": 238245 }, { "epoch": 2.64, "learning_rate": 6.032051849883368e-06, "loss": 0.6432, "step": 238250 }, { "epoch": 2.64, "learning_rate": 6.0311291227448555e-06, "loss": 0.6509, "step": 238255 }, { "epoch": 2.64, "learning_rate": 6.0302063956063425e-06, "loss": 0.5401, "step": 238260 }, { "epoch": 2.64, "learning_rate": 6.02928366846783e-06, "loss": 0.5677, "step": 238265 }, { "epoch": 2.64, "learning_rate": 6.028360941329318e-06, "loss": 0.5667, "step": 238270 }, { "epoch": 2.64, "learning_rate": 6.027438214190805e-06, "loss": 0.6356, "step": 238275 }, { "epoch": 2.64, "learning_rate": 6.026515487052293e-06, "loss": 0.5284, "step": 238280 }, { "epoch": 2.64, "learning_rate": 6.025592759913781e-06, "loss": 0.5847, "step": 238285 }, { "epoch": 2.64, "learning_rate": 6.024670032775268e-06, "loss": 0.556, "step": 238290 }, { "epoch": 2.64, "learning_rate": 6.023747305636756e-06, "loss": 0.5853, "step": 238295 }, { "epoch": 2.64, "learning_rate": 6.022824578498244e-06, "loss": 0.5825, "step": 238300 }, { "epoch": 2.64, "learning_rate": 6.0219018513597314e-06, "loss": 0.5835, "step": 238305 }, { "epoch": 2.64, "learning_rate": 6.0209791242212184e-06, "loss": 0.5682, "step": 238310 }, { "epoch": 2.64, "learning_rate": 6.020056397082706e-06, "loss": 0.6465, "step": 238315 }, { "epoch": 2.64, "learning_rate": 6.019133669944193e-06, "loss": 0.6208, "step": 238320 }, { "epoch": 2.64, "learning_rate": 6.018210942805681e-06, "loss": 0.584, "step": 238325 }, { "epoch": 2.64, "learning_rate": 6.017288215667169e-06, "loss": 0.5914, "step": 238330 }, { "epoch": 2.64, "learning_rate": 6.016365488528657e-06, "loss": 0.5884, "step": 238335 }, { "epoch": 2.64, "learning_rate": 6.015442761390145e-06, "loss": 0.5953, "step": 238340 }, { "epoch": 2.64, "learning_rate": 6.014520034251632e-06, "loss": 0.5742, "step": 238345 }, { "epoch": 2.64, "learning_rate": 6.013597307113119e-06, "loss": 0.5708, "step": 238350 }, { "epoch": 2.64, "learning_rate": 6.0126745799746065e-06, "loss": 0.5722, "step": 238355 }, { "epoch": 2.64, "learning_rate": 6.011751852836094e-06, "loss": 0.5735, "step": 238360 }, { "epoch": 2.64, "learning_rate": 6.010829125697582e-06, "loss": 0.5606, "step": 238365 }, { "epoch": 2.64, "learning_rate": 6.009906398559069e-06, "loss": 0.6093, "step": 238370 }, { "epoch": 2.64, "learning_rate": 6.008983671420557e-06, "loss": 0.5664, "step": 238375 }, { "epoch": 2.64, "learning_rate": 6.008060944282045e-06, "loss": 0.5437, "step": 238380 }, { "epoch": 2.64, "learning_rate": 6.007138217143533e-06, "loss": 0.5628, "step": 238385 }, { "epoch": 2.64, "learning_rate": 6.00621549000502e-06, "loss": 0.6027, "step": 238390 }, { "epoch": 2.64, "learning_rate": 6.005292762866508e-06, "loss": 0.5935, "step": 238395 }, { "epoch": 2.64, "learning_rate": 6.004370035727995e-06, "loss": 0.559, "step": 238400 }, { "epoch": 2.64, "learning_rate": 6.0034473085894825e-06, "loss": 0.618, "step": 238405 }, { "epoch": 2.64, "learning_rate": 6.00252458145097e-06, "loss": 0.5574, "step": 238410 }, { "epoch": 2.64, "learning_rate": 6.001601854312458e-06, "loss": 0.5604, "step": 238415 }, { "epoch": 2.64, "learning_rate": 6.000679127173946e-06, "loss": 0.5231, "step": 238420 }, { "epoch": 2.64, "learning_rate": 5.999756400035433e-06, "loss": 0.6526, "step": 238425 }, { "epoch": 2.64, "learning_rate": 5.99883367289692e-06, "loss": 0.6221, "step": 238430 }, { "epoch": 2.64, "learning_rate": 5.997910945758408e-06, "loss": 0.6385, "step": 238435 }, { "epoch": 2.64, "learning_rate": 5.996988218619896e-06, "loss": 0.5975, "step": 238440 }, { "epoch": 2.64, "learning_rate": 5.996065491481384e-06, "loss": 0.5764, "step": 238445 }, { "epoch": 2.64, "learning_rate": 5.9951427643428714e-06, "loss": 0.5626, "step": 238450 }, { "epoch": 2.64, "learning_rate": 5.9942200372043584e-06, "loss": 0.5483, "step": 238455 }, { "epoch": 2.64, "learning_rate": 5.993297310065846e-06, "loss": 0.5505, "step": 238460 }, { "epoch": 2.64, "learning_rate": 5.992374582927333e-06, "loss": 0.5246, "step": 238465 }, { "epoch": 2.64, "learning_rate": 5.991451855788821e-06, "loss": 0.5651, "step": 238470 }, { "epoch": 2.64, "learning_rate": 5.990529128650309e-06, "loss": 0.6015, "step": 238475 }, { "epoch": 2.64, "learning_rate": 5.989606401511797e-06, "loss": 0.5493, "step": 238480 }, { "epoch": 2.64, "learning_rate": 5.988683674373284e-06, "loss": 0.5955, "step": 238485 }, { "epoch": 2.64, "learning_rate": 5.987760947234772e-06, "loss": 0.5787, "step": 238490 }, { "epoch": 2.64, "learning_rate": 5.9868382200962595e-06, "loss": 0.5899, "step": 238495 }, { "epoch": 2.64, "learning_rate": 5.9859154929577465e-06, "loss": 0.5538, "step": 238500 }, { "epoch": 2.64, "learning_rate": 5.984992765819234e-06, "loss": 0.5831, "step": 238505 }, { "epoch": 2.64, "learning_rate": 5.984070038680721e-06, "loss": 0.5992, "step": 238510 }, { "epoch": 2.64, "learning_rate": 5.983147311542209e-06, "loss": 0.6237, "step": 238515 }, { "epoch": 2.64, "learning_rate": 5.982224584403697e-06, "loss": 0.591, "step": 238520 }, { "epoch": 2.64, "learning_rate": 5.981301857265185e-06, "loss": 0.5805, "step": 238525 }, { "epoch": 2.64, "learning_rate": 5.980379130126673e-06, "loss": 0.5818, "step": 238530 }, { "epoch": 2.64, "learning_rate": 5.97945640298816e-06, "loss": 0.5328, "step": 238535 }, { "epoch": 2.64, "learning_rate": 5.978533675849647e-06, "loss": 0.5637, "step": 238540 }, { "epoch": 2.64, "learning_rate": 5.977610948711135e-06, "loss": 0.5932, "step": 238545 }, { "epoch": 2.64, "learning_rate": 5.9766882215726225e-06, "loss": 0.5946, "step": 238550 }, { "epoch": 2.64, "learning_rate": 5.97576549443411e-06, "loss": 0.546, "step": 238555 }, { "epoch": 2.64, "learning_rate": 5.974842767295598e-06, "loss": 0.5743, "step": 238560 }, { "epoch": 2.64, "learning_rate": 5.973920040157085e-06, "loss": 0.5891, "step": 238565 }, { "epoch": 2.64, "learning_rate": 5.972997313018573e-06, "loss": 0.5989, "step": 238570 }, { "epoch": 2.64, "learning_rate": 5.972074585880061e-06, "loss": 0.594, "step": 238575 }, { "epoch": 2.64, "learning_rate": 5.971151858741548e-06, "loss": 0.6557, "step": 238580 }, { "epoch": 2.64, "learning_rate": 5.970229131603036e-06, "loss": 0.5611, "step": 238585 }, { "epoch": 2.64, "learning_rate": 5.9693064044645236e-06, "loss": 0.5571, "step": 238590 }, { "epoch": 2.64, "learning_rate": 5.9683836773260106e-06, "loss": 0.5546, "step": 238595 }, { "epoch": 2.64, "learning_rate": 5.967460950187498e-06, "loss": 0.5777, "step": 238600 }, { "epoch": 2.64, "learning_rate": 5.966538223048986e-06, "loss": 0.5938, "step": 238605 }, { "epoch": 2.64, "learning_rate": 5.965615495910474e-06, "loss": 0.5703, "step": 238610 }, { "epoch": 2.64, "learning_rate": 5.964692768771961e-06, "loss": 0.5438, "step": 238615 }, { "epoch": 2.64, "learning_rate": 5.963770041633449e-06, "loss": 0.5805, "step": 238620 }, { "epoch": 2.64, "learning_rate": 5.962847314494936e-06, "loss": 0.5692, "step": 238625 }, { "epoch": 2.64, "learning_rate": 5.961924587356424e-06, "loss": 0.57, "step": 238630 }, { "epoch": 2.64, "learning_rate": 5.961001860217912e-06, "loss": 0.608, "step": 238635 }, { "epoch": 2.64, "learning_rate": 5.9600791330793995e-06, "loss": 0.5407, "step": 238640 }, { "epoch": 2.64, "learning_rate": 5.9591564059408865e-06, "loss": 0.5803, "step": 238645 }, { "epoch": 2.64, "learning_rate": 5.958233678802374e-06, "loss": 0.6399, "step": 238650 }, { "epoch": 2.64, "learning_rate": 5.957310951663861e-06, "loss": 0.6054, "step": 238655 }, { "epoch": 2.64, "learning_rate": 5.956388224525349e-06, "loss": 0.6089, "step": 238660 }, { "epoch": 2.64, "learning_rate": 5.955465497386837e-06, "loss": 0.6347, "step": 238665 }, { "epoch": 2.64, "learning_rate": 5.954542770248325e-06, "loss": 0.5823, "step": 238670 }, { "epoch": 2.64, "learning_rate": 5.953620043109812e-06, "loss": 0.5752, "step": 238675 }, { "epoch": 2.64, "learning_rate": 5.9526973159713e-06, "loss": 0.5494, "step": 238680 }, { "epoch": 2.64, "learning_rate": 5.951774588832788e-06, "loss": 0.5618, "step": 238685 }, { "epoch": 2.64, "learning_rate": 5.950851861694275e-06, "loss": 0.5734, "step": 238690 }, { "epoch": 2.64, "learning_rate": 5.9499291345557625e-06, "loss": 0.6075, "step": 238695 }, { "epoch": 2.64, "learning_rate": 5.94900640741725e-06, "loss": 0.5687, "step": 238700 }, { "epoch": 2.64, "learning_rate": 5.948083680278737e-06, "loss": 0.588, "step": 238705 }, { "epoch": 2.64, "learning_rate": 5.947160953140225e-06, "loss": 0.5641, "step": 238710 }, { "epoch": 2.64, "learning_rate": 5.946238226001713e-06, "loss": 0.5725, "step": 238715 }, { "epoch": 2.64, "learning_rate": 5.945315498863201e-06, "loss": 0.5949, "step": 238720 }, { "epoch": 2.64, "learning_rate": 5.944392771724689e-06, "loss": 0.5506, "step": 238725 }, { "epoch": 2.64, "learning_rate": 5.943470044586176e-06, "loss": 0.5497, "step": 238730 }, { "epoch": 2.64, "learning_rate": 5.942547317447663e-06, "loss": 0.5925, "step": 238735 }, { "epoch": 2.64, "learning_rate": 5.9416245903091506e-06, "loss": 0.5686, "step": 238740 }, { "epoch": 2.64, "learning_rate": 5.940701863170638e-06, "loss": 0.6322, "step": 238745 }, { "epoch": 2.64, "learning_rate": 5.939779136032126e-06, "loss": 0.6236, "step": 238750 }, { "epoch": 2.64, "learning_rate": 5.938856408893614e-06, "loss": 0.611, "step": 238755 }, { "epoch": 2.64, "learning_rate": 5.937933681755101e-06, "loss": 0.5758, "step": 238760 }, { "epoch": 2.64, "learning_rate": 5.937010954616589e-06, "loss": 0.5891, "step": 238765 }, { "epoch": 2.64, "learning_rate": 5.936088227478076e-06, "loss": 0.6351, "step": 238770 }, { "epoch": 2.64, "learning_rate": 5.935165500339564e-06, "loss": 0.5674, "step": 238775 }, { "epoch": 2.64, "learning_rate": 5.934242773201052e-06, "loss": 0.6098, "step": 238780 }, { "epoch": 2.64, "learning_rate": 5.933320046062539e-06, "loss": 0.6002, "step": 238785 }, { "epoch": 2.64, "learning_rate": 5.9323973189240265e-06, "loss": 0.5574, "step": 238790 }, { "epoch": 2.64, "learning_rate": 5.931474591785514e-06, "loss": 0.5665, "step": 238795 }, { "epoch": 2.64, "learning_rate": 5.930551864647002e-06, "loss": 0.545, "step": 238800 }, { "epoch": 2.64, "learning_rate": 5.929629137508489e-06, "loss": 0.5477, "step": 238805 }, { "epoch": 2.64, "learning_rate": 5.928706410369977e-06, "loss": 0.6005, "step": 238810 }, { "epoch": 2.64, "learning_rate": 5.927783683231464e-06, "loss": 0.5922, "step": 238815 }, { "epoch": 2.64, "learning_rate": 5.926860956092952e-06, "loss": 0.5954, "step": 238820 }, { "epoch": 2.64, "learning_rate": 5.92593822895444e-06, "loss": 0.5482, "step": 238825 }, { "epoch": 2.64, "learning_rate": 5.925015501815928e-06, "loss": 0.5744, "step": 238830 }, { "epoch": 2.64, "learning_rate": 5.9240927746774154e-06, "loss": 0.5704, "step": 238835 }, { "epoch": 2.64, "learning_rate": 5.9231700475389024e-06, "loss": 0.5864, "step": 238840 }, { "epoch": 2.64, "learning_rate": 5.9222473204003895e-06, "loss": 0.5852, "step": 238845 }, { "epoch": 2.64, "learning_rate": 5.921324593261877e-06, "loss": 0.5618, "step": 238850 }, { "epoch": 2.64, "learning_rate": 5.920401866123365e-06, "loss": 0.5601, "step": 238855 }, { "epoch": 2.64, "learning_rate": 5.919479138984853e-06, "loss": 0.605, "step": 238860 }, { "epoch": 2.64, "learning_rate": 5.918556411846341e-06, "loss": 0.5762, "step": 238865 }, { "epoch": 2.64, "learning_rate": 5.917633684707828e-06, "loss": 0.5847, "step": 238870 }, { "epoch": 2.64, "learning_rate": 5.916710957569316e-06, "loss": 0.6145, "step": 238875 }, { "epoch": 2.65, "learning_rate": 5.915788230430803e-06, "loss": 0.6017, "step": 238880 }, { "epoch": 2.65, "learning_rate": 5.9148655032922905e-06, "loss": 0.5747, "step": 238885 }, { "epoch": 2.65, "learning_rate": 5.913942776153778e-06, "loss": 0.6028, "step": 238890 }, { "epoch": 2.65, "learning_rate": 5.913020049015266e-06, "loss": 0.5576, "step": 238895 }, { "epoch": 2.65, "learning_rate": 5.912097321876753e-06, "loss": 0.6353, "step": 238900 }, { "epoch": 2.65, "learning_rate": 5.911174594738241e-06, "loss": 0.616, "step": 238905 }, { "epoch": 2.65, "learning_rate": 5.910251867599729e-06, "loss": 0.6247, "step": 238910 }, { "epoch": 2.65, "learning_rate": 5.909329140461217e-06, "loss": 0.5631, "step": 238915 }, { "epoch": 2.65, "learning_rate": 5.908406413322704e-06, "loss": 0.5948, "step": 238920 }, { "epoch": 2.65, "learning_rate": 5.907483686184191e-06, "loss": 0.6282, "step": 238925 }, { "epoch": 2.65, "learning_rate": 5.906560959045679e-06, "loss": 0.5389, "step": 238930 }, { "epoch": 2.65, "learning_rate": 5.9056382319071665e-06, "loss": 0.6041, "step": 238935 }, { "epoch": 2.65, "learning_rate": 5.904715504768654e-06, "loss": 0.557, "step": 238940 }, { "epoch": 2.65, "learning_rate": 5.903792777630142e-06, "loss": 0.555, "step": 238945 }, { "epoch": 2.65, "learning_rate": 5.902870050491629e-06, "loss": 0.5953, "step": 238950 }, { "epoch": 2.65, "learning_rate": 5.901947323353117e-06, "loss": 0.563, "step": 238955 }, { "epoch": 2.65, "learning_rate": 5.901024596214604e-06, "loss": 0.5412, "step": 238960 }, { "epoch": 2.65, "learning_rate": 5.900101869076092e-06, "loss": 0.5841, "step": 238965 }, { "epoch": 2.65, "learning_rate": 5.89917914193758e-06, "loss": 0.5919, "step": 238970 }, { "epoch": 2.65, "learning_rate": 5.898256414799068e-06, "loss": 0.5825, "step": 238975 }, { "epoch": 2.65, "learning_rate": 5.897333687660555e-06, "loss": 0.5649, "step": 238980 }, { "epoch": 2.65, "learning_rate": 5.8964109605220424e-06, "loss": 0.5662, "step": 238985 }, { "epoch": 2.65, "learning_rate": 5.89548823338353e-06, "loss": 0.6333, "step": 238990 }, { "epoch": 2.65, "learning_rate": 5.894565506245017e-06, "loss": 0.518, "step": 238995 }, { "epoch": 2.65, "learning_rate": 5.893642779106505e-06, "loss": 0.6508, "step": 239000 }, { "epoch": 2.65, "eval_loss": 0.5811243057250977, "eval_runtime": 69.8193, "eval_samples_per_second": 28.645, "eval_steps_per_second": 14.323, "step": 239000 }, { "epoch": 2.65, "learning_rate": 5.892720051967993e-06, "loss": 0.5936, "step": 239005 }, { "epoch": 2.65, "learning_rate": 5.89179732482948e-06, "loss": 0.5341, "step": 239010 }, { "epoch": 2.65, "learning_rate": 5.890874597690968e-06, "loss": 0.5899, "step": 239015 }, { "epoch": 2.65, "learning_rate": 5.889951870552456e-06, "loss": 0.5849, "step": 239020 }, { "epoch": 2.65, "learning_rate": 5.8890291434139435e-06, "loss": 0.5907, "step": 239025 }, { "epoch": 2.65, "learning_rate": 5.888106416275431e-06, "loss": 0.5544, "step": 239030 }, { "epoch": 2.65, "learning_rate": 5.8871836891369175e-06, "loss": 0.6105, "step": 239035 }, { "epoch": 2.65, "learning_rate": 5.886260961998405e-06, "loss": 0.5666, "step": 239040 }, { "epoch": 2.65, "learning_rate": 5.885338234859893e-06, "loss": 0.6175, "step": 239045 }, { "epoch": 2.65, "learning_rate": 5.884415507721381e-06, "loss": 0.5843, "step": 239050 }, { "epoch": 2.65, "learning_rate": 5.883492780582869e-06, "loss": 0.5997, "step": 239055 }, { "epoch": 2.65, "learning_rate": 5.882570053444356e-06, "loss": 0.606, "step": 239060 }, { "epoch": 2.65, "learning_rate": 5.881647326305844e-06, "loss": 0.5478, "step": 239065 }, { "epoch": 2.65, "learning_rate": 5.880724599167331e-06, "loss": 0.5494, "step": 239070 }, { "epoch": 2.65, "learning_rate": 5.879801872028819e-06, "loss": 0.5888, "step": 239075 }, { "epoch": 2.65, "learning_rate": 5.8788791448903065e-06, "loss": 0.6129, "step": 239080 }, { "epoch": 2.65, "learning_rate": 5.877956417751794e-06, "loss": 0.6057, "step": 239085 }, { "epoch": 2.65, "learning_rate": 5.877033690613281e-06, "loss": 0.5874, "step": 239090 }, { "epoch": 2.65, "learning_rate": 5.876110963474769e-06, "loss": 0.654, "step": 239095 }, { "epoch": 2.65, "learning_rate": 5.875188236336257e-06, "loss": 0.5659, "step": 239100 }, { "epoch": 2.65, "learning_rate": 5.874265509197745e-06, "loss": 0.6141, "step": 239105 }, { "epoch": 2.65, "learning_rate": 5.873342782059232e-06, "loss": 0.5736, "step": 239110 }, { "epoch": 2.65, "learning_rate": 5.87242005492072e-06, "loss": 0.5941, "step": 239115 }, { "epoch": 2.65, "learning_rate": 5.871497327782207e-06, "loss": 0.5168, "step": 239120 }, { "epoch": 2.65, "learning_rate": 5.870574600643695e-06, "loss": 0.6066, "step": 239125 }, { "epoch": 2.65, "learning_rate": 5.8696518735051824e-06, "loss": 0.6051, "step": 239130 }, { "epoch": 2.65, "learning_rate": 5.86872914636667e-06, "loss": 0.5785, "step": 239135 }, { "epoch": 2.65, "learning_rate": 5.867806419228158e-06, "loss": 0.5419, "step": 239140 }, { "epoch": 2.65, "learning_rate": 5.866883692089645e-06, "loss": 0.5897, "step": 239145 }, { "epoch": 2.65, "learning_rate": 5.865960964951132e-06, "loss": 0.5808, "step": 239150 }, { "epoch": 2.65, "learning_rate": 5.86503823781262e-06, "loss": 0.596, "step": 239155 }, { "epoch": 2.65, "learning_rate": 5.864115510674108e-06, "loss": 0.5337, "step": 239160 }, { "epoch": 2.65, "learning_rate": 5.863192783535596e-06, "loss": 0.6133, "step": 239165 }, { "epoch": 2.65, "learning_rate": 5.8622700563970835e-06, "loss": 0.5782, "step": 239170 }, { "epoch": 2.65, "learning_rate": 5.8613473292585705e-06, "loss": 0.6152, "step": 239175 }, { "epoch": 2.65, "learning_rate": 5.860424602120058e-06, "loss": 0.5897, "step": 239180 }, { "epoch": 2.65, "learning_rate": 5.859501874981545e-06, "loss": 0.5906, "step": 239185 }, { "epoch": 2.65, "learning_rate": 5.858579147843033e-06, "loss": 0.5321, "step": 239190 }, { "epoch": 2.65, "learning_rate": 5.857656420704521e-06, "loss": 0.589, "step": 239195 }, { "epoch": 2.65, "learning_rate": 5.856733693566008e-06, "loss": 0.5548, "step": 239200 }, { "epoch": 2.65, "learning_rate": 5.855810966427496e-06, "loss": 0.5652, "step": 239205 }, { "epoch": 2.65, "learning_rate": 5.854888239288984e-06, "loss": 0.5608, "step": 239210 }, { "epoch": 2.65, "learning_rate": 5.853965512150472e-06, "loss": 0.5731, "step": 239215 }, { "epoch": 2.65, "learning_rate": 5.8530427850119595e-06, "loss": 0.6148, "step": 239220 }, { "epoch": 2.65, "learning_rate": 5.8521200578734465e-06, "loss": 0.5415, "step": 239225 }, { "epoch": 2.65, "learning_rate": 5.8511973307349335e-06, "loss": 0.5453, "step": 239230 }, { "epoch": 2.65, "learning_rate": 5.850274603596421e-06, "loss": 0.5381, "step": 239235 }, { "epoch": 2.65, "learning_rate": 5.849351876457909e-06, "loss": 0.5685, "step": 239240 }, { "epoch": 2.65, "learning_rate": 5.848429149319397e-06, "loss": 0.6024, "step": 239245 }, { "epoch": 2.65, "learning_rate": 5.847506422180885e-06, "loss": 0.5847, "step": 239250 }, { "epoch": 2.65, "learning_rate": 5.846583695042372e-06, "loss": 0.6237, "step": 239255 }, { "epoch": 2.65, "learning_rate": 5.845660967903859e-06, "loss": 0.5644, "step": 239260 }, { "epoch": 2.65, "learning_rate": 5.844738240765347e-06, "loss": 0.5963, "step": 239265 }, { "epoch": 2.65, "learning_rate": 5.8438155136268346e-06, "loss": 0.6276, "step": 239270 }, { "epoch": 2.65, "learning_rate": 5.842892786488322e-06, "loss": 0.5983, "step": 239275 }, { "epoch": 2.65, "learning_rate": 5.84197005934981e-06, "loss": 0.6012, "step": 239280 }, { "epoch": 2.65, "learning_rate": 5.841047332211297e-06, "loss": 0.5903, "step": 239285 }, { "epoch": 2.65, "learning_rate": 5.840124605072785e-06, "loss": 0.5935, "step": 239290 }, { "epoch": 2.65, "learning_rate": 5.839201877934273e-06, "loss": 0.6056, "step": 239295 }, { "epoch": 2.65, "learning_rate": 5.83827915079576e-06, "loss": 0.5902, "step": 239300 }, { "epoch": 2.65, "learning_rate": 5.837356423657248e-06, "loss": 0.5507, "step": 239305 }, { "epoch": 2.65, "learning_rate": 5.836433696518735e-06, "loss": 0.5675, "step": 239310 }, { "epoch": 2.65, "learning_rate": 5.835510969380223e-06, "loss": 0.6446, "step": 239315 }, { "epoch": 2.65, "learning_rate": 5.8345882422417105e-06, "loss": 0.6131, "step": 239320 }, { "epoch": 2.65, "learning_rate": 5.833665515103198e-06, "loss": 0.5586, "step": 239325 }, { "epoch": 2.65, "learning_rate": 5.832742787964686e-06, "loss": 0.5688, "step": 239330 }, { "epoch": 2.65, "learning_rate": 5.831820060826173e-06, "loss": 0.5539, "step": 239335 }, { "epoch": 2.65, "learning_rate": 5.83089733368766e-06, "loss": 0.5945, "step": 239340 }, { "epoch": 2.65, "learning_rate": 5.829974606549148e-06, "loss": 0.5179, "step": 239345 }, { "epoch": 2.65, "learning_rate": 5.829051879410636e-06, "loss": 0.5757, "step": 239350 }, { "epoch": 2.65, "learning_rate": 5.828129152272124e-06, "loss": 0.6223, "step": 239355 }, { "epoch": 2.65, "learning_rate": 5.827206425133612e-06, "loss": 0.5757, "step": 239360 }, { "epoch": 2.65, "learning_rate": 5.826283697995099e-06, "loss": 0.6044, "step": 239365 }, { "epoch": 2.65, "learning_rate": 5.8253609708565865e-06, "loss": 0.5468, "step": 239370 }, { "epoch": 2.65, "learning_rate": 5.8244382437180735e-06, "loss": 0.5683, "step": 239375 }, { "epoch": 2.65, "learning_rate": 5.823515516579561e-06, "loss": 0.598, "step": 239380 }, { "epoch": 2.65, "learning_rate": 5.822592789441049e-06, "loss": 0.6097, "step": 239385 }, { "epoch": 2.65, "learning_rate": 5.821670062302537e-06, "loss": 0.5663, "step": 239390 }, { "epoch": 2.65, "learning_rate": 5.820747335164024e-06, "loss": 0.5613, "step": 239395 }, { "epoch": 2.65, "learning_rate": 5.819824608025512e-06, "loss": 0.5811, "step": 239400 }, { "epoch": 2.65, "learning_rate": 5.818901880887e-06, "loss": 0.5735, "step": 239405 }, { "epoch": 2.65, "learning_rate": 5.8179791537484876e-06, "loss": 0.4913, "step": 239410 }, { "epoch": 2.65, "learning_rate": 5.8170564266099746e-06, "loss": 0.5677, "step": 239415 }, { "epoch": 2.65, "learning_rate": 5.816133699471462e-06, "loss": 0.5656, "step": 239420 }, { "epoch": 2.65, "learning_rate": 5.815210972332949e-06, "loss": 0.6024, "step": 239425 }, { "epoch": 2.65, "learning_rate": 5.814288245194437e-06, "loss": 0.5703, "step": 239430 }, { "epoch": 2.65, "learning_rate": 5.813365518055925e-06, "loss": 0.5844, "step": 239435 }, { "epoch": 2.65, "learning_rate": 5.812442790917413e-06, "loss": 0.5821, "step": 239440 }, { "epoch": 2.65, "learning_rate": 5.8115200637789e-06, "loss": 0.5798, "step": 239445 }, { "epoch": 2.65, "learning_rate": 5.810597336640387e-06, "loss": 0.596, "step": 239450 }, { "epoch": 2.65, "learning_rate": 5.809674609501875e-06, "loss": 0.5813, "step": 239455 }, { "epoch": 2.65, "learning_rate": 5.808751882363363e-06, "loss": 0.595, "step": 239460 }, { "epoch": 2.65, "learning_rate": 5.8078291552248505e-06, "loss": 0.5816, "step": 239465 }, { "epoch": 2.65, "learning_rate": 5.806906428086338e-06, "loss": 0.5927, "step": 239470 }, { "epoch": 2.65, "learning_rate": 5.805983700947825e-06, "loss": 0.551, "step": 239475 }, { "epoch": 2.65, "learning_rate": 5.805060973809313e-06, "loss": 0.5847, "step": 239480 }, { "epoch": 2.65, "learning_rate": 5.804138246670801e-06, "loss": 0.6061, "step": 239485 }, { "epoch": 2.65, "learning_rate": 5.803215519532288e-06, "loss": 0.5742, "step": 239490 }, { "epoch": 2.65, "learning_rate": 5.802292792393776e-06, "loss": 0.5957, "step": 239495 }, { "epoch": 2.65, "learning_rate": 5.801370065255264e-06, "loss": 0.5755, "step": 239500 }, { "epoch": 2.65, "learning_rate": 5.800447338116751e-06, "loss": 0.5451, "step": 239505 }, { "epoch": 2.65, "learning_rate": 5.799524610978239e-06, "loss": 0.5332, "step": 239510 }, { "epoch": 2.65, "learning_rate": 5.7986018838397264e-06, "loss": 0.5525, "step": 239515 }, { "epoch": 2.65, "learning_rate": 5.797679156701214e-06, "loss": 0.5545, "step": 239520 }, { "epoch": 2.65, "learning_rate": 5.796756429562701e-06, "loss": 0.6122, "step": 239525 }, { "epoch": 2.65, "learning_rate": 5.795833702424189e-06, "loss": 0.5565, "step": 239530 }, { "epoch": 2.65, "learning_rate": 5.794910975285676e-06, "loss": 0.5562, "step": 239535 }, { "epoch": 2.65, "learning_rate": 5.793988248147164e-06, "loss": 0.5515, "step": 239540 }, { "epoch": 2.65, "learning_rate": 5.793065521008652e-06, "loss": 0.6006, "step": 239545 }, { "epoch": 2.65, "learning_rate": 5.79214279387014e-06, "loss": 0.555, "step": 239550 }, { "epoch": 2.65, "learning_rate": 5.7912200667316275e-06, "loss": 0.5972, "step": 239555 }, { "epoch": 2.65, "learning_rate": 5.7902973395931145e-06, "loss": 0.6049, "step": 239560 }, { "epoch": 2.65, "learning_rate": 5.7893746124546015e-06, "loss": 0.5784, "step": 239565 }, { "epoch": 2.65, "learning_rate": 5.788451885316089e-06, "loss": 0.5733, "step": 239570 }, { "epoch": 2.65, "learning_rate": 5.787529158177577e-06, "loss": 0.5642, "step": 239575 }, { "epoch": 2.65, "learning_rate": 5.786606431039065e-06, "loss": 0.5809, "step": 239580 }, { "epoch": 2.65, "learning_rate": 5.785683703900552e-06, "loss": 0.5466, "step": 239585 }, { "epoch": 2.65, "learning_rate": 5.78476097676204e-06, "loss": 0.6146, "step": 239590 }, { "epoch": 2.65, "learning_rate": 5.783838249623528e-06, "loss": 0.5777, "step": 239595 }, { "epoch": 2.65, "learning_rate": 5.782915522485016e-06, "loss": 0.6128, "step": 239600 }, { "epoch": 2.65, "learning_rate": 5.781992795346503e-06, "loss": 0.6012, "step": 239605 }, { "epoch": 2.65, "learning_rate": 5.7810700682079905e-06, "loss": 0.6246, "step": 239610 }, { "epoch": 2.65, "learning_rate": 5.7801473410694775e-06, "loss": 0.5847, "step": 239615 }, { "epoch": 2.65, "learning_rate": 5.779224613930965e-06, "loss": 0.6335, "step": 239620 }, { "epoch": 2.65, "learning_rate": 5.778301886792453e-06, "loss": 0.6194, "step": 239625 }, { "epoch": 2.65, "learning_rate": 5.777379159653941e-06, "loss": 0.602, "step": 239630 }, { "epoch": 2.65, "learning_rate": 5.776456432515429e-06, "loss": 0.6336, "step": 239635 }, { "epoch": 2.65, "learning_rate": 5.775533705376916e-06, "loss": 0.5846, "step": 239640 }, { "epoch": 2.65, "learning_rate": 5.774610978238403e-06, "loss": 0.5499, "step": 239645 }, { "epoch": 2.65, "learning_rate": 5.773688251099891e-06, "loss": 0.561, "step": 239650 }, { "epoch": 2.65, "learning_rate": 5.772765523961379e-06, "loss": 0.5457, "step": 239655 }, { "epoch": 2.65, "learning_rate": 5.7718427968228664e-06, "loss": 0.5198, "step": 239660 }, { "epoch": 2.65, "learning_rate": 5.770920069684354e-06, "loss": 0.5612, "step": 239665 }, { "epoch": 2.65, "learning_rate": 5.769997342545841e-06, "loss": 0.6266, "step": 239670 }, { "epoch": 2.65, "learning_rate": 5.769074615407329e-06, "loss": 0.6027, "step": 239675 }, { "epoch": 2.65, "learning_rate": 5.768151888268816e-06, "loss": 0.5934, "step": 239680 }, { "epoch": 2.65, "learning_rate": 5.767229161130304e-06, "loss": 0.5442, "step": 239685 }, { "epoch": 2.65, "learning_rate": 5.766306433991792e-06, "loss": 0.5691, "step": 239690 }, { "epoch": 2.65, "learning_rate": 5.76538370685328e-06, "loss": 0.5889, "step": 239695 }, { "epoch": 2.65, "learning_rate": 5.764460979714767e-06, "loss": 0.6648, "step": 239700 }, { "epoch": 2.65, "learning_rate": 5.7635382525762545e-06, "loss": 0.6112, "step": 239705 }, { "epoch": 2.65, "learning_rate": 5.762615525437742e-06, "loss": 0.6109, "step": 239710 }, { "epoch": 2.65, "learning_rate": 5.761692798299229e-06, "loss": 0.6206, "step": 239715 }, { "epoch": 2.65, "learning_rate": 5.760770071160717e-06, "loss": 0.5513, "step": 239720 }, { "epoch": 2.65, "learning_rate": 5.759847344022204e-06, "loss": 0.5893, "step": 239725 }, { "epoch": 2.65, "learning_rate": 5.758924616883692e-06, "loss": 0.5963, "step": 239730 }, { "epoch": 2.65, "learning_rate": 5.75800188974518e-06, "loss": 0.5464, "step": 239735 }, { "epoch": 2.65, "learning_rate": 5.757079162606668e-06, "loss": 0.5867, "step": 239740 }, { "epoch": 2.65, "learning_rate": 5.756156435468156e-06, "loss": 0.5904, "step": 239745 }, { "epoch": 2.65, "learning_rate": 5.755233708329643e-06, "loss": 0.6373, "step": 239750 }, { "epoch": 2.65, "learning_rate": 5.75431098119113e-06, "loss": 0.582, "step": 239755 }, { "epoch": 2.65, "learning_rate": 5.7533882540526175e-06, "loss": 0.5714, "step": 239760 }, { "epoch": 2.65, "learning_rate": 5.752465526914105e-06, "loss": 0.5538, "step": 239765 }, { "epoch": 2.65, "learning_rate": 5.751542799775593e-06, "loss": 0.5389, "step": 239770 }, { "epoch": 2.65, "learning_rate": 5.750620072637081e-06, "loss": 0.498, "step": 239775 }, { "epoch": 2.66, "learning_rate": 5.749697345498568e-06, "loss": 0.611, "step": 239780 }, { "epoch": 2.66, "learning_rate": 5.748774618360056e-06, "loss": 0.5787, "step": 239785 }, { "epoch": 2.66, "learning_rate": 5.747851891221544e-06, "loss": 0.6421, "step": 239790 }, { "epoch": 2.66, "learning_rate": 5.746929164083031e-06, "loss": 0.6126, "step": 239795 }, { "epoch": 2.66, "learning_rate": 5.7460064369445186e-06, "loss": 0.5763, "step": 239800 }, { "epoch": 2.66, "learning_rate": 5.745083709806006e-06, "loss": 0.5956, "step": 239805 }, { "epoch": 2.66, "learning_rate": 5.7441609826674934e-06, "loss": 0.6069, "step": 239810 }, { "epoch": 2.66, "learning_rate": 5.743238255528981e-06, "loss": 0.5895, "step": 239815 }, { "epoch": 2.66, "learning_rate": 5.742315528390469e-06, "loss": 0.5664, "step": 239820 }, { "epoch": 2.66, "learning_rate": 5.741392801251957e-06, "loss": 0.5658, "step": 239825 }, { "epoch": 2.66, "learning_rate": 5.740470074113444e-06, "loss": 0.5837, "step": 239830 }, { "epoch": 2.66, "learning_rate": 5.739547346974932e-06, "loss": 0.5858, "step": 239835 }, { "epoch": 2.66, "learning_rate": 5.738624619836419e-06, "loss": 0.5615, "step": 239840 }, { "epoch": 2.66, "learning_rate": 5.737701892697907e-06, "loss": 0.5901, "step": 239845 }, { "epoch": 2.66, "learning_rate": 5.7367791655593945e-06, "loss": 0.5398, "step": 239850 }, { "epoch": 2.66, "learning_rate": 5.735856438420882e-06, "loss": 0.5724, "step": 239855 }, { "epoch": 2.66, "learning_rate": 5.734933711282369e-06, "loss": 0.5762, "step": 239860 }, { "epoch": 2.66, "learning_rate": 5.734010984143857e-06, "loss": 0.5807, "step": 239865 }, { "epoch": 2.66, "learning_rate": 5.733088257005344e-06, "loss": 0.6354, "step": 239870 }, { "epoch": 2.66, "learning_rate": 5.732165529866832e-06, "loss": 0.5588, "step": 239875 }, { "epoch": 2.66, "learning_rate": 5.73124280272832e-06, "loss": 0.6479, "step": 239880 }, { "epoch": 2.66, "learning_rate": 5.730320075589808e-06, "loss": 0.5992, "step": 239885 }, { "epoch": 2.66, "learning_rate": 5.729397348451295e-06, "loss": 0.5998, "step": 239890 }, { "epoch": 2.66, "learning_rate": 5.728474621312783e-06, "loss": 0.5469, "step": 239895 }, { "epoch": 2.66, "learning_rate": 5.7275518941742705e-06, "loss": 0.5957, "step": 239900 }, { "epoch": 2.66, "learning_rate": 5.7266291670357575e-06, "loss": 0.585, "step": 239905 }, { "epoch": 2.66, "learning_rate": 5.725706439897245e-06, "loss": 0.6206, "step": 239910 }, { "epoch": 2.66, "learning_rate": 5.724783712758733e-06, "loss": 0.5514, "step": 239915 }, { "epoch": 2.66, "learning_rate": 5.72386098562022e-06, "loss": 0.6134, "step": 239920 }, { "epoch": 2.66, "learning_rate": 5.722938258481708e-06, "loss": 0.6016, "step": 239925 }, { "epoch": 2.66, "learning_rate": 5.722015531343196e-06, "loss": 0.5801, "step": 239930 }, { "epoch": 2.66, "learning_rate": 5.721092804204684e-06, "loss": 0.5699, "step": 239935 }, { "epoch": 2.66, "learning_rate": 5.7201700770661716e-06, "loss": 0.5585, "step": 239940 }, { "epoch": 2.66, "learning_rate": 5.7192473499276586e-06, "loss": 0.5634, "step": 239945 }, { "epoch": 2.66, "learning_rate": 5.7183246227891456e-06, "loss": 0.5285, "step": 239950 }, { "epoch": 2.66, "learning_rate": 5.717401895650633e-06, "loss": 0.5953, "step": 239955 }, { "epoch": 2.66, "learning_rate": 5.716479168512121e-06, "loss": 0.6326, "step": 239960 }, { "epoch": 2.66, "learning_rate": 5.715556441373609e-06, "loss": 0.6254, "step": 239965 }, { "epoch": 2.66, "learning_rate": 5.714633714235097e-06, "loss": 0.5641, "step": 239970 }, { "epoch": 2.66, "learning_rate": 5.713710987096584e-06, "loss": 0.5774, "step": 239975 }, { "epoch": 2.66, "learning_rate": 5.712788259958071e-06, "loss": 0.5553, "step": 239980 }, { "epoch": 2.66, "learning_rate": 5.711865532819559e-06, "loss": 0.5937, "step": 239985 }, { "epoch": 2.66, "learning_rate": 5.710942805681047e-06, "loss": 0.6633, "step": 239990 }, { "epoch": 2.66, "learning_rate": 5.7100200785425345e-06, "loss": 0.591, "step": 239995 }, { "epoch": 2.66, "learning_rate": 5.7090973514040215e-06, "loss": 0.6322, "step": 240000 }, { "epoch": 2.66, "eval_loss": 0.5648983716964722, "eval_runtime": 69.67, "eval_samples_per_second": 28.707, "eval_steps_per_second": 14.353, "step": 240000 }, { "epoch": 2.66, "learning_rate": 5.708174624265509e-06, "loss": 0.5936, "step": 240005 }, { "epoch": 2.66, "learning_rate": 5.707251897126997e-06, "loss": 0.603, "step": 240010 }, { "epoch": 2.66, "learning_rate": 5.706329169988485e-06, "loss": 0.5652, "step": 240015 }, { "epoch": 2.66, "learning_rate": 5.705406442849972e-06, "loss": 0.5519, "step": 240020 }, { "epoch": 2.66, "learning_rate": 5.70448371571146e-06, "loss": 0.6309, "step": 240025 }, { "epoch": 2.66, "learning_rate": 5.703560988572947e-06, "loss": 0.6215, "step": 240030 }, { "epoch": 2.66, "learning_rate": 5.702638261434435e-06, "loss": 0.5832, "step": 240035 }, { "epoch": 2.66, "learning_rate": 5.701715534295923e-06, "loss": 0.5955, "step": 240040 }, { "epoch": 2.66, "learning_rate": 5.7007928071574105e-06, "loss": 0.5698, "step": 240045 }, { "epoch": 2.66, "learning_rate": 5.699870080018898e-06, "loss": 0.5516, "step": 240050 }, { "epoch": 2.66, "learning_rate": 5.698947352880385e-06, "loss": 0.5774, "step": 240055 }, { "epoch": 2.66, "learning_rate": 5.698024625741872e-06, "loss": 0.5614, "step": 240060 }, { "epoch": 2.66, "learning_rate": 5.69710189860336e-06, "loss": 0.5605, "step": 240065 }, { "epoch": 2.66, "learning_rate": 5.696179171464848e-06, "loss": 0.5502, "step": 240070 }, { "epoch": 2.66, "learning_rate": 5.695256444326336e-06, "loss": 0.5707, "step": 240075 }, { "epoch": 2.66, "learning_rate": 5.694333717187824e-06, "loss": 0.5582, "step": 240080 }, { "epoch": 2.66, "learning_rate": 5.693410990049311e-06, "loss": 0.5474, "step": 240085 }, { "epoch": 2.66, "learning_rate": 5.6924882629107985e-06, "loss": 0.5593, "step": 240090 }, { "epoch": 2.66, "learning_rate": 5.6915655357722856e-06, "loss": 0.6083, "step": 240095 }, { "epoch": 2.66, "learning_rate": 5.690642808633773e-06, "loss": 0.5928, "step": 240100 }, { "epoch": 2.66, "learning_rate": 5.689720081495261e-06, "loss": 0.5436, "step": 240105 }, { "epoch": 2.66, "learning_rate": 5.688797354356749e-06, "loss": 0.5872, "step": 240110 }, { "epoch": 2.66, "learning_rate": 5.687874627218236e-06, "loss": 0.558, "step": 240115 }, { "epoch": 2.66, "learning_rate": 5.686951900079724e-06, "loss": 0.5632, "step": 240120 }, { "epoch": 2.66, "learning_rate": 5.686029172941212e-06, "loss": 0.5871, "step": 240125 }, { "epoch": 2.66, "learning_rate": 5.6851064458027e-06, "loss": 0.5362, "step": 240130 }, { "epoch": 2.66, "learning_rate": 5.684183718664187e-06, "loss": 0.6198, "step": 240135 }, { "epoch": 2.66, "learning_rate": 5.683260991525674e-06, "loss": 0.5146, "step": 240140 }, { "epoch": 2.66, "learning_rate": 5.6823382643871615e-06, "loss": 0.5868, "step": 240145 }, { "epoch": 2.66, "learning_rate": 5.681415537248649e-06, "loss": 0.5918, "step": 240150 }, { "epoch": 2.66, "learning_rate": 5.680492810110137e-06, "loss": 0.5373, "step": 240155 }, { "epoch": 2.66, "learning_rate": 5.679570082971625e-06, "loss": 0.5833, "step": 240160 }, { "epoch": 2.66, "learning_rate": 5.678647355833112e-06, "loss": 0.5949, "step": 240165 }, { "epoch": 2.66, "learning_rate": 5.677724628694599e-06, "loss": 0.5946, "step": 240170 }, { "epoch": 2.66, "learning_rate": 5.676801901556087e-06, "loss": 0.5942, "step": 240175 }, { "epoch": 2.66, "learning_rate": 5.675879174417575e-06, "loss": 0.5778, "step": 240180 }, { "epoch": 2.66, "learning_rate": 5.674956447279063e-06, "loss": 0.5467, "step": 240185 }, { "epoch": 2.66, "learning_rate": 5.6740337201405504e-06, "loss": 0.5505, "step": 240190 }, { "epoch": 2.66, "learning_rate": 5.6731109930020374e-06, "loss": 0.5651, "step": 240195 }, { "epoch": 2.66, "learning_rate": 5.672188265863525e-06, "loss": 0.5554, "step": 240200 }, { "epoch": 2.66, "learning_rate": 5.671265538725013e-06, "loss": 0.588, "step": 240205 }, { "epoch": 2.66, "learning_rate": 5.6703428115865e-06, "loss": 0.6001, "step": 240210 }, { "epoch": 2.66, "learning_rate": 5.669420084447988e-06, "loss": 0.5812, "step": 240215 }, { "epoch": 2.66, "learning_rate": 5.668497357309476e-06, "loss": 0.6046, "step": 240220 }, { "epoch": 2.66, "learning_rate": 5.667574630170963e-06, "loss": 0.5721, "step": 240225 }, { "epoch": 2.66, "learning_rate": 5.666651903032451e-06, "loss": 0.5341, "step": 240230 }, { "epoch": 2.66, "learning_rate": 5.6657291758939385e-06, "loss": 0.6138, "step": 240235 }, { "epoch": 2.66, "learning_rate": 5.664806448755426e-06, "loss": 0.5941, "step": 240240 }, { "epoch": 2.66, "learning_rate": 5.663883721616914e-06, "loss": 0.6004, "step": 240245 }, { "epoch": 2.66, "learning_rate": 5.6629609944784e-06, "loss": 0.595, "step": 240250 }, { "epoch": 2.66, "learning_rate": 5.662038267339888e-06, "loss": 0.5842, "step": 240255 }, { "epoch": 2.66, "learning_rate": 5.661115540201376e-06, "loss": 0.5964, "step": 240260 }, { "epoch": 2.66, "learning_rate": 5.660192813062864e-06, "loss": 0.4945, "step": 240265 }, { "epoch": 2.66, "learning_rate": 5.659270085924352e-06, "loss": 0.5782, "step": 240270 }, { "epoch": 2.66, "learning_rate": 5.658347358785839e-06, "loss": 0.6623, "step": 240275 }, { "epoch": 2.66, "learning_rate": 5.657424631647327e-06, "loss": 0.5889, "step": 240280 }, { "epoch": 2.66, "learning_rate": 5.656501904508814e-06, "loss": 0.5594, "step": 240285 }, { "epoch": 2.66, "learning_rate": 5.6555791773703015e-06, "loss": 0.5964, "step": 240290 }, { "epoch": 2.66, "learning_rate": 5.654656450231789e-06, "loss": 0.6003, "step": 240295 }, { "epoch": 2.66, "learning_rate": 5.653733723093277e-06, "loss": 0.5082, "step": 240300 }, { "epoch": 2.66, "learning_rate": 5.652810995954764e-06, "loss": 0.5427, "step": 240305 }, { "epoch": 2.66, "learning_rate": 5.651888268816252e-06, "loss": 0.5776, "step": 240310 }, { "epoch": 2.66, "learning_rate": 5.65096554167774e-06, "loss": 0.5974, "step": 240315 }, { "epoch": 2.66, "learning_rate": 5.650042814539228e-06, "loss": 0.5921, "step": 240320 }, { "epoch": 2.66, "learning_rate": 5.649120087400715e-06, "loss": 0.5548, "step": 240325 }, { "epoch": 2.66, "learning_rate": 5.648197360262203e-06, "loss": 0.6038, "step": 240330 }, { "epoch": 2.66, "learning_rate": 5.64727463312369e-06, "loss": 0.5681, "step": 240335 }, { "epoch": 2.66, "learning_rate": 5.6463519059851774e-06, "loss": 0.5747, "step": 240340 }, { "epoch": 2.66, "learning_rate": 5.645429178846665e-06, "loss": 0.6195, "step": 240345 }, { "epoch": 2.66, "learning_rate": 5.644506451708153e-06, "loss": 0.5629, "step": 240350 }, { "epoch": 2.66, "learning_rate": 5.643583724569641e-06, "loss": 0.6005, "step": 240355 }, { "epoch": 2.66, "learning_rate": 5.642660997431128e-06, "loss": 0.5565, "step": 240360 }, { "epoch": 2.66, "learning_rate": 5.641738270292615e-06, "loss": 0.5666, "step": 240365 }, { "epoch": 2.66, "learning_rate": 5.640815543154103e-06, "loss": 0.5694, "step": 240370 }, { "epoch": 2.66, "learning_rate": 5.639892816015591e-06, "loss": 0.5894, "step": 240375 }, { "epoch": 2.66, "learning_rate": 5.6389700888770785e-06, "loss": 0.5514, "step": 240380 }, { "epoch": 2.66, "learning_rate": 5.638047361738566e-06, "loss": 0.5409, "step": 240385 }, { "epoch": 2.66, "learning_rate": 5.637124634600053e-06, "loss": 0.5116, "step": 240390 }, { "epoch": 2.66, "learning_rate": 5.636201907461541e-06, "loss": 0.5861, "step": 240395 }, { "epoch": 2.66, "learning_rate": 5.635279180323028e-06, "loss": 0.6436, "step": 240400 }, { "epoch": 2.66, "learning_rate": 5.634356453184516e-06, "loss": 0.5848, "step": 240405 }, { "epoch": 2.66, "learning_rate": 5.633433726046004e-06, "loss": 0.5617, "step": 240410 }, { "epoch": 2.66, "learning_rate": 5.632510998907491e-06, "loss": 0.6163, "step": 240415 }, { "epoch": 2.66, "learning_rate": 5.631588271768979e-06, "loss": 0.567, "step": 240420 }, { "epoch": 2.66, "learning_rate": 5.630665544630467e-06, "loss": 0.5991, "step": 240425 }, { "epoch": 2.66, "learning_rate": 5.6297428174919545e-06, "loss": 0.5244, "step": 240430 }, { "epoch": 2.66, "learning_rate": 5.628820090353442e-06, "loss": 0.5595, "step": 240435 }, { "epoch": 2.66, "learning_rate": 5.627897363214929e-06, "loss": 0.6235, "step": 240440 }, { "epoch": 2.66, "learning_rate": 5.626974636076416e-06, "loss": 0.5788, "step": 240445 }, { "epoch": 2.66, "learning_rate": 5.626051908937904e-06, "loss": 0.6059, "step": 240450 }, { "epoch": 2.66, "learning_rate": 5.625129181799392e-06, "loss": 0.6157, "step": 240455 }, { "epoch": 2.66, "learning_rate": 5.62420645466088e-06, "loss": 0.5888, "step": 240460 }, { "epoch": 2.66, "learning_rate": 5.623283727522368e-06, "loss": 0.5865, "step": 240465 }, { "epoch": 2.66, "learning_rate": 5.622361000383855e-06, "loss": 0.5892, "step": 240470 }, { "epoch": 2.66, "learning_rate": 5.621438273245342e-06, "loss": 0.6164, "step": 240475 }, { "epoch": 2.66, "learning_rate": 5.6205155461068296e-06, "loss": 0.538, "step": 240480 }, { "epoch": 2.66, "learning_rate": 5.619592818968317e-06, "loss": 0.5763, "step": 240485 }, { "epoch": 2.66, "learning_rate": 5.618670091829805e-06, "loss": 0.591, "step": 240490 }, { "epoch": 2.66, "learning_rate": 5.617747364691293e-06, "loss": 0.584, "step": 240495 }, { "epoch": 2.66, "learning_rate": 5.61682463755278e-06, "loss": 0.5668, "step": 240500 }, { "epoch": 2.66, "learning_rate": 5.615901910414268e-06, "loss": 0.5426, "step": 240505 }, { "epoch": 2.66, "learning_rate": 5.614979183275756e-06, "loss": 0.5972, "step": 240510 }, { "epoch": 2.66, "learning_rate": 5.614056456137243e-06, "loss": 0.5893, "step": 240515 }, { "epoch": 2.66, "learning_rate": 5.613133728998731e-06, "loss": 0.6061, "step": 240520 }, { "epoch": 2.66, "learning_rate": 5.612211001860218e-06, "loss": 0.6349, "step": 240525 }, { "epoch": 2.66, "learning_rate": 5.6112882747217055e-06, "loss": 0.516, "step": 240530 }, { "epoch": 2.66, "learning_rate": 5.610365547583193e-06, "loss": 0.5803, "step": 240535 }, { "epoch": 2.66, "learning_rate": 5.609442820444681e-06, "loss": 0.5606, "step": 240540 }, { "epoch": 2.66, "learning_rate": 5.608520093306169e-06, "loss": 0.58, "step": 240545 }, { "epoch": 2.66, "learning_rate": 5.607597366167656e-06, "loss": 0.5827, "step": 240550 }, { "epoch": 2.66, "learning_rate": 5.606674639029143e-06, "loss": 0.5449, "step": 240555 }, { "epoch": 2.66, "learning_rate": 5.605751911890631e-06, "loss": 0.6114, "step": 240560 }, { "epoch": 2.66, "learning_rate": 5.604829184752119e-06, "loss": 0.569, "step": 240565 }, { "epoch": 2.66, "learning_rate": 5.603906457613607e-06, "loss": 0.5383, "step": 240570 }, { "epoch": 2.66, "learning_rate": 5.6029837304750945e-06, "loss": 0.5694, "step": 240575 }, { "epoch": 2.66, "learning_rate": 5.6020610033365815e-06, "loss": 0.5443, "step": 240580 }, { "epoch": 2.66, "learning_rate": 5.601138276198069e-06, "loss": 0.5654, "step": 240585 }, { "epoch": 2.66, "learning_rate": 5.600215549059556e-06, "loss": 0.5566, "step": 240590 }, { "epoch": 2.66, "learning_rate": 5.599292821921044e-06, "loss": 0.5735, "step": 240595 }, { "epoch": 2.66, "learning_rate": 5.598370094782532e-06, "loss": 0.5953, "step": 240600 }, { "epoch": 2.66, "learning_rate": 5.59744736764402e-06, "loss": 0.5916, "step": 240605 }, { "epoch": 2.66, "learning_rate": 5.596524640505507e-06, "loss": 0.5988, "step": 240610 }, { "epoch": 2.66, "learning_rate": 5.595601913366995e-06, "loss": 0.5751, "step": 240615 }, { "epoch": 2.66, "learning_rate": 5.5946791862284826e-06, "loss": 0.5478, "step": 240620 }, { "epoch": 2.66, "learning_rate": 5.5937564590899696e-06, "loss": 0.572, "step": 240625 }, { "epoch": 2.66, "learning_rate": 5.592833731951457e-06, "loss": 0.5824, "step": 240630 }, { "epoch": 2.66, "learning_rate": 5.591911004812945e-06, "loss": 0.5173, "step": 240635 }, { "epoch": 2.66, "learning_rate": 5.590988277674432e-06, "loss": 0.5122, "step": 240640 }, { "epoch": 2.66, "learning_rate": 5.59006555053592e-06, "loss": 0.592, "step": 240645 }, { "epoch": 2.66, "learning_rate": 5.589142823397408e-06, "loss": 0.5831, "step": 240650 }, { "epoch": 2.66, "learning_rate": 5.588220096258896e-06, "loss": 0.5452, "step": 240655 }, { "epoch": 2.66, "learning_rate": 5.587297369120384e-06, "loss": 0.5534, "step": 240660 }, { "epoch": 2.66, "learning_rate": 5.58637464198187e-06, "loss": 0.5635, "step": 240665 }, { "epoch": 2.66, "learning_rate": 5.585451914843358e-06, "loss": 0.6128, "step": 240670 }, { "epoch": 2.66, "learning_rate": 5.5845291877048455e-06, "loss": 0.5483, "step": 240675 }, { "epoch": 2.66, "learning_rate": 5.583606460566333e-06, "loss": 0.626, "step": 240680 }, { "epoch": 2.67, "learning_rate": 5.582683733427821e-06, "loss": 0.5778, "step": 240685 }, { "epoch": 2.67, "learning_rate": 5.581761006289308e-06, "loss": 0.5601, "step": 240690 }, { "epoch": 2.67, "learning_rate": 5.580838279150796e-06, "loss": 0.5621, "step": 240695 }, { "epoch": 2.67, "learning_rate": 5.579915552012284e-06, "loss": 0.5165, "step": 240700 }, { "epoch": 2.67, "learning_rate": 5.578992824873771e-06, "loss": 0.5862, "step": 240705 }, { "epoch": 2.67, "learning_rate": 5.578070097735259e-06, "loss": 0.555, "step": 240710 }, { "epoch": 2.67, "learning_rate": 5.577147370596747e-06, "loss": 0.5551, "step": 240715 }, { "epoch": 2.67, "learning_rate": 5.576224643458234e-06, "loss": 0.5834, "step": 240720 }, { "epoch": 2.67, "learning_rate": 5.5753019163197215e-06, "loss": 0.561, "step": 240725 }, { "epoch": 2.67, "learning_rate": 5.574379189181209e-06, "loss": 0.6005, "step": 240730 }, { "epoch": 2.67, "learning_rate": 5.573456462042697e-06, "loss": 0.5304, "step": 240735 }, { "epoch": 2.67, "learning_rate": 5.572533734904184e-06, "loss": 0.5768, "step": 240740 }, { "epoch": 2.67, "learning_rate": 5.571611007765672e-06, "loss": 0.6083, "step": 240745 }, { "epoch": 2.67, "learning_rate": 5.570688280627159e-06, "loss": 0.5926, "step": 240750 }, { "epoch": 2.67, "learning_rate": 5.569765553488647e-06, "loss": 0.5767, "step": 240755 }, { "epoch": 2.67, "learning_rate": 5.568842826350135e-06, "loss": 0.5839, "step": 240760 }, { "epoch": 2.67, "learning_rate": 5.5679200992116225e-06, "loss": 0.6013, "step": 240765 }, { "epoch": 2.67, "learning_rate": 5.56699737207311e-06, "loss": 0.605, "step": 240770 }, { "epoch": 2.67, "learning_rate": 5.566074644934597e-06, "loss": 0.5536, "step": 240775 }, { "epoch": 2.67, "learning_rate": 5.565151917796084e-06, "loss": 0.605, "step": 240780 }, { "epoch": 2.67, "learning_rate": 5.564229190657572e-06, "loss": 0.5441, "step": 240785 }, { "epoch": 2.67, "learning_rate": 5.56330646351906e-06, "loss": 0.5792, "step": 240790 }, { "epoch": 2.67, "learning_rate": 5.562383736380548e-06, "loss": 0.621, "step": 240795 }, { "epoch": 2.67, "learning_rate": 5.561461009242035e-06, "loss": 0.5652, "step": 240800 }, { "epoch": 2.67, "learning_rate": 5.560538282103523e-06, "loss": 0.532, "step": 240805 }, { "epoch": 2.67, "learning_rate": 5.559615554965011e-06, "loss": 0.6477, "step": 240810 }, { "epoch": 2.67, "learning_rate": 5.558692827826498e-06, "loss": 0.6074, "step": 240815 }, { "epoch": 2.67, "learning_rate": 5.5577701006879855e-06, "loss": 0.6018, "step": 240820 }, { "epoch": 2.67, "learning_rate": 5.556847373549473e-06, "loss": 0.5348, "step": 240825 }, { "epoch": 2.67, "learning_rate": 5.55592464641096e-06, "loss": 0.588, "step": 240830 }, { "epoch": 2.67, "learning_rate": 5.555001919272448e-06, "loss": 0.6449, "step": 240835 }, { "epoch": 2.67, "learning_rate": 5.554079192133936e-06, "loss": 0.5755, "step": 240840 }, { "epoch": 2.67, "learning_rate": 5.553156464995424e-06, "loss": 0.5943, "step": 240845 }, { "epoch": 2.67, "learning_rate": 5.552233737856912e-06, "loss": 0.5899, "step": 240850 }, { "epoch": 2.67, "learning_rate": 5.551311010718399e-06, "loss": 0.6008, "step": 240855 }, { "epoch": 2.67, "learning_rate": 5.550388283579886e-06, "loss": 0.5856, "step": 240860 }, { "epoch": 2.67, "learning_rate": 5.549465556441374e-06, "loss": 0.544, "step": 240865 }, { "epoch": 2.67, "learning_rate": 5.5485428293028614e-06, "loss": 0.5522, "step": 240870 }, { "epoch": 2.67, "learning_rate": 5.547620102164349e-06, "loss": 0.6127, "step": 240875 }, { "epoch": 2.67, "learning_rate": 5.546697375025837e-06, "loss": 0.5412, "step": 240880 }, { "epoch": 2.67, "learning_rate": 5.545774647887324e-06, "loss": 0.5189, "step": 240885 }, { "epoch": 2.67, "learning_rate": 5.544851920748812e-06, "loss": 0.5549, "step": 240890 }, { "epoch": 2.67, "learning_rate": 5.543929193610299e-06, "loss": 0.5721, "step": 240895 }, { "epoch": 2.67, "learning_rate": 5.543006466471787e-06, "loss": 0.5257, "step": 240900 }, { "epoch": 2.67, "learning_rate": 5.542083739333275e-06, "loss": 0.5929, "step": 240905 }, { "epoch": 2.67, "learning_rate": 5.5411610121947625e-06, "loss": 0.6177, "step": 240910 }, { "epoch": 2.67, "learning_rate": 5.5402382850562495e-06, "loss": 0.6054, "step": 240915 }, { "epoch": 2.67, "learning_rate": 5.539315557917737e-06, "loss": 0.5815, "step": 240920 }, { "epoch": 2.67, "learning_rate": 5.538392830779225e-06, "loss": 0.5449, "step": 240925 }, { "epoch": 2.67, "learning_rate": 5.537470103640712e-06, "loss": 0.5761, "step": 240930 }, { "epoch": 2.67, "learning_rate": 5.5365473765022e-06, "loss": 0.5824, "step": 240935 }, { "epoch": 2.67, "learning_rate": 5.535624649363687e-06, "loss": 0.6182, "step": 240940 }, { "epoch": 2.67, "learning_rate": 5.534701922225175e-06, "loss": 0.5592, "step": 240945 }, { "epoch": 2.67, "learning_rate": 5.533779195086663e-06, "loss": 0.5824, "step": 240950 }, { "epoch": 2.67, "learning_rate": 5.532856467948151e-06, "loss": 0.5346, "step": 240955 }, { "epoch": 2.67, "learning_rate": 5.5319337408096385e-06, "loss": 0.6304, "step": 240960 }, { "epoch": 2.67, "learning_rate": 5.5310110136711255e-06, "loss": 0.57, "step": 240965 }, { "epoch": 2.67, "learning_rate": 5.5300882865326125e-06, "loss": 0.6338, "step": 240970 }, { "epoch": 2.67, "learning_rate": 5.5291655593941e-06, "loss": 0.6056, "step": 240975 }, { "epoch": 2.67, "learning_rate": 5.528242832255588e-06, "loss": 0.633, "step": 240980 }, { "epoch": 2.67, "learning_rate": 5.527320105117076e-06, "loss": 0.5736, "step": 240985 }, { "epoch": 2.67, "learning_rate": 5.526397377978564e-06, "loss": 0.6074, "step": 240990 }, { "epoch": 2.67, "learning_rate": 5.525474650840051e-06, "loss": 0.5499, "step": 240995 }, { "epoch": 2.67, "learning_rate": 5.524551923701539e-06, "loss": 0.6131, "step": 241000 }, { "epoch": 2.67, "eval_loss": 0.5472548604011536, "eval_runtime": 69.7658, "eval_samples_per_second": 28.667, "eval_steps_per_second": 14.334, "step": 241000 }, { "epoch": 2.67, "learning_rate": 5.523629196563026e-06, "loss": 0.5817, "step": 241005 }, { "epoch": 2.67, "learning_rate": 5.522706469424514e-06, "loss": 0.5892, "step": 241010 }, { "epoch": 2.67, "learning_rate": 5.5217837422860014e-06, "loss": 0.5749, "step": 241015 }, { "epoch": 2.67, "learning_rate": 5.520861015147489e-06, "loss": 0.5247, "step": 241020 }, { "epoch": 2.67, "learning_rate": 5.519938288008976e-06, "loss": 0.5696, "step": 241025 }, { "epoch": 2.67, "learning_rate": 5.519015560870464e-06, "loss": 0.5876, "step": 241030 }, { "epoch": 2.67, "learning_rate": 5.518092833731952e-06, "loss": 0.5922, "step": 241035 }, { "epoch": 2.67, "learning_rate": 5.51717010659344e-06, "loss": 0.6012, "step": 241040 }, { "epoch": 2.67, "learning_rate": 5.516247379454927e-06, "loss": 0.6257, "step": 241045 }, { "epoch": 2.67, "learning_rate": 5.515324652316415e-06, "loss": 0.6111, "step": 241050 }, { "epoch": 2.67, "learning_rate": 5.514401925177902e-06, "loss": 0.5341, "step": 241055 }, { "epoch": 2.67, "learning_rate": 5.5134791980393895e-06, "loss": 0.5589, "step": 241060 }, { "epoch": 2.67, "learning_rate": 5.512556470900877e-06, "loss": 0.6053, "step": 241065 }, { "epoch": 2.67, "learning_rate": 5.511633743762365e-06, "loss": 0.5944, "step": 241070 }, { "epoch": 2.67, "learning_rate": 5.510711016623852e-06, "loss": 0.631, "step": 241075 }, { "epoch": 2.67, "learning_rate": 5.50978828948534e-06, "loss": 0.5798, "step": 241080 }, { "epoch": 2.67, "learning_rate": 5.508865562346827e-06, "loss": 0.5796, "step": 241085 }, { "epoch": 2.67, "learning_rate": 5.507942835208315e-06, "loss": 0.4758, "step": 241090 }, { "epoch": 2.67, "learning_rate": 5.507020108069803e-06, "loss": 0.5911, "step": 241095 }, { "epoch": 2.67, "learning_rate": 5.506097380931291e-06, "loss": 0.5841, "step": 241100 }, { "epoch": 2.67, "learning_rate": 5.505174653792778e-06, "loss": 0.5992, "step": 241105 }, { "epoch": 2.67, "learning_rate": 5.5042519266542655e-06, "loss": 0.5701, "step": 241110 }, { "epoch": 2.67, "learning_rate": 5.503329199515753e-06, "loss": 0.5956, "step": 241115 }, { "epoch": 2.67, "learning_rate": 5.50240647237724e-06, "loss": 0.5968, "step": 241120 }, { "epoch": 2.67, "learning_rate": 5.501483745238728e-06, "loss": 0.5902, "step": 241125 }, { "epoch": 2.67, "learning_rate": 5.500561018100216e-06, "loss": 0.6095, "step": 241130 }, { "epoch": 2.67, "learning_rate": 5.499638290961703e-06, "loss": 0.5647, "step": 241135 }, { "epoch": 2.67, "learning_rate": 5.498715563823191e-06, "loss": 0.588, "step": 241140 }, { "epoch": 2.67, "learning_rate": 5.497792836684679e-06, "loss": 0.5967, "step": 241145 }, { "epoch": 2.67, "learning_rate": 5.4968701095461666e-06, "loss": 0.6215, "step": 241150 }, { "epoch": 2.67, "learning_rate": 5.495947382407654e-06, "loss": 0.589, "step": 241155 }, { "epoch": 2.67, "learning_rate": 5.495024655269141e-06, "loss": 0.5951, "step": 241160 }, { "epoch": 2.67, "learning_rate": 5.494101928130628e-06, "loss": 0.5665, "step": 241165 }, { "epoch": 2.67, "learning_rate": 5.493179200992116e-06, "loss": 0.6056, "step": 241170 }, { "epoch": 2.67, "learning_rate": 5.492256473853604e-06, "loss": 0.5992, "step": 241175 }, { "epoch": 2.67, "learning_rate": 5.491333746715092e-06, "loss": 0.6209, "step": 241180 }, { "epoch": 2.67, "learning_rate": 5.49041101957658e-06, "loss": 0.624, "step": 241185 }, { "epoch": 2.67, "learning_rate": 5.489488292438067e-06, "loss": 0.5671, "step": 241190 }, { "epoch": 2.67, "learning_rate": 5.488565565299554e-06, "loss": 0.605, "step": 241195 }, { "epoch": 2.67, "learning_rate": 5.487642838161042e-06, "loss": 0.5856, "step": 241200 }, { "epoch": 2.67, "learning_rate": 5.4867201110225295e-06, "loss": 0.5449, "step": 241205 }, { "epoch": 2.67, "learning_rate": 5.485797383884017e-06, "loss": 0.6447, "step": 241210 }, { "epoch": 2.67, "learning_rate": 5.484874656745504e-06, "loss": 0.5468, "step": 241215 }, { "epoch": 2.67, "learning_rate": 5.483951929606992e-06, "loss": 0.5869, "step": 241220 }, { "epoch": 2.67, "learning_rate": 5.48302920246848e-06, "loss": 0.6122, "step": 241225 }, { "epoch": 2.67, "learning_rate": 5.482106475329968e-06, "loss": 0.5536, "step": 241230 }, { "epoch": 2.67, "learning_rate": 5.481183748191455e-06, "loss": 0.5217, "step": 241235 }, { "epoch": 2.67, "learning_rate": 5.480261021052943e-06, "loss": 0.5922, "step": 241240 }, { "epoch": 2.67, "learning_rate": 5.47933829391443e-06, "loss": 0.6073, "step": 241245 }, { "epoch": 2.67, "learning_rate": 5.478415566775918e-06, "loss": 0.5967, "step": 241250 }, { "epoch": 2.67, "learning_rate": 5.4774928396374055e-06, "loss": 0.5543, "step": 241255 }, { "epoch": 2.67, "learning_rate": 5.476570112498893e-06, "loss": 0.5675, "step": 241260 }, { "epoch": 2.67, "learning_rate": 5.475647385360381e-06, "loss": 0.581, "step": 241265 }, { "epoch": 2.67, "learning_rate": 5.474724658221868e-06, "loss": 0.5949, "step": 241270 }, { "epoch": 2.67, "learning_rate": 5.473801931083355e-06, "loss": 0.5743, "step": 241275 }, { "epoch": 2.67, "learning_rate": 5.472879203944843e-06, "loss": 0.5807, "step": 241280 }, { "epoch": 2.67, "learning_rate": 5.471956476806331e-06, "loss": 0.575, "step": 241285 }, { "epoch": 2.67, "learning_rate": 5.471033749667819e-06, "loss": 0.6006, "step": 241290 }, { "epoch": 2.67, "learning_rate": 5.4701110225293066e-06, "loss": 0.6095, "step": 241295 }, { "epoch": 2.67, "learning_rate": 5.4691882953907936e-06, "loss": 0.6013, "step": 241300 }, { "epoch": 2.67, "learning_rate": 5.468265568252281e-06, "loss": 0.6288, "step": 241305 }, { "epoch": 2.67, "learning_rate": 5.467342841113768e-06, "loss": 0.6052, "step": 241310 }, { "epoch": 2.67, "learning_rate": 5.466420113975256e-06, "loss": 0.5215, "step": 241315 }, { "epoch": 2.67, "learning_rate": 5.465497386836744e-06, "loss": 0.605, "step": 241320 }, { "epoch": 2.67, "learning_rate": 5.464574659698232e-06, "loss": 0.5793, "step": 241325 }, { "epoch": 2.67, "learning_rate": 5.463651932559719e-06, "loss": 0.5651, "step": 241330 }, { "epoch": 2.67, "learning_rate": 5.462729205421207e-06, "loss": 0.5673, "step": 241335 }, { "epoch": 2.67, "learning_rate": 5.461806478282695e-06, "loss": 0.5609, "step": 241340 }, { "epoch": 2.67, "learning_rate": 5.4608837511441825e-06, "loss": 0.6293, "step": 241345 }, { "epoch": 2.67, "learning_rate": 5.4599610240056695e-06, "loss": 0.6143, "step": 241350 }, { "epoch": 2.67, "learning_rate": 5.4590382968671565e-06, "loss": 0.5717, "step": 241355 }, { "epoch": 2.67, "learning_rate": 5.458115569728644e-06, "loss": 0.61, "step": 241360 }, { "epoch": 2.67, "learning_rate": 5.457192842590132e-06, "loss": 0.5414, "step": 241365 }, { "epoch": 2.67, "learning_rate": 5.45627011545162e-06, "loss": 0.5702, "step": 241370 }, { "epoch": 2.67, "learning_rate": 5.455347388313108e-06, "loss": 0.5833, "step": 241375 }, { "epoch": 2.67, "learning_rate": 5.454424661174595e-06, "loss": 0.5747, "step": 241380 }, { "epoch": 2.67, "learning_rate": 5.453501934036082e-06, "loss": 0.5618, "step": 241385 }, { "epoch": 2.67, "learning_rate": 5.45257920689757e-06, "loss": 0.5364, "step": 241390 }, { "epoch": 2.67, "learning_rate": 5.451656479759058e-06, "loss": 0.6366, "step": 241395 }, { "epoch": 2.67, "learning_rate": 5.4507337526205454e-06, "loss": 0.5319, "step": 241400 }, { "epoch": 2.67, "learning_rate": 5.449811025482033e-06, "loss": 0.6617, "step": 241405 }, { "epoch": 2.67, "learning_rate": 5.44888829834352e-06, "loss": 0.508, "step": 241410 }, { "epoch": 2.67, "learning_rate": 5.447965571205008e-06, "loss": 0.5336, "step": 241415 }, { "epoch": 2.67, "learning_rate": 5.447042844066496e-06, "loss": 0.6079, "step": 241420 }, { "epoch": 2.67, "learning_rate": 5.446120116927983e-06, "loss": 0.6395, "step": 241425 }, { "epoch": 2.67, "learning_rate": 5.445197389789471e-06, "loss": 0.6284, "step": 241430 }, { "epoch": 2.67, "learning_rate": 5.444274662650959e-06, "loss": 0.601, "step": 241435 }, { "epoch": 2.67, "learning_rate": 5.443351935512446e-06, "loss": 0.5793, "step": 241440 }, { "epoch": 2.67, "learning_rate": 5.4424292083739335e-06, "loss": 0.5609, "step": 241445 }, { "epoch": 2.67, "learning_rate": 5.441506481235421e-06, "loss": 0.6174, "step": 241450 }, { "epoch": 2.67, "learning_rate": 5.440583754096909e-06, "loss": 0.5913, "step": 241455 }, { "epoch": 2.67, "learning_rate": 5.439661026958396e-06, "loss": 0.543, "step": 241460 }, { "epoch": 2.67, "learning_rate": 5.438738299819884e-06, "loss": 0.6127, "step": 241465 }, { "epoch": 2.67, "learning_rate": 5.437815572681371e-06, "loss": 0.6384, "step": 241470 }, { "epoch": 2.67, "learning_rate": 5.436892845542859e-06, "loss": 0.5952, "step": 241475 }, { "epoch": 2.67, "learning_rate": 5.435970118404347e-06, "loss": 0.5995, "step": 241480 }, { "epoch": 2.67, "learning_rate": 5.435047391265835e-06, "loss": 0.5878, "step": 241485 }, { "epoch": 2.67, "learning_rate": 5.434124664127322e-06, "loss": 0.5966, "step": 241490 }, { "epoch": 2.67, "learning_rate": 5.4332019369888095e-06, "loss": 0.5448, "step": 241495 }, { "epoch": 2.67, "learning_rate": 5.4322792098502965e-06, "loss": 0.6056, "step": 241500 }, { "epoch": 2.67, "learning_rate": 5.431356482711784e-06, "loss": 0.5372, "step": 241505 }, { "epoch": 2.67, "learning_rate": 5.430433755573272e-06, "loss": 0.5647, "step": 241510 }, { "epoch": 2.67, "learning_rate": 5.42951102843476e-06, "loss": 0.599, "step": 241515 }, { "epoch": 2.67, "learning_rate": 5.428588301296247e-06, "loss": 0.6079, "step": 241520 }, { "epoch": 2.67, "learning_rate": 5.427665574157735e-06, "loss": 0.6165, "step": 241525 }, { "epoch": 2.67, "learning_rate": 5.426742847019223e-06, "loss": 0.5708, "step": 241530 }, { "epoch": 2.67, "learning_rate": 5.425820119880711e-06, "loss": 0.6113, "step": 241535 }, { "epoch": 2.67, "learning_rate": 5.424897392742198e-06, "loss": 0.5808, "step": 241540 }, { "epoch": 2.67, "learning_rate": 5.4239746656036854e-06, "loss": 0.6162, "step": 241545 }, { "epoch": 2.67, "learning_rate": 5.4230519384651724e-06, "loss": 0.6194, "step": 241550 }, { "epoch": 2.67, "learning_rate": 5.42212921132666e-06, "loss": 0.5943, "step": 241555 }, { "epoch": 2.67, "learning_rate": 5.421206484188148e-06, "loss": 0.5446, "step": 241560 }, { "epoch": 2.67, "learning_rate": 5.420283757049636e-06, "loss": 0.603, "step": 241565 }, { "epoch": 2.67, "learning_rate": 5.419361029911124e-06, "loss": 0.6001, "step": 241570 }, { "epoch": 2.67, "learning_rate": 5.418438302772611e-06, "loss": 0.6244, "step": 241575 }, { "epoch": 2.67, "learning_rate": 5.417515575634098e-06, "loss": 0.5939, "step": 241580 }, { "epoch": 2.68, "learning_rate": 5.416592848495586e-06, "loss": 0.6271, "step": 241585 }, { "epoch": 2.68, "learning_rate": 5.4156701213570735e-06, "loss": 0.5726, "step": 241590 }, { "epoch": 2.68, "learning_rate": 5.414747394218561e-06, "loss": 0.5965, "step": 241595 }, { "epoch": 2.68, "learning_rate": 5.413824667080049e-06, "loss": 0.596, "step": 241600 }, { "epoch": 2.68, "learning_rate": 5.412901939941536e-06, "loss": 0.5803, "step": 241605 }, { "epoch": 2.68, "learning_rate": 5.411979212803024e-06, "loss": 0.5967, "step": 241610 }, { "epoch": 2.68, "learning_rate": 5.411056485664511e-06, "loss": 0.5775, "step": 241615 }, { "epoch": 2.68, "learning_rate": 5.410133758525999e-06, "loss": 0.6056, "step": 241620 }, { "epoch": 2.68, "learning_rate": 5.409211031387487e-06, "loss": 0.6183, "step": 241625 }, { "epoch": 2.68, "learning_rate": 5.408288304248974e-06, "loss": 0.5809, "step": 241630 }, { "epoch": 2.68, "learning_rate": 5.407365577110462e-06, "loss": 0.5829, "step": 241635 }, { "epoch": 2.68, "learning_rate": 5.4064428499719495e-06, "loss": 0.6205, "step": 241640 }, { "epoch": 2.68, "learning_rate": 5.405520122833437e-06, "loss": 0.611, "step": 241645 }, { "epoch": 2.68, "learning_rate": 5.404597395694924e-06, "loss": 0.549, "step": 241650 }, { "epoch": 2.68, "learning_rate": 5.403674668556412e-06, "loss": 0.6186, "step": 241655 }, { "epoch": 2.68, "learning_rate": 5.402751941417899e-06, "loss": 0.5617, "step": 241660 }, { "epoch": 2.68, "learning_rate": 5.401829214279387e-06, "loss": 0.6392, "step": 241665 }, { "epoch": 2.68, "learning_rate": 5.400906487140875e-06, "loss": 0.5985, "step": 241670 }, { "epoch": 2.68, "learning_rate": 5.399983760002363e-06, "loss": 0.5536, "step": 241675 }, { "epoch": 2.68, "learning_rate": 5.3990610328638506e-06, "loss": 0.5464, "step": 241680 }, { "epoch": 2.68, "learning_rate": 5.3981383057253376e-06, "loss": 0.5391, "step": 241685 }, { "epoch": 2.68, "learning_rate": 5.3972155785868246e-06, "loss": 0.5361, "step": 241690 }, { "epoch": 2.68, "learning_rate": 5.3962928514483124e-06, "loss": 0.5565, "step": 241695 }, { "epoch": 2.68, "learning_rate": 5.3953701243098e-06, "loss": 0.6426, "step": 241700 }, { "epoch": 2.68, "learning_rate": 5.394447397171288e-06, "loss": 0.622, "step": 241705 }, { "epoch": 2.68, "learning_rate": 5.393524670032776e-06, "loss": 0.541, "step": 241710 }, { "epoch": 2.68, "learning_rate": 5.392601942894263e-06, "loss": 0.6138, "step": 241715 }, { "epoch": 2.68, "learning_rate": 5.391679215755751e-06, "loss": 0.6046, "step": 241720 }, { "epoch": 2.68, "learning_rate": 5.390756488617239e-06, "loss": 0.597, "step": 241725 }, { "epoch": 2.68, "learning_rate": 5.389833761478726e-06, "loss": 0.5797, "step": 241730 }, { "epoch": 2.68, "learning_rate": 5.3889110343402135e-06, "loss": 0.5684, "step": 241735 }, { "epoch": 2.68, "learning_rate": 5.387988307201701e-06, "loss": 0.5869, "step": 241740 }, { "epoch": 2.68, "learning_rate": 5.387065580063188e-06, "loss": 0.5822, "step": 241745 }, { "epoch": 2.68, "learning_rate": 5.386142852924676e-06, "loss": 0.5814, "step": 241750 }, { "epoch": 2.68, "learning_rate": 5.385220125786164e-06, "loss": 0.5709, "step": 241755 }, { "epoch": 2.68, "learning_rate": 5.384297398647652e-06, "loss": 0.5463, "step": 241760 }, { "epoch": 2.68, "learning_rate": 5.383374671509139e-06, "loss": 0.5878, "step": 241765 }, { "epoch": 2.68, "learning_rate": 5.382451944370626e-06, "loss": 0.6003, "step": 241770 }, { "epoch": 2.68, "learning_rate": 5.381529217232114e-06, "loss": 0.588, "step": 241775 }, { "epoch": 2.68, "learning_rate": 5.380606490093602e-06, "loss": 0.6093, "step": 241780 }, { "epoch": 2.68, "learning_rate": 5.3796837629550895e-06, "loss": 0.5498, "step": 241785 }, { "epoch": 2.68, "learning_rate": 5.378761035816577e-06, "loss": 0.5874, "step": 241790 }, { "epoch": 2.68, "learning_rate": 5.377838308678064e-06, "loss": 0.5973, "step": 241795 }, { "epoch": 2.68, "learning_rate": 5.376915581539552e-06, "loss": 0.5733, "step": 241800 }, { "epoch": 2.68, "learning_rate": 5.375992854401039e-06, "loss": 0.5593, "step": 241805 }, { "epoch": 2.68, "learning_rate": 5.375070127262527e-06, "loss": 0.5826, "step": 241810 }, { "epoch": 2.68, "learning_rate": 5.374147400124015e-06, "loss": 0.5715, "step": 241815 }, { "epoch": 2.68, "learning_rate": 5.373224672985503e-06, "loss": 0.516, "step": 241820 }, { "epoch": 2.68, "learning_rate": 5.37230194584699e-06, "loss": 0.5569, "step": 241825 }, { "epoch": 2.68, "learning_rate": 5.3713792187084776e-06, "loss": 0.5945, "step": 241830 }, { "epoch": 2.68, "learning_rate": 5.370456491569965e-06, "loss": 0.5873, "step": 241835 }, { "epoch": 2.68, "learning_rate": 5.369533764431452e-06, "loss": 0.5507, "step": 241840 }, { "epoch": 2.68, "learning_rate": 5.36861103729294e-06, "loss": 0.6332, "step": 241845 }, { "epoch": 2.68, "learning_rate": 5.367688310154428e-06, "loss": 0.6367, "step": 241850 }, { "epoch": 2.68, "learning_rate": 5.366765583015915e-06, "loss": 0.5362, "step": 241855 }, { "epoch": 2.68, "learning_rate": 5.365842855877403e-06, "loss": 0.5635, "step": 241860 }, { "epoch": 2.68, "learning_rate": 5.364920128738891e-06, "loss": 0.5615, "step": 241865 }, { "epoch": 2.68, "learning_rate": 5.363997401600379e-06, "loss": 0.6038, "step": 241870 }, { "epoch": 2.68, "learning_rate": 5.3630746744618665e-06, "loss": 0.5583, "step": 241875 }, { "epoch": 2.68, "learning_rate": 5.362151947323353e-06, "loss": 0.5599, "step": 241880 }, { "epoch": 2.68, "learning_rate": 5.3612292201848405e-06, "loss": 0.558, "step": 241885 }, { "epoch": 2.68, "learning_rate": 5.360306493046328e-06, "loss": 0.5731, "step": 241890 }, { "epoch": 2.68, "learning_rate": 5.359383765907816e-06, "loss": 0.6161, "step": 241895 }, { "epoch": 2.68, "learning_rate": 5.358461038769304e-06, "loss": 0.5776, "step": 241900 }, { "epoch": 2.68, "learning_rate": 5.357538311630791e-06, "loss": 0.5507, "step": 241905 }, { "epoch": 2.68, "learning_rate": 5.356615584492279e-06, "loss": 0.6111, "step": 241910 }, { "epoch": 2.68, "learning_rate": 5.355692857353767e-06, "loss": 0.5791, "step": 241915 }, { "epoch": 2.68, "learning_rate": 5.354770130215254e-06, "loss": 0.5868, "step": 241920 }, { "epoch": 2.68, "learning_rate": 5.353847403076742e-06, "loss": 0.6143, "step": 241925 }, { "epoch": 2.68, "learning_rate": 5.3529246759382295e-06, "loss": 0.517, "step": 241930 }, { "epoch": 2.68, "learning_rate": 5.3520019487997165e-06, "loss": 0.5649, "step": 241935 }, { "epoch": 2.68, "learning_rate": 5.351079221661204e-06, "loss": 0.6406, "step": 241940 }, { "epoch": 2.68, "learning_rate": 5.350156494522692e-06, "loss": 0.5698, "step": 241945 }, { "epoch": 2.68, "learning_rate": 5.34923376738418e-06, "loss": 0.6076, "step": 241950 }, { "epoch": 2.68, "learning_rate": 5.348311040245667e-06, "loss": 0.5409, "step": 241955 }, { "epoch": 2.68, "learning_rate": 5.347388313107155e-06, "loss": 0.5662, "step": 241960 }, { "epoch": 2.68, "learning_rate": 5.346465585968642e-06, "loss": 0.574, "step": 241965 }, { "epoch": 2.68, "learning_rate": 5.34554285883013e-06, "loss": 0.5226, "step": 241970 }, { "epoch": 2.68, "learning_rate": 5.3446201316916176e-06, "loss": 0.6152, "step": 241975 }, { "epoch": 2.68, "learning_rate": 5.343697404553105e-06, "loss": 0.5351, "step": 241980 }, { "epoch": 2.68, "learning_rate": 5.342774677414593e-06, "loss": 0.5763, "step": 241985 }, { "epoch": 2.68, "learning_rate": 5.34185195027608e-06, "loss": 0.6136, "step": 241990 }, { "epoch": 2.68, "learning_rate": 5.340929223137567e-06, "loss": 0.546, "step": 241995 }, { "epoch": 2.68, "learning_rate": 5.340006495999055e-06, "loss": 0.5419, "step": 242000 }, { "epoch": 2.68, "eval_loss": 0.5583019852638245, "eval_runtime": 69.8472, "eval_samples_per_second": 28.634, "eval_steps_per_second": 14.317, "step": 242000 }, { "epoch": 2.68, "learning_rate": 5.339083768860543e-06, "loss": 0.541, "step": 242005 }, { "epoch": 2.68, "learning_rate": 5.338161041722031e-06, "loss": 0.5927, "step": 242010 }, { "epoch": 2.68, "learning_rate": 5.337238314583519e-06, "loss": 0.5604, "step": 242015 }, { "epoch": 2.68, "learning_rate": 5.336315587445006e-06, "loss": 0.5678, "step": 242020 }, { "epoch": 2.68, "learning_rate": 5.3353928603064935e-06, "loss": 0.5245, "step": 242025 }, { "epoch": 2.68, "learning_rate": 5.3344701331679805e-06, "loss": 0.5803, "step": 242030 }, { "epoch": 2.68, "learning_rate": 5.333547406029468e-06, "loss": 0.6162, "step": 242035 }, { "epoch": 2.68, "learning_rate": 5.332624678890956e-06, "loss": 0.5512, "step": 242040 }, { "epoch": 2.68, "learning_rate": 5.331701951752443e-06, "loss": 0.5349, "step": 242045 }, { "epoch": 2.68, "learning_rate": 5.330779224613931e-06, "loss": 0.5801, "step": 242050 }, { "epoch": 2.68, "learning_rate": 5.329856497475419e-06, "loss": 0.6003, "step": 242055 }, { "epoch": 2.68, "learning_rate": 5.328933770336907e-06, "loss": 0.5487, "step": 242060 }, { "epoch": 2.68, "learning_rate": 5.328011043198395e-06, "loss": 0.5809, "step": 242065 }, { "epoch": 2.68, "learning_rate": 5.327088316059882e-06, "loss": 0.5726, "step": 242070 }, { "epoch": 2.68, "learning_rate": 5.326165588921369e-06, "loss": 0.5695, "step": 242075 }, { "epoch": 2.68, "learning_rate": 5.3252428617828564e-06, "loss": 0.5786, "step": 242080 }, { "epoch": 2.68, "learning_rate": 5.324320134644344e-06, "loss": 0.5354, "step": 242085 }, { "epoch": 2.68, "learning_rate": 5.323397407505832e-06, "loss": 0.5964, "step": 242090 }, { "epoch": 2.68, "learning_rate": 5.32247468036732e-06, "loss": 0.5639, "step": 242095 }, { "epoch": 2.68, "learning_rate": 5.321551953228807e-06, "loss": 0.5771, "step": 242100 }, { "epoch": 2.68, "learning_rate": 5.320629226090295e-06, "loss": 0.587, "step": 242105 }, { "epoch": 2.68, "learning_rate": 5.319706498951782e-06, "loss": 0.5222, "step": 242110 }, { "epoch": 2.68, "learning_rate": 5.31878377181327e-06, "loss": 0.6295, "step": 242115 }, { "epoch": 2.68, "learning_rate": 5.3178610446747575e-06, "loss": 0.6012, "step": 242120 }, { "epoch": 2.68, "learning_rate": 5.316938317536245e-06, "loss": 0.628, "step": 242125 }, { "epoch": 2.68, "learning_rate": 5.316015590397732e-06, "loss": 0.5871, "step": 242130 }, { "epoch": 2.68, "learning_rate": 5.31509286325922e-06, "loss": 0.5914, "step": 242135 }, { "epoch": 2.68, "learning_rate": 5.314170136120708e-06, "loss": 0.5743, "step": 242140 }, { "epoch": 2.68, "learning_rate": 5.313247408982195e-06, "loss": 0.5686, "step": 242145 }, { "epoch": 2.68, "learning_rate": 5.312324681843683e-06, "loss": 0.5854, "step": 242150 }, { "epoch": 2.68, "learning_rate": 5.31140195470517e-06, "loss": 0.5774, "step": 242155 }, { "epoch": 2.68, "learning_rate": 5.310479227566658e-06, "loss": 0.5716, "step": 242160 }, { "epoch": 2.68, "learning_rate": 5.309556500428146e-06, "loss": 0.5438, "step": 242165 }, { "epoch": 2.68, "learning_rate": 5.3086337732896335e-06, "loss": 0.538, "step": 242170 }, { "epoch": 2.68, "learning_rate": 5.307711046151121e-06, "loss": 0.5848, "step": 242175 }, { "epoch": 2.68, "learning_rate": 5.306788319012608e-06, "loss": 0.5992, "step": 242180 }, { "epoch": 2.68, "learning_rate": 5.305865591874095e-06, "loss": 0.5585, "step": 242185 }, { "epoch": 2.68, "learning_rate": 5.304942864735583e-06, "loss": 0.5874, "step": 242190 }, { "epoch": 2.68, "learning_rate": 5.304020137597071e-06, "loss": 0.5888, "step": 242195 }, { "epoch": 2.68, "learning_rate": 5.303097410458559e-06, "loss": 0.5463, "step": 242200 }, { "epoch": 2.68, "learning_rate": 5.302174683320047e-06, "loss": 0.5583, "step": 242205 }, { "epoch": 2.68, "learning_rate": 5.301251956181534e-06, "loss": 0.617, "step": 242210 }, { "epoch": 2.68, "learning_rate": 5.300329229043022e-06, "loss": 0.5618, "step": 242215 }, { "epoch": 2.68, "learning_rate": 5.299406501904509e-06, "loss": 0.5423, "step": 242220 }, { "epoch": 2.68, "learning_rate": 5.2984837747659964e-06, "loss": 0.5905, "step": 242225 }, { "epoch": 2.68, "learning_rate": 5.297561047627484e-06, "loss": 0.6147, "step": 242230 }, { "epoch": 2.68, "learning_rate": 5.296638320488972e-06, "loss": 0.5454, "step": 242235 }, { "epoch": 2.68, "learning_rate": 5.295715593350459e-06, "loss": 0.6115, "step": 242240 }, { "epoch": 2.68, "learning_rate": 5.294792866211947e-06, "loss": 0.6369, "step": 242245 }, { "epoch": 2.68, "learning_rate": 5.293870139073435e-06, "loss": 0.6221, "step": 242250 }, { "epoch": 2.68, "learning_rate": 5.292947411934923e-06, "loss": 0.5887, "step": 242255 }, { "epoch": 2.68, "learning_rate": 5.29202468479641e-06, "loss": 0.5742, "step": 242260 }, { "epoch": 2.68, "learning_rate": 5.2911019576578975e-06, "loss": 0.6195, "step": 242265 }, { "epoch": 2.68, "learning_rate": 5.2901792305193845e-06, "loss": 0.5678, "step": 242270 }, { "epoch": 2.68, "learning_rate": 5.289256503380872e-06, "loss": 0.5908, "step": 242275 }, { "epoch": 2.68, "learning_rate": 5.28833377624236e-06, "loss": 0.566, "step": 242280 }, { "epoch": 2.68, "learning_rate": 5.287411049103848e-06, "loss": 0.579, "step": 242285 }, { "epoch": 2.68, "learning_rate": 5.286488321965335e-06, "loss": 0.5542, "step": 242290 }, { "epoch": 2.68, "learning_rate": 5.285565594826822e-06, "loss": 0.6147, "step": 242295 }, { "epoch": 2.68, "learning_rate": 5.28464286768831e-06, "loss": 0.5375, "step": 242300 }, { "epoch": 2.68, "learning_rate": 5.283720140549798e-06, "loss": 0.6121, "step": 242305 }, { "epoch": 2.68, "learning_rate": 5.282797413411286e-06, "loss": 0.5681, "step": 242310 }, { "epoch": 2.68, "learning_rate": 5.2818746862727735e-06, "loss": 0.5908, "step": 242315 }, { "epoch": 2.68, "learning_rate": 5.2809519591342605e-06, "loss": 0.5717, "step": 242320 }, { "epoch": 2.68, "learning_rate": 5.280029231995748e-06, "loss": 0.6008, "step": 242325 }, { "epoch": 2.68, "learning_rate": 5.279106504857236e-06, "loss": 0.5655, "step": 242330 }, { "epoch": 2.68, "learning_rate": 5.278183777718723e-06, "loss": 0.5898, "step": 242335 }, { "epoch": 2.68, "learning_rate": 5.277261050580211e-06, "loss": 0.5468, "step": 242340 }, { "epoch": 2.68, "learning_rate": 5.276338323441699e-06, "loss": 0.6288, "step": 242345 }, { "epoch": 2.68, "learning_rate": 5.275415596303186e-06, "loss": 0.5667, "step": 242350 }, { "epoch": 2.68, "learning_rate": 5.274492869164674e-06, "loss": 0.5741, "step": 242355 }, { "epoch": 2.68, "learning_rate": 5.2735701420261616e-06, "loss": 0.5356, "step": 242360 }, { "epoch": 2.68, "learning_rate": 5.272647414887649e-06, "loss": 0.625, "step": 242365 }, { "epoch": 2.68, "learning_rate": 5.271724687749137e-06, "loss": 0.5548, "step": 242370 }, { "epoch": 2.68, "learning_rate": 5.270801960610624e-06, "loss": 0.5827, "step": 242375 }, { "epoch": 2.68, "learning_rate": 5.269879233472111e-06, "loss": 0.5791, "step": 242380 }, { "epoch": 2.68, "learning_rate": 5.268956506333599e-06, "loss": 0.6143, "step": 242385 }, { "epoch": 2.68, "learning_rate": 5.268033779195087e-06, "loss": 0.5435, "step": 242390 }, { "epoch": 2.68, "learning_rate": 5.267111052056575e-06, "loss": 0.5801, "step": 242395 }, { "epoch": 2.68, "learning_rate": 5.266188324918063e-06, "loss": 0.5732, "step": 242400 }, { "epoch": 2.68, "learning_rate": 5.26526559777955e-06, "loss": 0.5753, "step": 242405 }, { "epoch": 2.68, "learning_rate": 5.264342870641037e-06, "loss": 0.6317, "step": 242410 }, { "epoch": 2.68, "learning_rate": 5.2634201435025245e-06, "loss": 0.5365, "step": 242415 }, { "epoch": 2.68, "learning_rate": 5.262497416364012e-06, "loss": 0.5288, "step": 242420 }, { "epoch": 2.68, "learning_rate": 5.2615746892255e-06, "loss": 0.6007, "step": 242425 }, { "epoch": 2.68, "learning_rate": 5.260651962086987e-06, "loss": 0.5646, "step": 242430 }, { "epoch": 2.68, "learning_rate": 5.259729234948475e-06, "loss": 0.6065, "step": 242435 }, { "epoch": 2.68, "learning_rate": 5.258806507809963e-06, "loss": 0.5573, "step": 242440 }, { "epoch": 2.68, "learning_rate": 5.257883780671451e-06, "loss": 0.6203, "step": 242445 }, { "epoch": 2.68, "learning_rate": 5.256961053532938e-06, "loss": 0.6318, "step": 242450 }, { "epoch": 2.68, "learning_rate": 5.256038326394426e-06, "loss": 0.6132, "step": 242455 }, { "epoch": 2.68, "learning_rate": 5.255115599255913e-06, "loss": 0.578, "step": 242460 }, { "epoch": 2.68, "learning_rate": 5.2541928721174005e-06, "loss": 0.5805, "step": 242465 }, { "epoch": 2.68, "learning_rate": 5.253270144978888e-06, "loss": 0.562, "step": 242470 }, { "epoch": 2.68, "learning_rate": 5.252347417840376e-06, "loss": 0.5546, "step": 242475 }, { "epoch": 2.68, "learning_rate": 5.251424690701864e-06, "loss": 0.5727, "step": 242480 }, { "epoch": 2.68, "learning_rate": 5.250501963563351e-06, "loss": 0.6006, "step": 242485 }, { "epoch": 2.69, "learning_rate": 5.249579236424838e-06, "loss": 0.62, "step": 242490 }, { "epoch": 2.69, "learning_rate": 5.248656509286326e-06, "loss": 0.5506, "step": 242495 }, { "epoch": 2.69, "learning_rate": 5.247733782147814e-06, "loss": 0.601, "step": 242500 }, { "epoch": 2.69, "learning_rate": 5.2468110550093016e-06, "loss": 0.61, "step": 242505 }, { "epoch": 2.69, "learning_rate": 5.245888327870789e-06, "loss": 0.559, "step": 242510 }, { "epoch": 2.69, "learning_rate": 5.244965600732276e-06, "loss": 0.5894, "step": 242515 }, { "epoch": 2.69, "learning_rate": 5.244042873593764e-06, "loss": 0.5885, "step": 242520 }, { "epoch": 2.69, "learning_rate": 5.243120146455251e-06, "loss": 0.5674, "step": 242525 }, { "epoch": 2.69, "learning_rate": 5.242197419316739e-06, "loss": 0.5757, "step": 242530 }, { "epoch": 2.69, "learning_rate": 5.241274692178227e-06, "loss": 0.6112, "step": 242535 }, { "epoch": 2.69, "learning_rate": 5.240351965039715e-06, "loss": 0.5077, "step": 242540 }, { "epoch": 2.69, "learning_rate": 5.239429237901202e-06, "loss": 0.5865, "step": 242545 }, { "epoch": 2.69, "learning_rate": 5.23850651076269e-06, "loss": 0.5349, "step": 242550 }, { "epoch": 2.69, "learning_rate": 5.2375837836241775e-06, "loss": 0.5975, "step": 242555 }, { "epoch": 2.69, "learning_rate": 5.236661056485665e-06, "loss": 0.609, "step": 242560 }, { "epoch": 2.69, "learning_rate": 5.235738329347152e-06, "loss": 0.5501, "step": 242565 }, { "epoch": 2.69, "learning_rate": 5.234815602208639e-06, "loss": 0.5835, "step": 242570 }, { "epoch": 2.69, "learning_rate": 5.233892875070127e-06, "loss": 0.5643, "step": 242575 }, { "epoch": 2.69, "learning_rate": 5.232970147931615e-06, "loss": 0.5423, "step": 242580 }, { "epoch": 2.69, "learning_rate": 5.232047420793103e-06, "loss": 0.5986, "step": 242585 }, { "epoch": 2.69, "learning_rate": 5.231124693654591e-06, "loss": 0.5607, "step": 242590 }, { "epoch": 2.69, "learning_rate": 5.230201966516078e-06, "loss": 0.5882, "step": 242595 }, { "epoch": 2.69, "learning_rate": 5.229279239377565e-06, "loss": 0.581, "step": 242600 }, { "epoch": 2.69, "learning_rate": 5.228356512239053e-06, "loss": 0.5491, "step": 242605 }, { "epoch": 2.69, "learning_rate": 5.2274337851005405e-06, "loss": 0.5683, "step": 242610 }, { "epoch": 2.69, "learning_rate": 5.226511057962028e-06, "loss": 0.574, "step": 242615 }, { "epoch": 2.69, "learning_rate": 5.225588330823516e-06, "loss": 0.6327, "step": 242620 }, { "epoch": 2.69, "learning_rate": 5.224665603685003e-06, "loss": 0.5895, "step": 242625 }, { "epoch": 2.69, "learning_rate": 5.223742876546491e-06, "loss": 0.592, "step": 242630 }, { "epoch": 2.69, "learning_rate": 5.222820149407979e-06, "loss": 0.5403, "step": 242635 }, { "epoch": 2.69, "learning_rate": 5.221897422269466e-06, "loss": 0.6039, "step": 242640 }, { "epoch": 2.69, "learning_rate": 5.220974695130954e-06, "loss": 0.5818, "step": 242645 }, { "epoch": 2.69, "learning_rate": 5.2200519679924415e-06, "loss": 0.6471, "step": 242650 }, { "epoch": 2.69, "learning_rate": 5.2191292408539285e-06, "loss": 0.5838, "step": 242655 }, { "epoch": 2.69, "learning_rate": 5.218206513715416e-06, "loss": 0.6001, "step": 242660 }, { "epoch": 2.69, "learning_rate": 5.217283786576904e-06, "loss": 0.5444, "step": 242665 }, { "epoch": 2.69, "learning_rate": 5.216361059438392e-06, "loss": 0.5835, "step": 242670 }, { "epoch": 2.69, "learning_rate": 5.215438332299879e-06, "loss": 0.5865, "step": 242675 }, { "epoch": 2.69, "learning_rate": 5.214515605161367e-06, "loss": 0.5329, "step": 242680 }, { "epoch": 2.69, "learning_rate": 5.213592878022854e-06, "loss": 0.5672, "step": 242685 }, { "epoch": 2.69, "learning_rate": 5.212670150884342e-06, "loss": 0.6112, "step": 242690 }, { "epoch": 2.69, "learning_rate": 5.21174742374583e-06, "loss": 0.595, "step": 242695 }, { "epoch": 2.69, "learning_rate": 5.2108246966073175e-06, "loss": 0.5808, "step": 242700 }, { "epoch": 2.69, "learning_rate": 5.2099019694688045e-06, "loss": 0.5835, "step": 242705 }, { "epoch": 2.69, "learning_rate": 5.208979242330292e-06, "loss": 0.6145, "step": 242710 }, { "epoch": 2.69, "learning_rate": 5.208056515191779e-06, "loss": 0.5822, "step": 242715 }, { "epoch": 2.69, "learning_rate": 5.207133788053267e-06, "loss": 0.5997, "step": 242720 }, { "epoch": 2.69, "learning_rate": 5.206211060914755e-06, "loss": 0.5772, "step": 242725 }, { "epoch": 2.69, "learning_rate": 5.205288333776243e-06, "loss": 0.6003, "step": 242730 }, { "epoch": 2.69, "learning_rate": 5.20436560663773e-06, "loss": 0.5897, "step": 242735 }, { "epoch": 2.69, "learning_rate": 5.203442879499218e-06, "loss": 0.585, "step": 242740 }, { "epoch": 2.69, "learning_rate": 5.202520152360706e-06, "loss": 0.5722, "step": 242745 }, { "epoch": 2.69, "learning_rate": 5.2015974252221934e-06, "loss": 0.5881, "step": 242750 }, { "epoch": 2.69, "learning_rate": 5.2006746980836804e-06, "loss": 0.5573, "step": 242755 }, { "epoch": 2.69, "learning_rate": 5.199751970945168e-06, "loss": 0.599, "step": 242760 }, { "epoch": 2.69, "learning_rate": 5.198829243806655e-06, "loss": 0.6113, "step": 242765 }, { "epoch": 2.69, "learning_rate": 5.197906516668143e-06, "loss": 0.523, "step": 242770 }, { "epoch": 2.69, "learning_rate": 5.196983789529631e-06, "loss": 0.5961, "step": 242775 }, { "epoch": 2.69, "learning_rate": 5.196061062391119e-06, "loss": 0.6049, "step": 242780 }, { "epoch": 2.69, "learning_rate": 5.195138335252607e-06, "loss": 0.5615, "step": 242785 }, { "epoch": 2.69, "learning_rate": 5.194215608114094e-06, "loss": 0.6484, "step": 242790 }, { "epoch": 2.69, "learning_rate": 5.193292880975581e-06, "loss": 0.6139, "step": 242795 }, { "epoch": 2.69, "learning_rate": 5.1923701538370685e-06, "loss": 0.6023, "step": 242800 }, { "epoch": 2.69, "learning_rate": 5.191447426698556e-06, "loss": 0.5827, "step": 242805 }, { "epoch": 2.69, "learning_rate": 5.190524699560044e-06, "loss": 0.5778, "step": 242810 }, { "epoch": 2.69, "learning_rate": 5.189601972421532e-06, "loss": 0.5318, "step": 242815 }, { "epoch": 2.69, "learning_rate": 5.188679245283019e-06, "loss": 0.5894, "step": 242820 }, { "epoch": 2.69, "learning_rate": 5.187756518144507e-06, "loss": 0.5792, "step": 242825 }, { "epoch": 2.69, "learning_rate": 5.186833791005994e-06, "loss": 0.6091, "step": 242830 }, { "epoch": 2.69, "learning_rate": 5.185911063867482e-06, "loss": 0.5668, "step": 242835 }, { "epoch": 2.69, "learning_rate": 5.18498833672897e-06, "loss": 0.5621, "step": 242840 }, { "epoch": 2.69, "learning_rate": 5.184065609590457e-06, "loss": 0.5734, "step": 242845 }, { "epoch": 2.69, "learning_rate": 5.1831428824519445e-06, "loss": 0.6276, "step": 242850 }, { "epoch": 2.69, "learning_rate": 5.182220155313432e-06, "loss": 0.55, "step": 242855 }, { "epoch": 2.69, "learning_rate": 5.18129742817492e-06, "loss": 0.5294, "step": 242860 }, { "epoch": 2.69, "learning_rate": 5.180374701036407e-06, "loss": 0.6428, "step": 242865 }, { "epoch": 2.69, "learning_rate": 5.179451973897895e-06, "loss": 0.5695, "step": 242870 }, { "epoch": 2.69, "learning_rate": 5.178529246759382e-06, "loss": 0.593, "step": 242875 }, { "epoch": 2.69, "learning_rate": 5.17760651962087e-06, "loss": 0.5394, "step": 242880 }, { "epoch": 2.69, "learning_rate": 5.176683792482358e-06, "loss": 0.5695, "step": 242885 }, { "epoch": 2.69, "learning_rate": 5.175761065343846e-06, "loss": 0.6226, "step": 242890 }, { "epoch": 2.69, "learning_rate": 5.1748383382053334e-06, "loss": 0.573, "step": 242895 }, { "epoch": 2.69, "learning_rate": 5.1739156110668204e-06, "loss": 0.5579, "step": 242900 }, { "epoch": 2.69, "learning_rate": 5.1729928839283074e-06, "loss": 0.6059, "step": 242905 }, { "epoch": 2.69, "learning_rate": 5.172070156789795e-06, "loss": 0.578, "step": 242910 }, { "epoch": 2.69, "learning_rate": 5.171147429651283e-06, "loss": 0.5745, "step": 242915 }, { "epoch": 2.69, "learning_rate": 5.170224702512771e-06, "loss": 0.571, "step": 242920 }, { "epoch": 2.69, "learning_rate": 5.169301975374259e-06, "loss": 0.5071, "step": 242925 }, { "epoch": 2.69, "learning_rate": 5.168379248235746e-06, "loss": 0.6032, "step": 242930 }, { "epoch": 2.69, "learning_rate": 5.167456521097234e-06, "loss": 0.5833, "step": 242935 }, { "epoch": 2.69, "learning_rate": 5.166533793958721e-06, "loss": 0.6552, "step": 242940 }, { "epoch": 2.69, "learning_rate": 5.1656110668202085e-06, "loss": 0.5492, "step": 242945 }, { "epoch": 2.69, "learning_rate": 5.164688339681696e-06, "loss": 0.5751, "step": 242950 }, { "epoch": 2.69, "learning_rate": 5.163765612543184e-06, "loss": 0.5561, "step": 242955 }, { "epoch": 2.69, "learning_rate": 5.162842885404671e-06, "loss": 0.5211, "step": 242960 }, { "epoch": 2.69, "learning_rate": 5.161920158266159e-06, "loss": 0.6193, "step": 242965 }, { "epoch": 2.69, "learning_rate": 5.160997431127647e-06, "loss": 0.5394, "step": 242970 }, { "epoch": 2.69, "learning_rate": 5.160074703989135e-06, "loss": 0.638, "step": 242975 }, { "epoch": 2.69, "learning_rate": 5.159151976850622e-06, "loss": 0.5788, "step": 242980 }, { "epoch": 2.69, "learning_rate": 5.158229249712109e-06, "loss": 0.5742, "step": 242985 }, { "epoch": 2.69, "learning_rate": 5.157306522573597e-06, "loss": 0.5776, "step": 242990 }, { "epoch": 2.69, "learning_rate": 5.1563837954350845e-06, "loss": 0.6424, "step": 242995 }, { "epoch": 2.69, "learning_rate": 5.155461068296572e-06, "loss": 0.5685, "step": 243000 }, { "epoch": 2.69, "eval_loss": 0.5635286569595337, "eval_runtime": 69.7486, "eval_samples_per_second": 28.674, "eval_steps_per_second": 14.337, "step": 243000 }, { "epoch": 2.69, "learning_rate": 5.15453834115806e-06, "loss": 0.5844, "step": 243005 }, { "epoch": 2.69, "learning_rate": 5.153615614019547e-06, "loss": 0.5781, "step": 243010 }, { "epoch": 2.69, "learning_rate": 5.152692886881035e-06, "loss": 0.6146, "step": 243015 }, { "epoch": 2.69, "learning_rate": 5.151770159742522e-06, "loss": 0.6016, "step": 243020 }, { "epoch": 2.69, "learning_rate": 5.15084743260401e-06, "loss": 0.608, "step": 243025 }, { "epoch": 2.69, "learning_rate": 5.149924705465498e-06, "loss": 0.5765, "step": 243030 }, { "epoch": 2.69, "learning_rate": 5.1490019783269856e-06, "loss": 0.5735, "step": 243035 }, { "epoch": 2.69, "learning_rate": 5.1480792511884726e-06, "loss": 0.5466, "step": 243040 }, { "epoch": 2.69, "learning_rate": 5.14715652404996e-06, "loss": 0.5915, "step": 243045 }, { "epoch": 2.69, "learning_rate": 5.146233796911448e-06, "loss": 0.5851, "step": 243050 }, { "epoch": 2.69, "learning_rate": 5.145311069772935e-06, "loss": 0.5571, "step": 243055 }, { "epoch": 2.69, "learning_rate": 5.144388342634423e-06, "loss": 0.5748, "step": 243060 }, { "epoch": 2.69, "learning_rate": 5.143465615495911e-06, "loss": 0.514, "step": 243065 }, { "epoch": 2.69, "learning_rate": 5.142542888357398e-06, "loss": 0.5634, "step": 243070 }, { "epoch": 2.69, "learning_rate": 5.141620161218886e-06, "loss": 0.5685, "step": 243075 }, { "epoch": 2.69, "learning_rate": 5.140697434080374e-06, "loss": 0.5699, "step": 243080 }, { "epoch": 2.69, "learning_rate": 5.1397747069418615e-06, "loss": 0.6242, "step": 243085 }, { "epoch": 2.69, "learning_rate": 5.138851979803349e-06, "loss": 0.5739, "step": 243090 }, { "epoch": 2.69, "learning_rate": 5.1379292526648355e-06, "loss": 0.5633, "step": 243095 }, { "epoch": 2.69, "learning_rate": 5.137006525526323e-06, "loss": 0.5478, "step": 243100 }, { "epoch": 2.69, "learning_rate": 5.136083798387811e-06, "loss": 0.5718, "step": 243105 }, { "epoch": 2.69, "learning_rate": 5.135161071249299e-06, "loss": 0.537, "step": 243110 }, { "epoch": 2.69, "learning_rate": 5.134238344110787e-06, "loss": 0.6059, "step": 243115 }, { "epoch": 2.69, "learning_rate": 5.133315616972274e-06, "loss": 0.5425, "step": 243120 }, { "epoch": 2.69, "learning_rate": 5.132392889833762e-06, "loss": 0.5396, "step": 243125 }, { "epoch": 2.69, "learning_rate": 5.131470162695249e-06, "loss": 0.579, "step": 243130 }, { "epoch": 2.69, "learning_rate": 5.130547435556737e-06, "loss": 0.5054, "step": 243135 }, { "epoch": 2.69, "learning_rate": 5.1296247084182245e-06, "loss": 0.5708, "step": 243140 }, { "epoch": 2.69, "learning_rate": 5.128701981279712e-06, "loss": 0.5932, "step": 243145 }, { "epoch": 2.69, "learning_rate": 5.127779254141199e-06, "loss": 0.5788, "step": 243150 }, { "epoch": 2.69, "learning_rate": 5.126856527002687e-06, "loss": 0.6175, "step": 243155 }, { "epoch": 2.69, "learning_rate": 5.125933799864175e-06, "loss": 0.5528, "step": 243160 }, { "epoch": 2.69, "learning_rate": 5.125011072725663e-06, "loss": 0.6286, "step": 243165 }, { "epoch": 2.69, "learning_rate": 5.12408834558715e-06, "loss": 0.556, "step": 243170 }, { "epoch": 2.69, "learning_rate": 5.123165618448638e-06, "loss": 0.6494, "step": 243175 }, { "epoch": 2.69, "learning_rate": 5.122242891310125e-06, "loss": 0.5823, "step": 243180 }, { "epoch": 2.69, "learning_rate": 5.1213201641716126e-06, "loss": 0.5441, "step": 243185 }, { "epoch": 2.69, "learning_rate": 5.1203974370331e-06, "loss": 0.5961, "step": 243190 }, { "epoch": 2.69, "learning_rate": 5.119474709894588e-06, "loss": 0.5259, "step": 243195 }, { "epoch": 2.69, "learning_rate": 5.118551982756076e-06, "loss": 0.5198, "step": 243200 }, { "epoch": 2.69, "learning_rate": 5.117629255617563e-06, "loss": 0.6052, "step": 243205 }, { "epoch": 2.69, "learning_rate": 5.11670652847905e-06, "loss": 0.6083, "step": 243210 }, { "epoch": 2.69, "learning_rate": 5.115783801340538e-06, "loss": 0.5583, "step": 243215 }, { "epoch": 2.69, "learning_rate": 5.114861074202026e-06, "loss": 0.5425, "step": 243220 }, { "epoch": 2.69, "learning_rate": 5.113938347063514e-06, "loss": 0.5879, "step": 243225 }, { "epoch": 2.69, "learning_rate": 5.1130156199250015e-06, "loss": 0.6004, "step": 243230 }, { "epoch": 2.69, "learning_rate": 5.1120928927864885e-06, "loss": 0.6241, "step": 243235 }, { "epoch": 2.69, "learning_rate": 5.111170165647976e-06, "loss": 0.5871, "step": 243240 }, { "epoch": 2.69, "learning_rate": 5.110247438509463e-06, "loss": 0.5974, "step": 243245 }, { "epoch": 2.69, "learning_rate": 5.109324711370951e-06, "loss": 0.5562, "step": 243250 }, { "epoch": 2.69, "learning_rate": 5.108401984232439e-06, "loss": 0.5807, "step": 243255 }, { "epoch": 2.69, "learning_rate": 5.107479257093926e-06, "loss": 0.5657, "step": 243260 }, { "epoch": 2.69, "learning_rate": 5.106556529955414e-06, "loss": 0.5998, "step": 243265 }, { "epoch": 2.69, "learning_rate": 5.105633802816902e-06, "loss": 0.6095, "step": 243270 }, { "epoch": 2.69, "learning_rate": 5.10471107567839e-06, "loss": 0.5657, "step": 243275 }, { "epoch": 2.69, "learning_rate": 5.1037883485398774e-06, "loss": 0.552, "step": 243280 }, { "epoch": 2.69, "learning_rate": 5.1028656214013644e-06, "loss": 0.564, "step": 243285 }, { "epoch": 2.69, "learning_rate": 5.1019428942628514e-06, "loss": 0.6135, "step": 243290 }, { "epoch": 2.69, "learning_rate": 5.101020167124339e-06, "loss": 0.6107, "step": 243295 }, { "epoch": 2.69, "learning_rate": 5.100097439985827e-06, "loss": 0.6164, "step": 243300 }, { "epoch": 2.69, "learning_rate": 5.099174712847315e-06, "loss": 0.5386, "step": 243305 }, { "epoch": 2.69, "learning_rate": 5.098251985708803e-06, "loss": 0.5712, "step": 243310 }, { "epoch": 2.69, "learning_rate": 5.09732925857029e-06, "loss": 0.5643, "step": 243315 }, { "epoch": 2.69, "learning_rate": 5.096406531431777e-06, "loss": 0.5962, "step": 243320 }, { "epoch": 2.69, "learning_rate": 5.095483804293265e-06, "loss": 0.5517, "step": 243325 }, { "epoch": 2.69, "learning_rate": 5.0945610771547525e-06, "loss": 0.57, "step": 243330 }, { "epoch": 2.69, "learning_rate": 5.09363835001624e-06, "loss": 0.5391, "step": 243335 }, { "epoch": 2.69, "learning_rate": 5.092715622877728e-06, "loss": 0.611, "step": 243340 }, { "epoch": 2.69, "learning_rate": 5.091792895739215e-06, "loss": 0.6046, "step": 243345 }, { "epoch": 2.69, "learning_rate": 5.090870168600703e-06, "loss": 0.5597, "step": 243350 }, { "epoch": 2.69, "learning_rate": 5.089947441462191e-06, "loss": 0.5854, "step": 243355 }, { "epoch": 2.69, "learning_rate": 5.089024714323678e-06, "loss": 0.5959, "step": 243360 }, { "epoch": 2.69, "learning_rate": 5.088101987185166e-06, "loss": 0.6227, "step": 243365 }, { "epoch": 2.69, "learning_rate": 5.087179260046653e-06, "loss": 0.5406, "step": 243370 }, { "epoch": 2.69, "learning_rate": 5.086256532908141e-06, "loss": 0.5718, "step": 243375 }, { "epoch": 2.69, "learning_rate": 5.0853338057696285e-06, "loss": 0.5808, "step": 243380 }, { "epoch": 2.69, "learning_rate": 5.084411078631116e-06, "loss": 0.5845, "step": 243385 }, { "epoch": 2.69, "learning_rate": 5.083488351492604e-06, "loss": 0.566, "step": 243390 }, { "epoch": 2.7, "learning_rate": 5.082565624354091e-06, "loss": 0.62, "step": 243395 }, { "epoch": 2.7, "learning_rate": 5.081642897215578e-06, "loss": 0.5591, "step": 243400 }, { "epoch": 2.7, "learning_rate": 5.080720170077066e-06, "loss": 0.607, "step": 243405 }, { "epoch": 2.7, "learning_rate": 5.079797442938554e-06, "loss": 0.5835, "step": 243410 }, { "epoch": 2.7, "learning_rate": 5.078874715800042e-06, "loss": 0.5819, "step": 243415 }, { "epoch": 2.7, "learning_rate": 5.07795198866153e-06, "loss": 0.5719, "step": 243420 }, { "epoch": 2.7, "learning_rate": 5.077029261523017e-06, "loss": 0.5669, "step": 243425 }, { "epoch": 2.7, "learning_rate": 5.0761065343845044e-06, "loss": 0.5864, "step": 243430 }, { "epoch": 2.7, "learning_rate": 5.0751838072459914e-06, "loss": 0.5452, "step": 243435 }, { "epoch": 2.7, "learning_rate": 5.074261080107479e-06, "loss": 0.5332, "step": 243440 }, { "epoch": 2.7, "learning_rate": 5.073338352968967e-06, "loss": 0.5702, "step": 243445 }, { "epoch": 2.7, "learning_rate": 5.072415625830455e-06, "loss": 0.5771, "step": 243450 }, { "epoch": 2.7, "learning_rate": 5.071492898691942e-06, "loss": 0.6047, "step": 243455 }, { "epoch": 2.7, "learning_rate": 5.07057017155343e-06, "loss": 0.5686, "step": 243460 }, { "epoch": 2.7, "learning_rate": 5.069647444414918e-06, "loss": 0.5889, "step": 243465 }, { "epoch": 2.7, "learning_rate": 5.0687247172764055e-06, "loss": 0.5733, "step": 243470 }, { "epoch": 2.7, "learning_rate": 5.0678019901378925e-06, "loss": 0.6168, "step": 243475 }, { "epoch": 2.7, "learning_rate": 5.06687926299938e-06, "loss": 0.5744, "step": 243480 }, { "epoch": 2.7, "learning_rate": 5.065956535860867e-06, "loss": 0.549, "step": 243485 }, { "epoch": 2.7, "learning_rate": 5.065033808722355e-06, "loss": 0.5486, "step": 243490 }, { "epoch": 2.7, "learning_rate": 5.064111081583843e-06, "loss": 0.5762, "step": 243495 }, { "epoch": 2.7, "learning_rate": 5.063188354445331e-06, "loss": 0.6122, "step": 243500 }, { "epoch": 2.7, "learning_rate": 5.062265627306819e-06, "loss": 0.5947, "step": 243505 }, { "epoch": 2.7, "learning_rate": 5.061342900168305e-06, "loss": 0.5494, "step": 243510 }, { "epoch": 2.7, "learning_rate": 5.060420173029793e-06, "loss": 0.5501, "step": 243515 }, { "epoch": 2.7, "learning_rate": 5.059497445891281e-06, "loss": 0.5843, "step": 243520 }, { "epoch": 2.7, "learning_rate": 5.0585747187527685e-06, "loss": 0.5861, "step": 243525 }, { "epoch": 2.7, "learning_rate": 5.057651991614256e-06, "loss": 0.5198, "step": 243530 }, { "epoch": 2.7, "learning_rate": 5.056729264475743e-06, "loss": 0.597, "step": 243535 }, { "epoch": 2.7, "learning_rate": 5.055806537337231e-06, "loss": 0.587, "step": 243540 }, { "epoch": 2.7, "learning_rate": 5.054883810198719e-06, "loss": 0.5792, "step": 243545 }, { "epoch": 2.7, "learning_rate": 5.053961083060206e-06, "loss": 0.6084, "step": 243550 }, { "epoch": 2.7, "learning_rate": 5.053038355921694e-06, "loss": 0.6226, "step": 243555 }, { "epoch": 2.7, "learning_rate": 5.052115628783182e-06, "loss": 0.6112, "step": 243560 }, { "epoch": 2.7, "learning_rate": 5.051192901644669e-06, "loss": 0.5726, "step": 243565 }, { "epoch": 2.7, "learning_rate": 5.0502701745061566e-06, "loss": 0.5911, "step": 243570 }, { "epoch": 2.7, "learning_rate": 5.0493474473676444e-06, "loss": 0.5158, "step": 243575 }, { "epoch": 2.7, "learning_rate": 5.048424720229132e-06, "loss": 0.5322, "step": 243580 }, { "epoch": 2.7, "learning_rate": 5.04750199309062e-06, "loss": 0.6001, "step": 243585 }, { "epoch": 2.7, "learning_rate": 5.046579265952107e-06, "loss": 0.585, "step": 243590 }, { "epoch": 2.7, "learning_rate": 5.045656538813594e-06, "loss": 0.6012, "step": 243595 }, { "epoch": 2.7, "learning_rate": 5.044733811675082e-06, "loss": 0.5628, "step": 243600 }, { "epoch": 2.7, "learning_rate": 5.04381108453657e-06, "loss": 0.6101, "step": 243605 }, { "epoch": 2.7, "learning_rate": 5.042888357398058e-06, "loss": 0.5271, "step": 243610 }, { "epoch": 2.7, "learning_rate": 5.0419656302595455e-06, "loss": 0.5573, "step": 243615 }, { "epoch": 2.7, "learning_rate": 5.0410429031210325e-06, "loss": 0.5797, "step": 243620 }, { "epoch": 2.7, "learning_rate": 5.0401201759825195e-06, "loss": 0.5965, "step": 243625 }, { "epoch": 2.7, "learning_rate": 5.039197448844007e-06, "loss": 0.5592, "step": 243630 }, { "epoch": 2.7, "learning_rate": 5.038274721705495e-06, "loss": 0.5562, "step": 243635 }, { "epoch": 2.7, "learning_rate": 5.037351994566983e-06, "loss": 0.569, "step": 243640 }, { "epoch": 2.7, "learning_rate": 5.03642926742847e-06, "loss": 0.5689, "step": 243645 }, { "epoch": 2.7, "learning_rate": 5.035506540289958e-06, "loss": 0.5632, "step": 243650 }, { "epoch": 2.7, "learning_rate": 5.034583813151446e-06, "loss": 0.5876, "step": 243655 }, { "epoch": 2.7, "learning_rate": 5.033661086012934e-06, "loss": 0.6201, "step": 243660 }, { "epoch": 2.7, "learning_rate": 5.032738358874421e-06, "loss": 0.5598, "step": 243665 }, { "epoch": 2.7, "learning_rate": 5.0318156317359085e-06, "loss": 0.5591, "step": 243670 }, { "epoch": 2.7, "learning_rate": 5.0308929045973955e-06, "loss": 0.5682, "step": 243675 }, { "epoch": 2.7, "learning_rate": 5.029970177458883e-06, "loss": 0.5571, "step": 243680 }, { "epoch": 2.7, "learning_rate": 5.029047450320371e-06, "loss": 0.5166, "step": 243685 }, { "epoch": 2.7, "learning_rate": 5.028124723181859e-06, "loss": 0.5837, "step": 243690 }, { "epoch": 2.7, "learning_rate": 5.027201996043347e-06, "loss": 0.5901, "step": 243695 }, { "epoch": 2.7, "learning_rate": 5.026279268904834e-06, "loss": 0.5651, "step": 243700 }, { "epoch": 2.7, "learning_rate": 5.025356541766321e-06, "loss": 0.5679, "step": 243705 }, { "epoch": 2.7, "learning_rate": 5.024433814627809e-06, "loss": 0.5651, "step": 243710 }, { "epoch": 2.7, "learning_rate": 5.0235110874892966e-06, "loss": 0.6405, "step": 243715 }, { "epoch": 2.7, "learning_rate": 5.022588360350784e-06, "loss": 0.528, "step": 243720 }, { "epoch": 2.7, "learning_rate": 5.021665633212272e-06, "loss": 0.568, "step": 243725 }, { "epoch": 2.7, "learning_rate": 5.020742906073759e-06, "loss": 0.577, "step": 243730 }, { "epoch": 2.7, "learning_rate": 5.019820178935247e-06, "loss": 0.5633, "step": 243735 }, { "epoch": 2.7, "learning_rate": 5.018897451796734e-06, "loss": 0.5445, "step": 243740 }, { "epoch": 2.7, "learning_rate": 5.017974724658222e-06, "loss": 0.5553, "step": 243745 }, { "epoch": 2.7, "learning_rate": 5.01705199751971e-06, "loss": 0.6102, "step": 243750 }, { "epoch": 2.7, "learning_rate": 5.016129270381198e-06, "loss": 0.5486, "step": 243755 }, { "epoch": 2.7, "learning_rate": 5.015206543242685e-06, "loss": 0.5342, "step": 243760 }, { "epoch": 2.7, "learning_rate": 5.0142838161041725e-06, "loss": 0.5826, "step": 243765 }, { "epoch": 2.7, "learning_rate": 5.01336108896566e-06, "loss": 0.5974, "step": 243770 }, { "epoch": 2.7, "learning_rate": 5.012438361827147e-06, "loss": 0.5483, "step": 243775 }, { "epoch": 2.7, "learning_rate": 5.011515634688635e-06, "loss": 0.5429, "step": 243780 }, { "epoch": 2.7, "learning_rate": 5.010592907550122e-06, "loss": 0.5813, "step": 243785 }, { "epoch": 2.7, "learning_rate": 5.00967018041161e-06, "loss": 0.6027, "step": 243790 }, { "epoch": 2.7, "learning_rate": 5.008747453273098e-06, "loss": 0.5751, "step": 243795 }, { "epoch": 2.7, "learning_rate": 5.007824726134586e-06, "loss": 0.564, "step": 243800 }, { "epoch": 2.7, "learning_rate": 5.006901998996074e-06, "loss": 0.5702, "step": 243805 }, { "epoch": 2.7, "learning_rate": 5.005979271857561e-06, "loss": 0.6264, "step": 243810 }, { "epoch": 2.7, "learning_rate": 5.005056544719048e-06, "loss": 0.536, "step": 243815 }, { "epoch": 2.7, "learning_rate": 5.0041338175805355e-06, "loss": 0.5226, "step": 243820 }, { "epoch": 2.7, "learning_rate": 5.003211090442023e-06, "loss": 0.5795, "step": 243825 }, { "epoch": 2.7, "learning_rate": 5.002288363303511e-06, "loss": 0.6105, "step": 243830 }, { "epoch": 2.7, "learning_rate": 5.001365636164999e-06, "loss": 0.6057, "step": 243835 }, { "epoch": 2.7, "learning_rate": 5.000442909026486e-06, "loss": 0.563, "step": 243840 }, { "epoch": 2.7, "learning_rate": 4.999520181887974e-06, "loss": 0.5976, "step": 243845 }, { "epoch": 2.7, "learning_rate": 4.998597454749462e-06, "loss": 0.5851, "step": 243850 }, { "epoch": 2.7, "learning_rate": 4.997674727610949e-06, "loss": 0.5603, "step": 243855 }, { "epoch": 2.7, "learning_rate": 4.9967520004724366e-06, "loss": 0.5859, "step": 243860 }, { "epoch": 2.7, "learning_rate": 4.995829273333924e-06, "loss": 0.556, "step": 243865 }, { "epoch": 2.7, "learning_rate": 4.994906546195411e-06, "loss": 0.5896, "step": 243870 }, { "epoch": 2.7, "learning_rate": 4.993983819056899e-06, "loss": 0.5839, "step": 243875 }, { "epoch": 2.7, "learning_rate": 4.993061091918387e-06, "loss": 0.5667, "step": 243880 }, { "epoch": 2.7, "learning_rate": 4.992138364779875e-06, "loss": 0.5579, "step": 243885 }, { "epoch": 2.7, "learning_rate": 4.991215637641362e-06, "loss": 0.571, "step": 243890 }, { "epoch": 2.7, "learning_rate": 4.99029291050285e-06, "loss": 0.6216, "step": 243895 }, { "epoch": 2.7, "learning_rate": 4.989370183364337e-06, "loss": 0.591, "step": 243900 }, { "epoch": 2.7, "learning_rate": 4.988447456225825e-06, "loss": 0.6102, "step": 243905 }, { "epoch": 2.7, "learning_rate": 4.9875247290873125e-06, "loss": 0.5765, "step": 243910 }, { "epoch": 2.7, "learning_rate": 4.9866020019488e-06, "loss": 0.5755, "step": 243915 }, { "epoch": 2.7, "learning_rate": 4.985679274810287e-06, "loss": 0.6039, "step": 243920 }, { "epoch": 2.7, "learning_rate": 4.984756547671775e-06, "loss": 0.5739, "step": 243925 }, { "epoch": 2.7, "learning_rate": 4.983833820533262e-06, "loss": 0.5307, "step": 243930 }, { "epoch": 2.7, "learning_rate": 4.98291109339475e-06, "loss": 0.5861, "step": 243935 }, { "epoch": 2.7, "learning_rate": 4.981988366256238e-06, "loss": 0.5826, "step": 243940 }, { "epoch": 2.7, "learning_rate": 4.981065639117726e-06, "loss": 0.5786, "step": 243945 }, { "epoch": 2.7, "learning_rate": 4.980142911979213e-06, "loss": 0.6001, "step": 243950 }, { "epoch": 2.7, "learning_rate": 4.979220184840701e-06, "loss": 0.6021, "step": 243955 }, { "epoch": 2.7, "learning_rate": 4.9782974577021884e-06, "loss": 0.628, "step": 243960 }, { "epoch": 2.7, "learning_rate": 4.9773747305636754e-06, "loss": 0.6143, "step": 243965 }, { "epoch": 2.7, "learning_rate": 4.976452003425163e-06, "loss": 0.5334, "step": 243970 }, { "epoch": 2.7, "learning_rate": 4.975529276286651e-06, "loss": 0.6755, "step": 243975 }, { "epoch": 2.7, "learning_rate": 4.974606549148138e-06, "loss": 0.5932, "step": 243980 }, { "epoch": 2.7, "learning_rate": 4.973683822009626e-06, "loss": 0.5975, "step": 243985 }, { "epoch": 2.7, "learning_rate": 4.972761094871114e-06, "loss": 0.5997, "step": 243990 }, { "epoch": 2.7, "learning_rate": 4.971838367732602e-06, "loss": 0.5824, "step": 243995 }, { "epoch": 2.7, "learning_rate": 4.9709156405940895e-06, "loss": 0.5267, "step": 244000 }, { "epoch": 2.7, "eval_loss": 0.5480848550796509, "eval_runtime": 70.1084, "eval_samples_per_second": 28.527, "eval_steps_per_second": 14.264, "step": 244000 }, { "epoch": 2.7, "learning_rate": 4.9699929134555765e-06, "loss": 0.5826, "step": 244005 }, { "epoch": 2.7, "learning_rate": 4.9690701863170635e-06, "loss": 0.6233, "step": 244010 }, { "epoch": 2.7, "learning_rate": 4.968147459178551e-06, "loss": 0.5932, "step": 244015 }, { "epoch": 2.7, "learning_rate": 4.967224732040039e-06, "loss": 0.6358, "step": 244020 }, { "epoch": 2.7, "learning_rate": 4.966302004901527e-06, "loss": 0.5772, "step": 244025 }, { "epoch": 2.7, "learning_rate": 4.965379277763015e-06, "loss": 0.5617, "step": 244030 }, { "epoch": 2.7, "learning_rate": 4.964456550624502e-06, "loss": 0.5056, "step": 244035 }, { "epoch": 2.7, "learning_rate": 4.96353382348599e-06, "loss": 0.6119, "step": 244040 }, { "epoch": 2.7, "learning_rate": 4.962611096347477e-06, "loss": 0.5529, "step": 244045 }, { "epoch": 2.7, "learning_rate": 4.961688369208965e-06, "loss": 0.5977, "step": 244050 }, { "epoch": 2.7, "learning_rate": 4.9607656420704525e-06, "loss": 0.5786, "step": 244055 }, { "epoch": 2.7, "learning_rate": 4.9598429149319395e-06, "loss": 0.5528, "step": 244060 }, { "epoch": 2.7, "learning_rate": 4.958920187793427e-06, "loss": 0.576, "step": 244065 }, { "epoch": 2.7, "learning_rate": 4.957997460654915e-06, "loss": 0.5796, "step": 244070 }, { "epoch": 2.7, "learning_rate": 4.957074733516403e-06, "loss": 0.5943, "step": 244075 }, { "epoch": 2.7, "learning_rate": 4.95615200637789e-06, "loss": 0.552, "step": 244080 }, { "epoch": 2.7, "learning_rate": 4.955229279239378e-06, "loss": 0.5255, "step": 244085 }, { "epoch": 2.7, "learning_rate": 4.954306552100865e-06, "loss": 0.6086, "step": 244090 }, { "epoch": 2.7, "learning_rate": 4.953383824962353e-06, "loss": 0.5332, "step": 244095 }, { "epoch": 2.7, "learning_rate": 4.952461097823841e-06, "loss": 0.5734, "step": 244100 }, { "epoch": 2.7, "learning_rate": 4.9515383706853284e-06, "loss": 0.5083, "step": 244105 }, { "epoch": 2.7, "learning_rate": 4.950615643546816e-06, "loss": 0.5794, "step": 244110 }, { "epoch": 2.7, "learning_rate": 4.949692916408303e-06, "loss": 0.5474, "step": 244115 }, { "epoch": 2.7, "learning_rate": 4.94877018926979e-06, "loss": 0.5929, "step": 244120 }, { "epoch": 2.7, "learning_rate": 4.947847462131278e-06, "loss": 0.5889, "step": 244125 }, { "epoch": 2.7, "learning_rate": 4.946924734992766e-06, "loss": 0.5347, "step": 244130 }, { "epoch": 2.7, "learning_rate": 4.946002007854254e-06, "loss": 0.5521, "step": 244135 }, { "epoch": 2.7, "learning_rate": 4.945079280715742e-06, "loss": 0.5995, "step": 244140 }, { "epoch": 2.7, "learning_rate": 4.944156553577229e-06, "loss": 0.5726, "step": 244145 }, { "epoch": 2.7, "learning_rate": 4.9432338264387165e-06, "loss": 0.5746, "step": 244150 }, { "epoch": 2.7, "learning_rate": 4.9423110993002035e-06, "loss": 0.579, "step": 244155 }, { "epoch": 2.7, "learning_rate": 4.941388372161691e-06, "loss": 0.586, "step": 244160 }, { "epoch": 2.7, "learning_rate": 4.940465645023179e-06, "loss": 0.5664, "step": 244165 }, { "epoch": 2.7, "learning_rate": 4.939542917884667e-06, "loss": 0.6231, "step": 244170 }, { "epoch": 2.7, "learning_rate": 4.938620190746154e-06, "loss": 0.5612, "step": 244175 }, { "epoch": 2.7, "learning_rate": 4.937697463607642e-06, "loss": 0.5856, "step": 244180 }, { "epoch": 2.7, "learning_rate": 4.93677473646913e-06, "loss": 0.5553, "step": 244185 }, { "epoch": 2.7, "learning_rate": 4.935852009330618e-06, "loss": 0.586, "step": 244190 }, { "epoch": 2.7, "learning_rate": 4.934929282192105e-06, "loss": 0.5533, "step": 244195 }, { "epoch": 2.7, "learning_rate": 4.934006555053592e-06, "loss": 0.5797, "step": 244200 }, { "epoch": 2.7, "learning_rate": 4.9330838279150795e-06, "loss": 0.618, "step": 244205 }, { "epoch": 2.7, "learning_rate": 4.932161100776567e-06, "loss": 0.6163, "step": 244210 }, { "epoch": 2.7, "learning_rate": 4.931238373638055e-06, "loss": 0.5851, "step": 244215 }, { "epoch": 2.7, "learning_rate": 4.930315646499543e-06, "loss": 0.5611, "step": 244220 }, { "epoch": 2.7, "learning_rate": 4.92939291936103e-06, "loss": 0.5975, "step": 244225 }, { "epoch": 2.7, "learning_rate": 4.928470192222518e-06, "loss": 0.5834, "step": 244230 }, { "epoch": 2.7, "learning_rate": 4.927547465084005e-06, "loss": 0.6135, "step": 244235 }, { "epoch": 2.7, "learning_rate": 4.926624737945493e-06, "loss": 0.567, "step": 244240 }, { "epoch": 2.7, "learning_rate": 4.9257020108069806e-06, "loss": 0.5797, "step": 244245 }, { "epoch": 2.7, "learning_rate": 4.924779283668468e-06, "loss": 0.5851, "step": 244250 }, { "epoch": 2.7, "learning_rate": 4.923856556529955e-06, "loss": 0.5276, "step": 244255 }, { "epoch": 2.7, "learning_rate": 4.922933829391443e-06, "loss": 0.574, "step": 244260 }, { "epoch": 2.7, "learning_rate": 4.922011102252931e-06, "loss": 0.5468, "step": 244265 }, { "epoch": 2.7, "learning_rate": 4.921088375114418e-06, "loss": 0.6023, "step": 244270 }, { "epoch": 2.7, "learning_rate": 4.920165647975906e-06, "loss": 0.5635, "step": 244275 }, { "epoch": 2.7, "learning_rate": 4.919242920837394e-06, "loss": 0.5587, "step": 244280 }, { "epoch": 2.7, "learning_rate": 4.918320193698881e-06, "loss": 0.6094, "step": 244285 }, { "epoch": 2.7, "learning_rate": 4.917397466560369e-06, "loss": 0.6126, "step": 244290 }, { "epoch": 2.71, "learning_rate": 4.9164747394218565e-06, "loss": 0.6172, "step": 244295 }, { "epoch": 2.71, "learning_rate": 4.915552012283344e-06, "loss": 0.5722, "step": 244300 }, { "epoch": 2.71, "learning_rate": 4.914629285144832e-06, "loss": 0.5815, "step": 244305 }, { "epoch": 2.71, "learning_rate": 4.913706558006319e-06, "loss": 0.5927, "step": 244310 }, { "epoch": 2.71, "learning_rate": 4.912783830867806e-06, "loss": 0.5958, "step": 244315 }, { "epoch": 2.71, "learning_rate": 4.911861103729294e-06, "loss": 0.6147, "step": 244320 }, { "epoch": 2.71, "learning_rate": 4.910938376590782e-06, "loss": 0.5427, "step": 244325 }, { "epoch": 2.71, "learning_rate": 4.91001564945227e-06, "loss": 0.5872, "step": 244330 }, { "epoch": 2.71, "learning_rate": 4.909092922313757e-06, "loss": 0.5733, "step": 244335 }, { "epoch": 2.71, "learning_rate": 4.908170195175245e-06, "loss": 0.5822, "step": 244340 }, { "epoch": 2.71, "learning_rate": 4.907247468036732e-06, "loss": 0.536, "step": 244345 }, { "epoch": 2.71, "learning_rate": 4.9063247408982195e-06, "loss": 0.5461, "step": 244350 }, { "epoch": 2.71, "learning_rate": 4.905402013759707e-06, "loss": 0.6151, "step": 244355 }, { "epoch": 2.71, "learning_rate": 4.904479286621195e-06, "loss": 0.5845, "step": 244360 }, { "epoch": 2.71, "learning_rate": 4.903556559482682e-06, "loss": 0.5652, "step": 244365 }, { "epoch": 2.71, "learning_rate": 4.90263383234417e-06, "loss": 0.6024, "step": 244370 }, { "epoch": 2.71, "learning_rate": 4.901711105205658e-06, "loss": 0.5551, "step": 244375 }, { "epoch": 2.71, "learning_rate": 4.900788378067146e-06, "loss": 0.5335, "step": 244380 }, { "epoch": 2.71, "learning_rate": 4.899865650928633e-06, "loss": 0.5965, "step": 244385 }, { "epoch": 2.71, "learning_rate": 4.8989429237901206e-06, "loss": 0.5642, "step": 244390 }, { "epoch": 2.71, "learning_rate": 4.8980201966516076e-06, "loss": 0.5016, "step": 244395 }, { "epoch": 2.71, "learning_rate": 4.897097469513095e-06, "loss": 0.5947, "step": 244400 }, { "epoch": 2.71, "learning_rate": 4.896174742374583e-06, "loss": 0.5531, "step": 244405 }, { "epoch": 2.71, "learning_rate": 4.895252015236071e-06, "loss": 0.5287, "step": 244410 }, { "epoch": 2.71, "learning_rate": 4.894329288097559e-06, "loss": 0.5365, "step": 244415 }, { "epoch": 2.71, "learning_rate": 4.893406560959046e-06, "loss": 0.5538, "step": 244420 }, { "epoch": 2.71, "learning_rate": 4.892483833820533e-06, "loss": 0.641, "step": 244425 }, { "epoch": 2.71, "learning_rate": 4.891561106682021e-06, "loss": 0.5824, "step": 244430 }, { "epoch": 2.71, "learning_rate": 4.890638379543509e-06, "loss": 0.621, "step": 244435 }, { "epoch": 2.71, "learning_rate": 4.8897156524049965e-06, "loss": 0.5726, "step": 244440 }, { "epoch": 2.71, "learning_rate": 4.888792925266484e-06, "loss": 0.6142, "step": 244445 }, { "epoch": 2.71, "learning_rate": 4.887870198127971e-06, "loss": 0.5839, "step": 244450 }, { "epoch": 2.71, "learning_rate": 4.886947470989459e-06, "loss": 0.6419, "step": 244455 }, { "epoch": 2.71, "learning_rate": 4.886024743850946e-06, "loss": 0.6317, "step": 244460 }, { "epoch": 2.71, "learning_rate": 4.885102016712434e-06, "loss": 0.5603, "step": 244465 }, { "epoch": 2.71, "learning_rate": 4.884179289573922e-06, "loss": 0.5585, "step": 244470 }, { "epoch": 2.71, "learning_rate": 4.883256562435409e-06, "loss": 0.544, "step": 244475 }, { "epoch": 2.71, "learning_rate": 4.882333835296897e-06, "loss": 0.5647, "step": 244480 }, { "epoch": 2.71, "learning_rate": 4.881411108158385e-06, "loss": 0.6169, "step": 244485 }, { "epoch": 2.71, "learning_rate": 4.8804883810198725e-06, "loss": 0.6547, "step": 244490 }, { "epoch": 2.71, "learning_rate": 4.87956565388136e-06, "loss": 0.5859, "step": 244495 }, { "epoch": 2.71, "learning_rate": 4.878642926742847e-06, "loss": 0.5338, "step": 244500 }, { "epoch": 2.71, "learning_rate": 4.877720199604334e-06, "loss": 0.5636, "step": 244505 }, { "epoch": 2.71, "learning_rate": 4.876797472465822e-06, "loss": 0.5882, "step": 244510 }, { "epoch": 2.71, "learning_rate": 4.87587474532731e-06, "loss": 0.5833, "step": 244515 }, { "epoch": 2.71, "learning_rate": 4.874952018188798e-06, "loss": 0.5653, "step": 244520 }, { "epoch": 2.71, "learning_rate": 4.874029291050286e-06, "loss": 0.6328, "step": 244525 }, { "epoch": 2.71, "learning_rate": 4.873106563911773e-06, "loss": 0.5308, "step": 244530 }, { "epoch": 2.71, "learning_rate": 4.87218383677326e-06, "loss": 0.6024, "step": 244535 }, { "epoch": 2.71, "learning_rate": 4.8712611096347475e-06, "loss": 0.5443, "step": 244540 }, { "epoch": 2.71, "learning_rate": 4.870338382496235e-06, "loss": 0.5626, "step": 244545 }, { "epoch": 2.71, "learning_rate": 4.869415655357723e-06, "loss": 0.6012, "step": 244550 }, { "epoch": 2.71, "learning_rate": 4.868492928219211e-06, "loss": 0.6023, "step": 244555 }, { "epoch": 2.71, "learning_rate": 4.867570201080698e-06, "loss": 0.6454, "step": 244560 }, { "epoch": 2.71, "learning_rate": 4.866647473942186e-06, "loss": 0.5746, "step": 244565 }, { "epoch": 2.71, "learning_rate": 4.865724746803674e-06, "loss": 0.5774, "step": 244570 }, { "epoch": 2.71, "learning_rate": 4.864802019665161e-06, "loss": 0.6072, "step": 244575 }, { "epoch": 2.71, "learning_rate": 4.863879292526649e-06, "loss": 0.6026, "step": 244580 }, { "epoch": 2.71, "learning_rate": 4.8629565653881365e-06, "loss": 0.6122, "step": 244585 }, { "epoch": 2.71, "learning_rate": 4.8620338382496235e-06, "loss": 0.5518, "step": 244590 }, { "epoch": 2.71, "learning_rate": 4.861111111111111e-06, "loss": 0.5753, "step": 244595 }, { "epoch": 2.71, "learning_rate": 4.860188383972599e-06, "loss": 0.6127, "step": 244600 }, { "epoch": 2.71, "learning_rate": 4.859265656834087e-06, "loss": 0.5841, "step": 244605 }, { "epoch": 2.71, "learning_rate": 4.858342929695574e-06, "loss": 0.6142, "step": 244610 }, { "epoch": 2.71, "learning_rate": 4.857420202557061e-06, "loss": 0.5641, "step": 244615 }, { "epoch": 2.71, "learning_rate": 4.856497475418549e-06, "loss": 0.5773, "step": 244620 }, { "epoch": 2.71, "learning_rate": 4.855574748280037e-06, "loss": 0.5642, "step": 244625 }, { "epoch": 2.71, "learning_rate": 4.854652021141525e-06, "loss": 0.5661, "step": 244630 }, { "epoch": 2.71, "learning_rate": 4.8537292940030124e-06, "loss": 0.6, "step": 244635 }, { "epoch": 2.71, "learning_rate": 4.8528065668644994e-06, "loss": 0.6113, "step": 244640 }, { "epoch": 2.71, "learning_rate": 4.851883839725987e-06, "loss": 0.5554, "step": 244645 }, { "epoch": 2.71, "learning_rate": 4.850961112587474e-06, "loss": 0.6102, "step": 244650 }, { "epoch": 2.71, "learning_rate": 4.850038385448962e-06, "loss": 0.5814, "step": 244655 }, { "epoch": 2.71, "learning_rate": 4.84911565831045e-06, "loss": 0.5238, "step": 244660 }, { "epoch": 2.71, "learning_rate": 4.848192931171938e-06, "loss": 0.6258, "step": 244665 }, { "epoch": 2.71, "learning_rate": 4.847270204033425e-06, "loss": 0.5501, "step": 244670 }, { "epoch": 2.71, "learning_rate": 4.846347476894913e-06, "loss": 0.5454, "step": 244675 }, { "epoch": 2.71, "learning_rate": 4.8454247497564005e-06, "loss": 0.5772, "step": 244680 }, { "epoch": 2.71, "learning_rate": 4.844502022617888e-06, "loss": 0.5679, "step": 244685 }, { "epoch": 2.71, "learning_rate": 4.843579295479375e-06, "loss": 0.569, "step": 244690 }, { "epoch": 2.71, "learning_rate": 4.842656568340863e-06, "loss": 0.5896, "step": 244695 }, { "epoch": 2.71, "learning_rate": 4.84173384120235e-06, "loss": 0.529, "step": 244700 }, { "epoch": 2.71, "learning_rate": 4.840811114063838e-06, "loss": 0.6592, "step": 244705 }, { "epoch": 2.71, "learning_rate": 4.839888386925326e-06, "loss": 0.5706, "step": 244710 }, { "epoch": 2.71, "learning_rate": 4.838965659786814e-06, "loss": 0.5816, "step": 244715 }, { "epoch": 2.71, "learning_rate": 4.838042932648302e-06, "loss": 0.5837, "step": 244720 }, { "epoch": 2.71, "learning_rate": 4.837120205509788e-06, "loss": 0.6071, "step": 244725 }, { "epoch": 2.71, "learning_rate": 4.836197478371276e-06, "loss": 0.5679, "step": 244730 }, { "epoch": 2.71, "learning_rate": 4.8352747512327635e-06, "loss": 0.5769, "step": 244735 }, { "epoch": 2.71, "learning_rate": 4.834352024094251e-06, "loss": 0.5448, "step": 244740 }, { "epoch": 2.71, "learning_rate": 4.833429296955739e-06, "loss": 0.577, "step": 244745 }, { "epoch": 2.71, "learning_rate": 4.832506569817226e-06, "loss": 0.5863, "step": 244750 }, { "epoch": 2.71, "learning_rate": 4.831583842678714e-06, "loss": 0.5346, "step": 244755 }, { "epoch": 2.71, "learning_rate": 4.830661115540202e-06, "loss": 0.5863, "step": 244760 }, { "epoch": 2.71, "learning_rate": 4.829738388401689e-06, "loss": 0.5857, "step": 244765 }, { "epoch": 2.71, "learning_rate": 4.828815661263177e-06, "loss": 0.5903, "step": 244770 }, { "epoch": 2.71, "learning_rate": 4.827892934124665e-06, "loss": 0.5713, "step": 244775 }, { "epoch": 2.71, "learning_rate": 4.826970206986152e-06, "loss": 0.5141, "step": 244780 }, { "epoch": 2.71, "learning_rate": 4.8260474798476394e-06, "loss": 0.6296, "step": 244785 }, { "epoch": 2.71, "learning_rate": 4.825124752709127e-06, "loss": 0.6006, "step": 244790 }, { "epoch": 2.71, "learning_rate": 4.824202025570615e-06, "loss": 0.5509, "step": 244795 }, { "epoch": 2.71, "learning_rate": 4.823279298432102e-06, "loss": 0.5202, "step": 244800 }, { "epoch": 2.71, "learning_rate": 4.82235657129359e-06, "loss": 0.5671, "step": 244805 }, { "epoch": 2.71, "learning_rate": 4.821433844155077e-06, "loss": 0.6003, "step": 244810 }, { "epoch": 2.71, "learning_rate": 4.820511117016565e-06, "loss": 0.6205, "step": 244815 }, { "epoch": 2.71, "learning_rate": 4.819588389878053e-06, "loss": 0.5595, "step": 244820 }, { "epoch": 2.71, "learning_rate": 4.8186656627395405e-06, "loss": 0.5804, "step": 244825 }, { "epoch": 2.71, "learning_rate": 4.817742935601028e-06, "loss": 0.5783, "step": 244830 }, { "epoch": 2.71, "learning_rate": 4.816820208462515e-06, "loss": 0.6111, "step": 244835 }, { "epoch": 2.71, "learning_rate": 4.815897481324002e-06, "loss": 0.6187, "step": 244840 }, { "epoch": 2.71, "learning_rate": 4.81497475418549e-06, "loss": 0.5703, "step": 244845 }, { "epoch": 2.71, "learning_rate": 4.814052027046978e-06, "loss": 0.5781, "step": 244850 }, { "epoch": 2.71, "learning_rate": 4.813129299908466e-06, "loss": 0.6212, "step": 244855 }, { "epoch": 2.71, "learning_rate": 4.812206572769953e-06, "loss": 0.583, "step": 244860 }, { "epoch": 2.71, "learning_rate": 4.811283845631441e-06, "loss": 0.5438, "step": 244865 }, { "epoch": 2.71, "learning_rate": 4.810361118492929e-06, "loss": 0.6048, "step": 244870 }, { "epoch": 2.71, "learning_rate": 4.8094383913544165e-06, "loss": 0.5689, "step": 244875 }, { "epoch": 2.71, "learning_rate": 4.8085156642159035e-06, "loss": 0.5566, "step": 244880 }, { "epoch": 2.71, "learning_rate": 4.807592937077391e-06, "loss": 0.5626, "step": 244885 }, { "epoch": 2.71, "learning_rate": 4.806670209938878e-06, "loss": 0.6034, "step": 244890 }, { "epoch": 2.71, "learning_rate": 4.805747482800366e-06, "loss": 0.5256, "step": 244895 }, { "epoch": 2.71, "learning_rate": 4.804824755661854e-06, "loss": 0.5768, "step": 244900 }, { "epoch": 2.71, "learning_rate": 4.803902028523342e-06, "loss": 0.5799, "step": 244905 }, { "epoch": 2.71, "learning_rate": 4.80297930138483e-06, "loss": 0.5579, "step": 244910 }, { "epoch": 2.71, "learning_rate": 4.802056574246317e-06, "loss": 0.6012, "step": 244915 }, { "epoch": 2.71, "learning_rate": 4.801133847107804e-06, "loss": 0.5804, "step": 244920 }, { "epoch": 2.71, "learning_rate": 4.8002111199692916e-06, "loss": 0.6015, "step": 244925 }, { "epoch": 2.71, "learning_rate": 4.799288392830779e-06, "loss": 0.5637, "step": 244930 }, { "epoch": 2.71, "learning_rate": 4.798365665692267e-06, "loss": 0.5892, "step": 244935 }, { "epoch": 2.71, "learning_rate": 4.797442938553755e-06, "loss": 0.613, "step": 244940 }, { "epoch": 2.71, "learning_rate": 4.796520211415242e-06, "loss": 0.5418, "step": 244945 }, { "epoch": 2.71, "learning_rate": 4.79559748427673e-06, "loss": 0.5789, "step": 244950 }, { "epoch": 2.71, "learning_rate": 4.794674757138217e-06, "loss": 0.5886, "step": 244955 }, { "epoch": 2.71, "learning_rate": 4.793752029999705e-06, "loss": 0.6429, "step": 244960 }, { "epoch": 2.71, "learning_rate": 4.792829302861193e-06, "loss": 0.5704, "step": 244965 }, { "epoch": 2.71, "learning_rate": 4.7919065757226805e-06, "loss": 0.5685, "step": 244970 }, { "epoch": 2.71, "learning_rate": 4.7909838485841675e-06, "loss": 0.5484, "step": 244975 }, { "epoch": 2.71, "learning_rate": 4.790061121445655e-06, "loss": 0.5377, "step": 244980 }, { "epoch": 2.71, "learning_rate": 4.789138394307143e-06, "loss": 0.5873, "step": 244985 }, { "epoch": 2.71, "learning_rate": 4.78821566716863e-06, "loss": 0.5791, "step": 244990 }, { "epoch": 2.71, "learning_rate": 4.787292940030118e-06, "loss": 0.4956, "step": 244995 }, { "epoch": 2.71, "learning_rate": 4.786370212891605e-06, "loss": 0.5357, "step": 245000 }, { "epoch": 2.71, "eval_loss": 0.547398567199707, "eval_runtime": 69.7921, "eval_samples_per_second": 28.657, "eval_steps_per_second": 14.328, "step": 245000 }, { "epoch": 2.71, "learning_rate": 4.785447485753093e-06, "loss": 0.5929, "step": 245005 }, { "epoch": 2.71, "learning_rate": 4.784524758614581e-06, "loss": 0.603, "step": 245010 }, { "epoch": 2.71, "learning_rate": 4.783602031476069e-06, "loss": 0.5706, "step": 245015 }, { "epoch": 2.71, "learning_rate": 4.7826793043375565e-06, "loss": 0.5798, "step": 245020 }, { "epoch": 2.71, "learning_rate": 4.7817565771990435e-06, "loss": 0.6295, "step": 245025 }, { "epoch": 2.71, "learning_rate": 4.7808338500605305e-06, "loss": 0.6116, "step": 245030 }, { "epoch": 2.71, "learning_rate": 4.779911122922018e-06, "loss": 0.5354, "step": 245035 }, { "epoch": 2.71, "learning_rate": 4.778988395783506e-06, "loss": 0.6284, "step": 245040 }, { "epoch": 2.71, "learning_rate": 4.778065668644994e-06, "loss": 0.5634, "step": 245045 }, { "epoch": 2.71, "learning_rate": 4.777142941506482e-06, "loss": 0.5672, "step": 245050 }, { "epoch": 2.71, "learning_rate": 4.776220214367969e-06, "loss": 0.5759, "step": 245055 }, { "epoch": 2.71, "learning_rate": 4.775297487229457e-06, "loss": 0.6286, "step": 245060 }, { "epoch": 2.71, "learning_rate": 4.7743747600909446e-06, "loss": 0.5763, "step": 245065 }, { "epoch": 2.71, "learning_rate": 4.7734520329524316e-06, "loss": 0.5859, "step": 245070 }, { "epoch": 2.71, "learning_rate": 4.772529305813919e-06, "loss": 0.5751, "step": 245075 }, { "epoch": 2.71, "learning_rate": 4.771606578675407e-06, "loss": 0.612, "step": 245080 }, { "epoch": 2.71, "learning_rate": 4.770683851536894e-06, "loss": 0.619, "step": 245085 }, { "epoch": 2.71, "learning_rate": 4.769761124398382e-06, "loss": 0.6176, "step": 245090 }, { "epoch": 2.71, "learning_rate": 4.76883839725987e-06, "loss": 0.6056, "step": 245095 }, { "epoch": 2.71, "learning_rate": 4.767915670121358e-06, "loss": 0.588, "step": 245100 }, { "epoch": 2.71, "learning_rate": 4.766992942982845e-06, "loss": 0.577, "step": 245105 }, { "epoch": 2.71, "learning_rate": 4.766070215844333e-06, "loss": 0.5488, "step": 245110 }, { "epoch": 2.71, "learning_rate": 4.76514748870582e-06, "loss": 0.6091, "step": 245115 }, { "epoch": 2.71, "learning_rate": 4.7642247615673075e-06, "loss": 0.5201, "step": 245120 }, { "epoch": 2.71, "learning_rate": 4.763302034428795e-06, "loss": 0.6105, "step": 245125 }, { "epoch": 2.71, "learning_rate": 4.762379307290283e-06, "loss": 0.6018, "step": 245130 }, { "epoch": 2.71, "learning_rate": 4.76145658015177e-06, "loss": 0.5898, "step": 245135 }, { "epoch": 2.71, "learning_rate": 4.760533853013258e-06, "loss": 0.6106, "step": 245140 }, { "epoch": 2.71, "learning_rate": 4.759611125874745e-06, "loss": 0.5356, "step": 245145 }, { "epoch": 2.71, "learning_rate": 4.758688398736233e-06, "loss": 0.589, "step": 245150 }, { "epoch": 2.71, "learning_rate": 4.757765671597721e-06, "loss": 0.5798, "step": 245155 }, { "epoch": 2.71, "learning_rate": 4.756842944459209e-06, "loss": 0.6178, "step": 245160 }, { "epoch": 2.71, "learning_rate": 4.755920217320696e-06, "loss": 0.59, "step": 245165 }, { "epoch": 2.71, "learning_rate": 4.7549974901821834e-06, "loss": 0.5572, "step": 245170 }, { "epoch": 2.71, "learning_rate": 4.754074763043671e-06, "loss": 0.5914, "step": 245175 }, { "epoch": 2.71, "learning_rate": 4.753152035905158e-06, "loss": 0.6299, "step": 245180 }, { "epoch": 2.71, "learning_rate": 4.752229308766646e-06, "loss": 0.6025, "step": 245185 }, { "epoch": 2.71, "learning_rate": 4.751306581628134e-06, "loss": 0.5613, "step": 245190 }, { "epoch": 2.71, "learning_rate": 4.750383854489621e-06, "loss": 0.5639, "step": 245195 }, { "epoch": 2.72, "learning_rate": 4.749461127351109e-06, "loss": 0.5565, "step": 245200 }, { "epoch": 2.72, "learning_rate": 4.748538400212597e-06, "loss": 0.6003, "step": 245205 }, { "epoch": 2.72, "learning_rate": 4.7476156730740845e-06, "loss": 0.6665, "step": 245210 }, { "epoch": 2.72, "learning_rate": 4.746692945935572e-06, "loss": 0.5905, "step": 245215 }, { "epoch": 2.72, "learning_rate": 4.745770218797059e-06, "loss": 0.5735, "step": 245220 }, { "epoch": 2.72, "learning_rate": 4.744847491658546e-06, "loss": 0.5798, "step": 245225 }, { "epoch": 2.72, "learning_rate": 4.743924764520034e-06, "loss": 0.6089, "step": 245230 }, { "epoch": 2.72, "learning_rate": 4.743002037381522e-06, "loss": 0.5743, "step": 245235 }, { "epoch": 2.72, "learning_rate": 4.74207931024301e-06, "loss": 0.6205, "step": 245240 }, { "epoch": 2.72, "learning_rate": 4.741156583104498e-06, "loss": 0.561, "step": 245245 }, { "epoch": 2.72, "learning_rate": 4.740233855965985e-06, "loss": 0.6108, "step": 245250 }, { "epoch": 2.72, "learning_rate": 4.739311128827472e-06, "loss": 0.6041, "step": 245255 }, { "epoch": 2.72, "learning_rate": 4.73838840168896e-06, "loss": 0.5692, "step": 245260 }, { "epoch": 2.72, "learning_rate": 4.7374656745504475e-06, "loss": 0.5715, "step": 245265 }, { "epoch": 2.72, "learning_rate": 4.736542947411935e-06, "loss": 0.5899, "step": 245270 }, { "epoch": 2.72, "learning_rate": 4.735620220273422e-06, "loss": 0.5817, "step": 245275 }, { "epoch": 2.72, "learning_rate": 4.73469749313491e-06, "loss": 0.5745, "step": 245280 }, { "epoch": 2.72, "learning_rate": 4.733774765996398e-06, "loss": 0.5274, "step": 245285 }, { "epoch": 2.72, "learning_rate": 4.732852038857886e-06, "loss": 0.6179, "step": 245290 }, { "epoch": 2.72, "learning_rate": 4.731929311719373e-06, "loss": 0.5904, "step": 245295 }, { "epoch": 2.72, "learning_rate": 4.731006584580861e-06, "loss": 0.5893, "step": 245300 }, { "epoch": 2.72, "learning_rate": 4.730083857442348e-06, "loss": 0.5638, "step": 245305 }, { "epoch": 2.72, "learning_rate": 4.729161130303836e-06, "loss": 0.5842, "step": 245310 }, { "epoch": 2.72, "learning_rate": 4.7282384031653234e-06, "loss": 0.5634, "step": 245315 }, { "epoch": 2.72, "learning_rate": 4.727315676026811e-06, "loss": 0.5527, "step": 245320 }, { "epoch": 2.72, "learning_rate": 4.726392948888299e-06, "loss": 0.6042, "step": 245325 }, { "epoch": 2.72, "learning_rate": 4.725470221749786e-06, "loss": 0.5877, "step": 245330 }, { "epoch": 2.72, "learning_rate": 4.724547494611273e-06, "loss": 0.5733, "step": 245335 }, { "epoch": 2.72, "learning_rate": 4.723624767472761e-06, "loss": 0.5699, "step": 245340 }, { "epoch": 2.72, "learning_rate": 4.722702040334249e-06, "loss": 0.5756, "step": 245345 }, { "epoch": 2.72, "learning_rate": 4.721779313195737e-06, "loss": 0.5768, "step": 245350 }, { "epoch": 2.72, "learning_rate": 4.7208565860572245e-06, "loss": 0.5696, "step": 245355 }, { "epoch": 2.72, "learning_rate": 4.7199338589187115e-06, "loss": 0.5642, "step": 245360 }, { "epoch": 2.72, "learning_rate": 4.719011131780199e-06, "loss": 0.5592, "step": 245365 }, { "epoch": 2.72, "learning_rate": 4.718088404641686e-06, "loss": 0.617, "step": 245370 }, { "epoch": 2.72, "learning_rate": 4.717165677503174e-06, "loss": 0.5799, "step": 245375 }, { "epoch": 2.72, "learning_rate": 4.716242950364662e-06, "loss": 0.5174, "step": 245380 }, { "epoch": 2.72, "learning_rate": 4.71532022322615e-06, "loss": 0.5489, "step": 245385 }, { "epoch": 2.72, "learning_rate": 4.714397496087637e-06, "loss": 0.5939, "step": 245390 }, { "epoch": 2.72, "learning_rate": 4.713474768949125e-06, "loss": 0.5782, "step": 245395 }, { "epoch": 2.72, "learning_rate": 4.712552041810613e-06, "loss": 0.6131, "step": 245400 }, { "epoch": 2.72, "learning_rate": 4.7116293146721005e-06, "loss": 0.6005, "step": 245405 }, { "epoch": 2.72, "learning_rate": 4.7107065875335875e-06, "loss": 0.527, "step": 245410 }, { "epoch": 2.72, "learning_rate": 4.7097838603950745e-06, "loss": 0.5646, "step": 245415 }, { "epoch": 2.72, "learning_rate": 4.708861133256562e-06, "loss": 0.5769, "step": 245420 }, { "epoch": 2.72, "learning_rate": 4.70793840611805e-06, "loss": 0.5999, "step": 245425 }, { "epoch": 2.72, "learning_rate": 4.707015678979538e-06, "loss": 0.6264, "step": 245430 }, { "epoch": 2.72, "learning_rate": 4.706092951841026e-06, "loss": 0.5905, "step": 245435 }, { "epoch": 2.72, "learning_rate": 4.705170224702513e-06, "loss": 0.5761, "step": 245440 }, { "epoch": 2.72, "learning_rate": 4.704247497564e-06, "loss": 0.5931, "step": 245445 }, { "epoch": 2.72, "learning_rate": 4.703324770425488e-06, "loss": 0.576, "step": 245450 }, { "epoch": 2.72, "learning_rate": 4.702402043286976e-06, "loss": 0.5849, "step": 245455 }, { "epoch": 2.72, "learning_rate": 4.7014793161484634e-06, "loss": 0.5707, "step": 245460 }, { "epoch": 2.72, "learning_rate": 4.700556589009951e-06, "loss": 0.5739, "step": 245465 }, { "epoch": 2.72, "learning_rate": 4.699633861871438e-06, "loss": 0.5447, "step": 245470 }, { "epoch": 2.72, "learning_rate": 4.698711134732926e-06, "loss": 0.6148, "step": 245475 }, { "epoch": 2.72, "learning_rate": 4.697788407594414e-06, "loss": 0.6428, "step": 245480 }, { "epoch": 2.72, "learning_rate": 4.696865680455901e-06, "loss": 0.5216, "step": 245485 }, { "epoch": 2.72, "learning_rate": 4.695942953317389e-06, "loss": 0.5624, "step": 245490 }, { "epoch": 2.72, "learning_rate": 4.695020226178877e-06, "loss": 0.6127, "step": 245495 }, { "epoch": 2.72, "learning_rate": 4.694097499040364e-06, "loss": 0.5469, "step": 245500 }, { "epoch": 2.72, "learning_rate": 4.6931747719018515e-06, "loss": 0.5829, "step": 245505 }, { "epoch": 2.72, "learning_rate": 4.692252044763339e-06, "loss": 0.5913, "step": 245510 }, { "epoch": 2.72, "learning_rate": 4.691329317624827e-06, "loss": 0.5419, "step": 245515 }, { "epoch": 2.72, "learning_rate": 4.690406590486315e-06, "loss": 0.5574, "step": 245520 }, { "epoch": 2.72, "learning_rate": 4.689483863347802e-06, "loss": 0.5928, "step": 245525 }, { "epoch": 2.72, "learning_rate": 4.688561136209289e-06, "loss": 0.5992, "step": 245530 }, { "epoch": 2.72, "learning_rate": 4.687638409070777e-06, "loss": 0.583, "step": 245535 }, { "epoch": 2.72, "learning_rate": 4.686715681932265e-06, "loss": 0.574, "step": 245540 }, { "epoch": 2.72, "learning_rate": 4.685792954793753e-06, "loss": 0.6227, "step": 245545 }, { "epoch": 2.72, "learning_rate": 4.68487022765524e-06, "loss": 0.6036, "step": 245550 }, { "epoch": 2.72, "learning_rate": 4.6839475005167275e-06, "loss": 0.5867, "step": 245555 }, { "epoch": 2.72, "learning_rate": 4.6830247733782145e-06, "loss": 0.5487, "step": 245560 }, { "epoch": 2.72, "learning_rate": 4.682102046239702e-06, "loss": 0.5629, "step": 245565 }, { "epoch": 2.72, "learning_rate": 4.68117931910119e-06, "loss": 0.6071, "step": 245570 }, { "epoch": 2.72, "learning_rate": 4.680256591962678e-06, "loss": 0.6248, "step": 245575 }, { "epoch": 2.72, "learning_rate": 4.679333864824165e-06, "loss": 0.5836, "step": 245580 }, { "epoch": 2.72, "learning_rate": 4.678411137685653e-06, "loss": 0.6321, "step": 245585 }, { "epoch": 2.72, "learning_rate": 4.677488410547141e-06, "loss": 0.5793, "step": 245590 }, { "epoch": 2.72, "learning_rate": 4.6765656834086286e-06, "loss": 0.6403, "step": 245595 }, { "epoch": 2.72, "learning_rate": 4.6756429562701156e-06, "loss": 0.6042, "step": 245600 }, { "epoch": 2.72, "learning_rate": 4.674720229131603e-06, "loss": 0.5705, "step": 245605 }, { "epoch": 2.72, "learning_rate": 4.67379750199309e-06, "loss": 0.5886, "step": 245610 }, { "epoch": 2.72, "learning_rate": 4.672874774854578e-06, "loss": 0.5579, "step": 245615 }, { "epoch": 2.72, "learning_rate": 4.671952047716066e-06, "loss": 0.5369, "step": 245620 }, { "epoch": 2.72, "learning_rate": 4.671029320577554e-06, "loss": 0.5776, "step": 245625 }, { "epoch": 2.72, "learning_rate": 4.670106593439042e-06, "loss": 0.6024, "step": 245630 }, { "epoch": 2.72, "learning_rate": 4.669183866300529e-06, "loss": 0.5684, "step": 245635 }, { "epoch": 2.72, "learning_rate": 4.668261139162016e-06, "loss": 0.5568, "step": 245640 }, { "epoch": 2.72, "learning_rate": 4.667338412023504e-06, "loss": 0.5865, "step": 245645 }, { "epoch": 2.72, "learning_rate": 4.6664156848849915e-06, "loss": 0.5677, "step": 245650 }, { "epoch": 2.72, "learning_rate": 4.665492957746479e-06, "loss": 0.5772, "step": 245655 }, { "epoch": 2.72, "learning_rate": 4.664570230607967e-06, "loss": 0.6222, "step": 245660 }, { "epoch": 2.72, "learning_rate": 4.663647503469454e-06, "loss": 0.5837, "step": 245665 }, { "epoch": 2.72, "learning_rate": 4.662724776330942e-06, "loss": 0.5163, "step": 245670 }, { "epoch": 2.72, "learning_rate": 4.661802049192429e-06, "loss": 0.5489, "step": 245675 }, { "epoch": 2.72, "learning_rate": 4.660879322053917e-06, "loss": 0.5731, "step": 245680 }, { "epoch": 2.72, "learning_rate": 4.659956594915405e-06, "loss": 0.5478, "step": 245685 }, { "epoch": 2.72, "learning_rate": 4.659033867776892e-06, "loss": 0.5857, "step": 245690 }, { "epoch": 2.72, "learning_rate": 4.65811114063838e-06, "loss": 0.5489, "step": 245695 }, { "epoch": 2.72, "learning_rate": 4.6571884134998675e-06, "loss": 0.5743, "step": 245700 }, { "epoch": 2.72, "learning_rate": 4.656265686361355e-06, "loss": 0.6202, "step": 245705 }, { "epoch": 2.72, "learning_rate": 4.655342959222843e-06, "loss": 0.5616, "step": 245710 }, { "epoch": 2.72, "learning_rate": 4.65442023208433e-06, "loss": 0.5486, "step": 245715 }, { "epoch": 2.72, "learning_rate": 4.653497504945817e-06, "loss": 0.5829, "step": 245720 }, { "epoch": 2.72, "learning_rate": 4.652574777807305e-06, "loss": 0.5482, "step": 245725 }, { "epoch": 2.72, "learning_rate": 4.651652050668793e-06, "loss": 0.6045, "step": 245730 }, { "epoch": 2.72, "learning_rate": 4.650729323530281e-06, "loss": 0.6162, "step": 245735 }, { "epoch": 2.72, "learning_rate": 4.6498065963917686e-06, "loss": 0.563, "step": 245740 }, { "epoch": 2.72, "learning_rate": 4.6488838692532556e-06, "loss": 0.6156, "step": 245745 }, { "epoch": 2.72, "learning_rate": 4.6479611421147426e-06, "loss": 0.5398, "step": 245750 }, { "epoch": 2.72, "learning_rate": 4.64703841497623e-06, "loss": 0.5841, "step": 245755 }, { "epoch": 2.72, "learning_rate": 4.646115687837718e-06, "loss": 0.556, "step": 245760 }, { "epoch": 2.72, "learning_rate": 4.645192960699206e-06, "loss": 0.5918, "step": 245765 }, { "epoch": 2.72, "learning_rate": 4.644270233560694e-06, "loss": 0.5086, "step": 245770 }, { "epoch": 2.72, "learning_rate": 4.643347506422181e-06, "loss": 0.5688, "step": 245775 }, { "epoch": 2.72, "learning_rate": 4.642424779283669e-06, "loss": 0.5723, "step": 245780 }, { "epoch": 2.72, "learning_rate": 4.641502052145157e-06, "loss": 0.625, "step": 245785 }, { "epoch": 2.72, "learning_rate": 4.640579325006644e-06, "loss": 0.5817, "step": 245790 }, { "epoch": 2.72, "learning_rate": 4.6396565978681315e-06, "loss": 0.5986, "step": 245795 }, { "epoch": 2.72, "learning_rate": 4.638733870729619e-06, "loss": 0.597, "step": 245800 }, { "epoch": 2.72, "learning_rate": 4.637811143591106e-06, "loss": 0.5626, "step": 245805 }, { "epoch": 2.72, "learning_rate": 4.636888416452594e-06, "loss": 0.5154, "step": 245810 }, { "epoch": 2.72, "learning_rate": 4.635965689314082e-06, "loss": 0.5902, "step": 245815 }, { "epoch": 2.72, "learning_rate": 4.63504296217557e-06, "loss": 0.6363, "step": 245820 }, { "epoch": 2.72, "learning_rate": 4.634120235037057e-06, "loss": 0.6202, "step": 245825 }, { "epoch": 2.72, "learning_rate": 4.633197507898544e-06, "loss": 0.5621, "step": 245830 }, { "epoch": 2.72, "learning_rate": 4.632274780760032e-06, "loss": 0.56, "step": 245835 }, { "epoch": 2.72, "learning_rate": 4.63135205362152e-06, "loss": 0.5396, "step": 245840 }, { "epoch": 2.72, "learning_rate": 4.6304293264830074e-06, "loss": 0.5736, "step": 245845 }, { "epoch": 2.72, "learning_rate": 4.629506599344495e-06, "loss": 0.5806, "step": 245850 }, { "epoch": 2.72, "learning_rate": 4.628583872205982e-06, "loss": 0.6068, "step": 245855 }, { "epoch": 2.72, "learning_rate": 4.62766114506747e-06, "loss": 0.5479, "step": 245860 }, { "epoch": 2.72, "learning_rate": 4.626738417928957e-06, "loss": 0.5651, "step": 245865 }, { "epoch": 2.72, "learning_rate": 4.625815690790445e-06, "loss": 0.5795, "step": 245870 }, { "epoch": 2.72, "learning_rate": 4.624892963651933e-06, "loss": 0.5924, "step": 245875 }, { "epoch": 2.72, "learning_rate": 4.623970236513421e-06, "loss": 0.6005, "step": 245880 }, { "epoch": 2.72, "learning_rate": 4.623047509374908e-06, "loss": 0.617, "step": 245885 }, { "epoch": 2.72, "learning_rate": 4.6221247822363955e-06, "loss": 0.625, "step": 245890 }, { "epoch": 2.72, "learning_rate": 4.621202055097883e-06, "loss": 0.5581, "step": 245895 }, { "epoch": 2.72, "learning_rate": 4.620279327959371e-06, "loss": 0.6032, "step": 245900 }, { "epoch": 2.72, "learning_rate": 4.619356600820858e-06, "loss": 0.579, "step": 245905 }, { "epoch": 2.72, "learning_rate": 4.618433873682346e-06, "loss": 0.584, "step": 245910 }, { "epoch": 2.72, "learning_rate": 4.617511146543833e-06, "loss": 0.5848, "step": 245915 }, { "epoch": 2.72, "learning_rate": 4.616588419405321e-06, "loss": 0.6123, "step": 245920 }, { "epoch": 2.72, "learning_rate": 4.615665692266809e-06, "loss": 0.5141, "step": 245925 }, { "epoch": 2.72, "learning_rate": 4.614742965128297e-06, "loss": 0.6296, "step": 245930 }, { "epoch": 2.72, "learning_rate": 4.6138202379897845e-06, "loss": 0.6589, "step": 245935 }, { "epoch": 2.72, "learning_rate": 4.612897510851271e-06, "loss": 0.5657, "step": 245940 }, { "epoch": 2.72, "learning_rate": 4.6119747837127585e-06, "loss": 0.5703, "step": 245945 }, { "epoch": 2.72, "learning_rate": 4.611052056574246e-06, "loss": 0.5524, "step": 245950 }, { "epoch": 2.72, "learning_rate": 4.610129329435734e-06, "loss": 0.5719, "step": 245955 }, { "epoch": 2.72, "learning_rate": 4.609206602297222e-06, "loss": 0.5632, "step": 245960 }, { "epoch": 2.72, "learning_rate": 4.608283875158709e-06, "loss": 0.5363, "step": 245965 }, { "epoch": 2.72, "learning_rate": 4.607361148020197e-06, "loss": 0.6024, "step": 245970 }, { "epoch": 2.72, "learning_rate": 4.606438420881685e-06, "loss": 0.5987, "step": 245975 }, { "epoch": 2.72, "learning_rate": 4.605515693743172e-06, "loss": 0.6143, "step": 245980 }, { "epoch": 2.72, "learning_rate": 4.60459296660466e-06, "loss": 0.6128, "step": 245985 }, { "epoch": 2.72, "learning_rate": 4.6036702394661474e-06, "loss": 0.5518, "step": 245990 }, { "epoch": 2.72, "learning_rate": 4.6027475123276344e-06, "loss": 0.6591, "step": 245995 }, { "epoch": 2.72, "learning_rate": 4.601824785189122e-06, "loss": 0.585, "step": 246000 }, { "epoch": 2.72, "eval_loss": 0.5281015634536743, "eval_runtime": 69.7432, "eval_samples_per_second": 28.677, "eval_steps_per_second": 14.338, "step": 246000 }, { "epoch": 2.72, "learning_rate": 4.60090205805061e-06, "loss": 0.5816, "step": 246005 }, { "epoch": 2.72, "learning_rate": 4.599979330912098e-06, "loss": 0.603, "step": 246010 }, { "epoch": 2.72, "learning_rate": 4.599056603773585e-06, "loss": 0.5641, "step": 246015 }, { "epoch": 2.72, "learning_rate": 4.598133876635073e-06, "loss": 0.5794, "step": 246020 }, { "epoch": 2.72, "learning_rate": 4.59721114949656e-06, "loss": 0.5651, "step": 246025 }, { "epoch": 2.72, "learning_rate": 4.596288422358048e-06, "loss": 0.593, "step": 246030 }, { "epoch": 2.72, "learning_rate": 4.5953656952195355e-06, "loss": 0.5601, "step": 246035 }, { "epoch": 2.72, "learning_rate": 4.594442968081023e-06, "loss": 0.5362, "step": 246040 }, { "epoch": 2.72, "learning_rate": 4.593520240942511e-06, "loss": 0.585, "step": 246045 }, { "epoch": 2.72, "learning_rate": 4.592597513803998e-06, "loss": 0.6082, "step": 246050 }, { "epoch": 2.72, "learning_rate": 4.591674786665485e-06, "loss": 0.5762, "step": 246055 }, { "epoch": 2.72, "learning_rate": 4.590752059526973e-06, "loss": 0.5357, "step": 246060 }, { "epoch": 2.72, "learning_rate": 4.589829332388461e-06, "loss": 0.6123, "step": 246065 }, { "epoch": 2.72, "learning_rate": 4.588906605249949e-06, "loss": 0.6106, "step": 246070 }, { "epoch": 2.72, "learning_rate": 4.587983878111437e-06, "loss": 0.5815, "step": 246075 }, { "epoch": 2.72, "learning_rate": 4.587061150972924e-06, "loss": 0.5907, "step": 246080 }, { "epoch": 2.72, "learning_rate": 4.5861384238344115e-06, "loss": 0.5821, "step": 246085 }, { "epoch": 2.72, "learning_rate": 4.5852156966958985e-06, "loss": 0.5992, "step": 246090 }, { "epoch": 2.72, "learning_rate": 4.584292969557386e-06, "loss": 0.5928, "step": 246095 }, { "epoch": 2.72, "learning_rate": 4.583370242418874e-06, "loss": 0.5783, "step": 246100 }, { "epoch": 2.73, "learning_rate": 4.582447515280361e-06, "loss": 0.4926, "step": 246105 }, { "epoch": 2.73, "learning_rate": 4.581524788141849e-06, "loss": 0.5558, "step": 246110 }, { "epoch": 2.73, "learning_rate": 4.580602061003337e-06, "loss": 0.5953, "step": 246115 }, { "epoch": 2.73, "learning_rate": 4.579679333864825e-06, "loss": 0.5764, "step": 246120 }, { "epoch": 2.73, "learning_rate": 4.5787566067263126e-06, "loss": 0.5459, "step": 246125 }, { "epoch": 2.73, "learning_rate": 4.5778338795877996e-06, "loss": 0.5939, "step": 246130 }, { "epoch": 2.73, "learning_rate": 4.5769111524492866e-06, "loss": 0.6032, "step": 246135 }, { "epoch": 2.73, "learning_rate": 4.575988425310774e-06, "loss": 0.5762, "step": 246140 }, { "epoch": 2.73, "learning_rate": 4.575065698172262e-06, "loss": 0.5974, "step": 246145 }, { "epoch": 2.73, "learning_rate": 4.57414297103375e-06, "loss": 0.5315, "step": 246150 }, { "epoch": 2.73, "learning_rate": 4.573220243895238e-06, "loss": 0.5993, "step": 246155 }, { "epoch": 2.73, "learning_rate": 4.572297516756725e-06, "loss": 0.5397, "step": 246160 }, { "epoch": 2.73, "learning_rate": 4.571374789618213e-06, "loss": 0.5618, "step": 246165 }, { "epoch": 2.73, "learning_rate": 4.5704520624797e-06, "loss": 0.5805, "step": 246170 }, { "epoch": 2.73, "learning_rate": 4.569529335341188e-06, "loss": 0.5691, "step": 246175 }, { "epoch": 2.73, "learning_rate": 4.5686066082026755e-06, "loss": 0.5784, "step": 246180 }, { "epoch": 2.73, "learning_rate": 4.567683881064163e-06, "loss": 0.6008, "step": 246185 }, { "epoch": 2.73, "learning_rate": 4.56676115392565e-06, "loss": 0.539, "step": 246190 }, { "epoch": 2.73, "learning_rate": 4.565838426787138e-06, "loss": 0.5657, "step": 246195 }, { "epoch": 2.73, "learning_rate": 4.564915699648626e-06, "loss": 0.535, "step": 246200 }, { "epoch": 2.73, "learning_rate": 4.563992972510113e-06, "loss": 0.5833, "step": 246205 }, { "epoch": 2.73, "learning_rate": 4.563070245371601e-06, "loss": 0.5968, "step": 246210 }, { "epoch": 2.73, "learning_rate": 4.562147518233088e-06, "loss": 0.5302, "step": 246215 }, { "epoch": 2.73, "learning_rate": 4.561224791094576e-06, "loss": 0.582, "step": 246220 }, { "epoch": 2.73, "learning_rate": 4.560302063956064e-06, "loss": 0.59, "step": 246225 }, { "epoch": 2.73, "learning_rate": 4.5593793368175515e-06, "loss": 0.547, "step": 246230 }, { "epoch": 2.73, "learning_rate": 4.558456609679039e-06, "loss": 0.5818, "step": 246235 }, { "epoch": 2.73, "learning_rate": 4.557533882540526e-06, "loss": 0.5958, "step": 246240 }, { "epoch": 2.73, "learning_rate": 4.556611155402013e-06, "loss": 0.5844, "step": 246245 }, { "epoch": 2.73, "learning_rate": 4.555688428263501e-06, "loss": 0.5323, "step": 246250 }, { "epoch": 2.73, "learning_rate": 4.554765701124989e-06, "loss": 0.6088, "step": 246255 }, { "epoch": 2.73, "learning_rate": 4.553842973986477e-06, "loss": 0.518, "step": 246260 }, { "epoch": 2.73, "learning_rate": 4.552920246847965e-06, "loss": 0.5577, "step": 246265 }, { "epoch": 2.73, "learning_rate": 4.551997519709452e-06, "loss": 0.5711, "step": 246270 }, { "epoch": 2.73, "learning_rate": 4.5510747925709396e-06, "loss": 0.5736, "step": 246275 }, { "epoch": 2.73, "learning_rate": 4.5501520654324266e-06, "loss": 0.5654, "step": 246280 }, { "epoch": 2.73, "learning_rate": 4.549229338293914e-06, "loss": 0.5446, "step": 246285 }, { "epoch": 2.73, "learning_rate": 4.548306611155402e-06, "loss": 0.5604, "step": 246290 }, { "epoch": 2.73, "learning_rate": 4.54738388401689e-06, "loss": 0.5593, "step": 246295 }, { "epoch": 2.73, "learning_rate": 4.546461156878377e-06, "loss": 0.5773, "step": 246300 }, { "epoch": 2.73, "learning_rate": 4.545538429739865e-06, "loss": 0.5509, "step": 246305 }, { "epoch": 2.73, "learning_rate": 4.544615702601353e-06, "loss": 0.5747, "step": 246310 }, { "epoch": 2.73, "learning_rate": 4.543692975462841e-06, "loss": 0.6165, "step": 246315 }, { "epoch": 2.73, "learning_rate": 4.542770248324328e-06, "loss": 0.5533, "step": 246320 }, { "epoch": 2.73, "learning_rate": 4.5418475211858155e-06, "loss": 0.5858, "step": 246325 }, { "epoch": 2.73, "learning_rate": 4.5409247940473025e-06, "loss": 0.5855, "step": 246330 }, { "epoch": 2.73, "learning_rate": 4.54000206690879e-06, "loss": 0.5349, "step": 246335 }, { "epoch": 2.73, "learning_rate": 4.539079339770278e-06, "loss": 0.5648, "step": 246340 }, { "epoch": 2.73, "learning_rate": 4.538156612631766e-06, "loss": 0.5814, "step": 246345 }, { "epoch": 2.73, "learning_rate": 4.537233885493254e-06, "loss": 0.5678, "step": 246350 }, { "epoch": 2.73, "learning_rate": 4.536311158354741e-06, "loss": 0.6083, "step": 246355 }, { "epoch": 2.73, "learning_rate": 4.535388431216228e-06, "loss": 0.5565, "step": 246360 }, { "epoch": 2.73, "learning_rate": 4.534465704077716e-06, "loss": 0.5848, "step": 246365 }, { "epoch": 2.73, "learning_rate": 4.533542976939204e-06, "loss": 0.5629, "step": 246370 }, { "epoch": 2.73, "learning_rate": 4.5326202498006915e-06, "loss": 0.5854, "step": 246375 }, { "epoch": 2.73, "learning_rate": 4.5316975226621785e-06, "loss": 0.5881, "step": 246380 }, { "epoch": 2.73, "learning_rate": 4.530774795523666e-06, "loss": 0.5508, "step": 246385 }, { "epoch": 2.73, "learning_rate": 4.529852068385154e-06, "loss": 0.5459, "step": 246390 }, { "epoch": 2.73, "learning_rate": 4.528929341246641e-06, "loss": 0.564, "step": 246395 }, { "epoch": 2.73, "learning_rate": 4.528006614108129e-06, "loss": 0.5688, "step": 246400 }, { "epoch": 2.73, "learning_rate": 4.527083886969617e-06, "loss": 0.6261, "step": 246405 }, { "epoch": 2.73, "learning_rate": 4.526161159831104e-06, "loss": 0.6137, "step": 246410 }, { "epoch": 2.73, "learning_rate": 4.525238432692592e-06, "loss": 0.615, "step": 246415 }, { "epoch": 2.73, "learning_rate": 4.5243157055540795e-06, "loss": 0.5826, "step": 246420 }, { "epoch": 2.73, "learning_rate": 4.523392978415567e-06, "loss": 0.5937, "step": 246425 }, { "epoch": 2.73, "learning_rate": 4.522470251277055e-06, "loss": 0.5783, "step": 246430 }, { "epoch": 2.73, "learning_rate": 4.521547524138542e-06, "loss": 0.554, "step": 246435 }, { "epoch": 2.73, "learning_rate": 4.520624797000029e-06, "loss": 0.5265, "step": 246440 }, { "epoch": 2.73, "learning_rate": 4.519702069861517e-06, "loss": 0.5454, "step": 246445 }, { "epoch": 2.73, "learning_rate": 4.518779342723005e-06, "loss": 0.5675, "step": 246450 }, { "epoch": 2.73, "learning_rate": 4.517856615584493e-06, "loss": 0.5977, "step": 246455 }, { "epoch": 2.73, "learning_rate": 4.516933888445981e-06, "loss": 0.5597, "step": 246460 }, { "epoch": 2.73, "learning_rate": 4.516011161307468e-06, "loss": 0.5943, "step": 246465 }, { "epoch": 2.73, "learning_rate": 4.515088434168955e-06, "loss": 0.5536, "step": 246470 }, { "epoch": 2.73, "learning_rate": 4.5141657070304425e-06, "loss": 0.5716, "step": 246475 }, { "epoch": 2.73, "learning_rate": 4.51324297989193e-06, "loss": 0.6108, "step": 246480 }, { "epoch": 2.73, "learning_rate": 4.512320252753418e-06, "loss": 0.5203, "step": 246485 }, { "epoch": 2.73, "learning_rate": 4.511397525614905e-06, "loss": 0.5989, "step": 246490 }, { "epoch": 2.73, "learning_rate": 4.510474798476393e-06, "loss": 0.5994, "step": 246495 }, { "epoch": 2.73, "learning_rate": 4.509552071337881e-06, "loss": 0.5308, "step": 246500 }, { "epoch": 2.73, "learning_rate": 4.508629344199369e-06, "loss": 0.5241, "step": 246505 }, { "epoch": 2.73, "learning_rate": 4.507706617060856e-06, "loss": 0.6096, "step": 246510 }, { "epoch": 2.73, "learning_rate": 4.506783889922344e-06, "loss": 0.581, "step": 246515 }, { "epoch": 2.73, "learning_rate": 4.505861162783831e-06, "loss": 0.5422, "step": 246520 }, { "epoch": 2.73, "learning_rate": 4.5049384356453184e-06, "loss": 0.5572, "step": 246525 }, { "epoch": 2.73, "learning_rate": 4.504015708506806e-06, "loss": 0.5683, "step": 246530 }, { "epoch": 2.73, "learning_rate": 4.503092981368294e-06, "loss": 0.6079, "step": 246535 }, { "epoch": 2.73, "learning_rate": 4.502170254229782e-06, "loss": 0.5818, "step": 246540 }, { "epoch": 2.73, "learning_rate": 4.501247527091269e-06, "loss": 0.5764, "step": 246545 }, { "epoch": 2.73, "learning_rate": 4.500324799952756e-06, "loss": 0.6185, "step": 246550 }, { "epoch": 2.73, "learning_rate": 4.499402072814244e-06, "loss": 0.558, "step": 246555 }, { "epoch": 2.73, "learning_rate": 4.498479345675732e-06, "loss": 0.5681, "step": 246560 }, { "epoch": 2.73, "learning_rate": 4.4975566185372195e-06, "loss": 0.559, "step": 246565 }, { "epoch": 2.73, "learning_rate": 4.496633891398707e-06, "loss": 0.5491, "step": 246570 }, { "epoch": 2.73, "learning_rate": 4.495711164260194e-06, "loss": 0.5495, "step": 246575 }, { "epoch": 2.73, "learning_rate": 4.494788437121682e-06, "loss": 0.5555, "step": 246580 }, { "epoch": 2.73, "learning_rate": 4.493865709983169e-06, "loss": 0.5943, "step": 246585 }, { "epoch": 2.73, "learning_rate": 4.492942982844657e-06, "loss": 0.5675, "step": 246590 }, { "epoch": 2.73, "learning_rate": 4.492020255706145e-06, "loss": 0.5761, "step": 246595 }, { "epoch": 2.73, "learning_rate": 4.491097528567633e-06, "loss": 0.5992, "step": 246600 }, { "epoch": 2.73, "learning_rate": 4.49017480142912e-06, "loss": 0.5579, "step": 246605 }, { "epoch": 2.73, "learning_rate": 4.489252074290608e-06, "loss": 0.6335, "step": 246610 }, { "epoch": 2.73, "learning_rate": 4.4883293471520955e-06, "loss": 0.5598, "step": 246615 }, { "epoch": 2.73, "learning_rate": 4.487406620013583e-06, "loss": 0.5794, "step": 246620 }, { "epoch": 2.73, "learning_rate": 4.48648389287507e-06, "loss": 0.609, "step": 246625 }, { "epoch": 2.73, "learning_rate": 4.485561165736557e-06, "loss": 0.5745, "step": 246630 }, { "epoch": 2.73, "learning_rate": 4.484638438598045e-06, "loss": 0.5833, "step": 246635 }, { "epoch": 2.73, "learning_rate": 4.483715711459533e-06, "loss": 0.5718, "step": 246640 }, { "epoch": 2.73, "learning_rate": 4.482792984321021e-06, "loss": 0.6137, "step": 246645 }, { "epoch": 2.73, "learning_rate": 4.481870257182509e-06, "loss": 0.5459, "step": 246650 }, { "epoch": 2.73, "learning_rate": 4.480947530043996e-06, "loss": 0.5805, "step": 246655 }, { "epoch": 2.73, "learning_rate": 4.480024802905483e-06, "loss": 0.5509, "step": 246660 }, { "epoch": 2.73, "learning_rate": 4.479102075766971e-06, "loss": 0.5645, "step": 246665 }, { "epoch": 2.73, "learning_rate": 4.4781793486284584e-06, "loss": 0.662, "step": 246670 }, { "epoch": 2.73, "learning_rate": 4.477256621489946e-06, "loss": 0.5274, "step": 246675 }, { "epoch": 2.73, "learning_rate": 4.476333894351434e-06, "loss": 0.6253, "step": 246680 }, { "epoch": 2.73, "learning_rate": 4.475411167212921e-06, "loss": 0.5704, "step": 246685 }, { "epoch": 2.73, "learning_rate": 4.474488440074409e-06, "loss": 0.5609, "step": 246690 }, { "epoch": 2.73, "learning_rate": 4.473565712935897e-06, "loss": 0.5742, "step": 246695 }, { "epoch": 2.73, "learning_rate": 4.472642985797384e-06, "loss": 0.5948, "step": 246700 }, { "epoch": 2.73, "learning_rate": 4.471720258658872e-06, "loss": 0.575, "step": 246705 }, { "epoch": 2.73, "learning_rate": 4.4707975315203595e-06, "loss": 0.5233, "step": 246710 }, { "epoch": 2.73, "learning_rate": 4.4698748043818465e-06, "loss": 0.5937, "step": 246715 }, { "epoch": 2.73, "learning_rate": 4.468952077243334e-06, "loss": 0.5549, "step": 246720 }, { "epoch": 2.73, "learning_rate": 4.468029350104822e-06, "loss": 0.598, "step": 246725 }, { "epoch": 2.73, "learning_rate": 4.46710662296631e-06, "loss": 0.5973, "step": 246730 }, { "epoch": 2.73, "learning_rate": 4.466183895827797e-06, "loss": 0.6014, "step": 246735 }, { "epoch": 2.73, "learning_rate": 4.465261168689285e-06, "loss": 0.5636, "step": 246740 }, { "epoch": 2.73, "learning_rate": 4.464338441550772e-06, "loss": 0.5704, "step": 246745 }, { "epoch": 2.73, "learning_rate": 4.46341571441226e-06, "loss": 0.5988, "step": 246750 }, { "epoch": 2.73, "learning_rate": 4.462492987273748e-06, "loss": 0.5184, "step": 246755 }, { "epoch": 2.73, "learning_rate": 4.4615702601352355e-06, "loss": 0.5617, "step": 246760 }, { "epoch": 2.73, "learning_rate": 4.4606475329967225e-06, "loss": 0.6112, "step": 246765 }, { "epoch": 2.73, "learning_rate": 4.45972480585821e-06, "loss": 0.6126, "step": 246770 }, { "epoch": 2.73, "learning_rate": 4.458802078719697e-06, "loss": 0.5619, "step": 246775 }, { "epoch": 2.73, "learning_rate": 4.457879351581185e-06, "loss": 0.5884, "step": 246780 }, { "epoch": 2.73, "learning_rate": 4.456956624442673e-06, "loss": 0.5706, "step": 246785 }, { "epoch": 2.73, "learning_rate": 4.456033897304161e-06, "loss": 0.5836, "step": 246790 }, { "epoch": 2.73, "learning_rate": 4.455111170165648e-06, "loss": 0.5508, "step": 246795 }, { "epoch": 2.73, "learning_rate": 4.454188443027136e-06, "loss": 0.5551, "step": 246800 }, { "epoch": 2.73, "learning_rate": 4.4532657158886236e-06, "loss": 0.5793, "step": 246805 }, { "epoch": 2.73, "learning_rate": 4.452342988750111e-06, "loss": 0.6045, "step": 246810 }, { "epoch": 2.73, "learning_rate": 4.451420261611598e-06, "loss": 0.5921, "step": 246815 }, { "epoch": 2.73, "learning_rate": 4.450497534473086e-06, "loss": 0.5992, "step": 246820 }, { "epoch": 2.73, "learning_rate": 4.449574807334573e-06, "loss": 0.5872, "step": 246825 }, { "epoch": 2.73, "learning_rate": 4.448652080196061e-06, "loss": 0.5589, "step": 246830 }, { "epoch": 2.73, "learning_rate": 4.447729353057549e-06, "loss": 0.5609, "step": 246835 }, { "epoch": 2.73, "learning_rate": 4.446806625919037e-06, "loss": 0.5982, "step": 246840 }, { "epoch": 2.73, "learning_rate": 4.445883898780525e-06, "loss": 0.5756, "step": 246845 }, { "epoch": 2.73, "learning_rate": 4.444961171642012e-06, "loss": 0.6067, "step": 246850 }, { "epoch": 2.73, "learning_rate": 4.444038444503499e-06, "loss": 0.5471, "step": 246855 }, { "epoch": 2.73, "learning_rate": 4.4431157173649865e-06, "loss": 0.5578, "step": 246860 }, { "epoch": 2.73, "learning_rate": 4.442192990226474e-06, "loss": 0.6007, "step": 246865 }, { "epoch": 2.73, "learning_rate": 4.441270263087962e-06, "loss": 0.5746, "step": 246870 }, { "epoch": 2.73, "learning_rate": 4.44034753594945e-06, "loss": 0.5544, "step": 246875 }, { "epoch": 2.73, "learning_rate": 4.439424808810937e-06, "loss": 0.6134, "step": 246880 }, { "epoch": 2.73, "learning_rate": 4.438502081672425e-06, "loss": 0.5609, "step": 246885 }, { "epoch": 2.73, "learning_rate": 4.437579354533912e-06, "loss": 0.5704, "step": 246890 }, { "epoch": 2.73, "learning_rate": 4.4366566273954e-06, "loss": 0.5781, "step": 246895 }, { "epoch": 2.73, "learning_rate": 4.435733900256888e-06, "loss": 0.5951, "step": 246900 }, { "epoch": 2.73, "learning_rate": 4.434811173118375e-06, "loss": 0.6006, "step": 246905 }, { "epoch": 2.73, "learning_rate": 4.4338884459798625e-06, "loss": 0.553, "step": 246910 }, { "epoch": 2.73, "learning_rate": 4.43296571884135e-06, "loss": 0.5649, "step": 246915 }, { "epoch": 2.73, "learning_rate": 4.432042991702838e-06, "loss": 0.5706, "step": 246920 }, { "epoch": 2.73, "learning_rate": 4.431120264564325e-06, "loss": 0.5457, "step": 246925 }, { "epoch": 2.73, "learning_rate": 4.430197537425813e-06, "loss": 0.6125, "step": 246930 }, { "epoch": 2.73, "learning_rate": 4.4292748102873e-06, "loss": 0.5595, "step": 246935 }, { "epoch": 2.73, "learning_rate": 4.428352083148788e-06, "loss": 0.6365, "step": 246940 }, { "epoch": 2.73, "learning_rate": 4.427429356010276e-06, "loss": 0.6102, "step": 246945 }, { "epoch": 2.73, "learning_rate": 4.4265066288717636e-06, "loss": 0.5321, "step": 246950 }, { "epoch": 2.73, "learning_rate": 4.425583901733251e-06, "loss": 0.5989, "step": 246955 }, { "epoch": 2.73, "learning_rate": 4.424661174594738e-06, "loss": 0.594, "step": 246960 }, { "epoch": 2.73, "learning_rate": 4.423738447456225e-06, "loss": 0.5749, "step": 246965 }, { "epoch": 2.73, "learning_rate": 4.422815720317713e-06, "loss": 0.6319, "step": 246970 }, { "epoch": 2.73, "learning_rate": 4.421892993179201e-06, "loss": 0.5702, "step": 246975 }, { "epoch": 2.73, "learning_rate": 4.420970266040689e-06, "loss": 0.5899, "step": 246980 }, { "epoch": 2.73, "learning_rate": 4.420047538902177e-06, "loss": 0.5662, "step": 246985 }, { "epoch": 2.73, "learning_rate": 4.419124811763664e-06, "loss": 0.5477, "step": 246990 }, { "epoch": 2.73, "learning_rate": 4.418202084625152e-06, "loss": 0.5824, "step": 246995 }, { "epoch": 2.73, "learning_rate": 4.4172793574866395e-06, "loss": 0.5894, "step": 247000 }, { "epoch": 2.73, "eval_loss": 0.5456817746162415, "eval_runtime": 69.741, "eval_samples_per_second": 28.678, "eval_steps_per_second": 14.339, "step": 247000 }, { "epoch": 2.74, "learning_rate": 4.4163566303481265e-06, "loss": 0.6121, "step": 247005 }, { "epoch": 2.74, "learning_rate": 4.415433903209614e-06, "loss": 0.5666, "step": 247010 }, { "epoch": 2.74, "learning_rate": 4.414511176071102e-06, "loss": 0.5652, "step": 247015 }, { "epoch": 2.74, "learning_rate": 4.413588448932589e-06, "loss": 0.6231, "step": 247020 }, { "epoch": 2.74, "learning_rate": 4.412665721794077e-06, "loss": 0.5568, "step": 247025 }, { "epoch": 2.74, "learning_rate": 4.411742994655565e-06, "loss": 0.6043, "step": 247030 }, { "epoch": 2.74, "learning_rate": 4.410820267517053e-06, "loss": 0.5821, "step": 247035 }, { "epoch": 2.74, "learning_rate": 4.40989754037854e-06, "loss": 0.5978, "step": 247040 }, { "epoch": 2.74, "learning_rate": 4.408974813240027e-06, "loss": 0.5615, "step": 247045 }, { "epoch": 2.74, "learning_rate": 4.408052086101515e-06, "loss": 0.6235, "step": 247050 }, { "epoch": 2.74, "learning_rate": 4.4071293589630024e-06, "loss": 0.55, "step": 247055 }, { "epoch": 2.74, "learning_rate": 4.40620663182449e-06, "loss": 0.6282, "step": 247060 }, { "epoch": 2.74, "learning_rate": 4.405283904685978e-06, "loss": 0.5422, "step": 247065 }, { "epoch": 2.74, "learning_rate": 4.404361177547465e-06, "loss": 0.5659, "step": 247070 }, { "epoch": 2.74, "learning_rate": 4.403438450408953e-06, "loss": 0.5774, "step": 247075 }, { "epoch": 2.74, "learning_rate": 4.40251572327044e-06, "loss": 0.5777, "step": 247080 }, { "epoch": 2.74, "learning_rate": 4.401592996131928e-06, "loss": 0.5474, "step": 247085 }, { "epoch": 2.74, "learning_rate": 4.400670268993416e-06, "loss": 0.5402, "step": 247090 }, { "epoch": 2.74, "learning_rate": 4.3997475418549035e-06, "loss": 0.6091, "step": 247095 }, { "epoch": 2.74, "learning_rate": 4.3988248147163905e-06, "loss": 0.6138, "step": 247100 }, { "epoch": 2.74, "learning_rate": 4.397902087577878e-06, "loss": 0.5901, "step": 247105 }, { "epoch": 2.74, "learning_rate": 4.396979360439366e-06, "loss": 0.5398, "step": 247110 }, { "epoch": 2.74, "learning_rate": 4.396056633300853e-06, "loss": 0.5432, "step": 247115 }, { "epoch": 2.74, "learning_rate": 4.395133906162341e-06, "loss": 0.5833, "step": 247120 }, { "epoch": 2.74, "learning_rate": 4.394211179023829e-06, "loss": 0.5593, "step": 247125 }, { "epoch": 2.74, "learning_rate": 4.393288451885316e-06, "loss": 0.6172, "step": 247130 }, { "epoch": 2.74, "learning_rate": 4.392365724746804e-06, "loss": 0.5778, "step": 247135 }, { "epoch": 2.74, "learning_rate": 4.391442997608292e-06, "loss": 0.5887, "step": 247140 }, { "epoch": 2.74, "learning_rate": 4.3905202704697795e-06, "loss": 0.5862, "step": 247145 }, { "epoch": 2.74, "learning_rate": 4.389597543331267e-06, "loss": 0.587, "step": 247150 }, { "epoch": 2.74, "learning_rate": 4.388674816192754e-06, "loss": 0.601, "step": 247155 }, { "epoch": 2.74, "learning_rate": 4.387752089054241e-06, "loss": 0.5776, "step": 247160 }, { "epoch": 2.74, "learning_rate": 4.386829361915729e-06, "loss": 0.5962, "step": 247165 }, { "epoch": 2.74, "learning_rate": 4.385906634777217e-06, "loss": 0.611, "step": 247170 }, { "epoch": 2.74, "learning_rate": 4.384983907638705e-06, "loss": 0.5402, "step": 247175 }, { "epoch": 2.74, "learning_rate": 4.384061180500192e-06, "loss": 0.6423, "step": 247180 }, { "epoch": 2.74, "learning_rate": 4.38313845336168e-06, "loss": 0.5732, "step": 247185 }, { "epoch": 2.74, "learning_rate": 4.382215726223168e-06, "loss": 0.6061, "step": 247190 }, { "epoch": 2.74, "learning_rate": 4.381292999084655e-06, "loss": 0.5874, "step": 247195 }, { "epoch": 2.74, "learning_rate": 4.3803702719461424e-06, "loss": 0.6016, "step": 247200 }, { "epoch": 2.74, "learning_rate": 4.37944754480763e-06, "loss": 0.5573, "step": 247205 }, { "epoch": 2.74, "learning_rate": 4.378524817669117e-06, "loss": 0.587, "step": 247210 }, { "epoch": 2.74, "learning_rate": 4.377602090530605e-06, "loss": 0.58, "step": 247215 }, { "epoch": 2.74, "learning_rate": 4.376679363392093e-06, "loss": 0.5837, "step": 247220 }, { "epoch": 2.74, "learning_rate": 4.375756636253581e-06, "loss": 0.6097, "step": 247225 }, { "epoch": 2.74, "learning_rate": 4.374833909115068e-06, "loss": 0.5551, "step": 247230 }, { "epoch": 2.74, "learning_rate": 4.373911181976556e-06, "loss": 0.5448, "step": 247235 }, { "epoch": 2.74, "learning_rate": 4.372988454838043e-06, "loss": 0.5647, "step": 247240 }, { "epoch": 2.74, "learning_rate": 4.3720657276995305e-06, "loss": 0.5889, "step": 247245 }, { "epoch": 2.74, "learning_rate": 4.371143000561018e-06, "loss": 0.5905, "step": 247250 }, { "epoch": 2.74, "learning_rate": 4.370220273422506e-06, "loss": 0.5309, "step": 247255 }, { "epoch": 2.74, "learning_rate": 4.369297546283994e-06, "loss": 0.6127, "step": 247260 }, { "epoch": 2.74, "learning_rate": 4.368374819145481e-06, "loss": 0.5779, "step": 247265 }, { "epoch": 2.74, "learning_rate": 4.367452092006968e-06, "loss": 0.6525, "step": 247270 }, { "epoch": 2.74, "learning_rate": 4.366529364868456e-06, "loss": 0.6098, "step": 247275 }, { "epoch": 2.74, "learning_rate": 4.365606637729944e-06, "loss": 0.6008, "step": 247280 }, { "epoch": 2.74, "learning_rate": 4.364683910591432e-06, "loss": 0.5332, "step": 247285 }, { "epoch": 2.74, "learning_rate": 4.3637611834529195e-06, "loss": 0.5659, "step": 247290 }, { "epoch": 2.74, "learning_rate": 4.3628384563144065e-06, "loss": 0.5892, "step": 247295 }, { "epoch": 2.74, "learning_rate": 4.361915729175894e-06, "loss": 0.5631, "step": 247300 }, { "epoch": 2.74, "learning_rate": 4.360993002037381e-06, "loss": 0.5582, "step": 247305 }, { "epoch": 2.74, "learning_rate": 4.360070274898869e-06, "loss": 0.5454, "step": 247310 }, { "epoch": 2.74, "learning_rate": 4.359147547760357e-06, "loss": 0.6169, "step": 247315 }, { "epoch": 2.74, "learning_rate": 4.358224820621844e-06, "loss": 0.6013, "step": 247320 }, { "epoch": 2.74, "learning_rate": 4.357302093483332e-06, "loss": 0.578, "step": 247325 }, { "epoch": 2.74, "learning_rate": 4.35637936634482e-06, "loss": 0.5817, "step": 247330 }, { "epoch": 2.74, "learning_rate": 4.3554566392063076e-06, "loss": 0.5297, "step": 247335 }, { "epoch": 2.74, "learning_rate": 4.3545339120677954e-06, "loss": 0.5978, "step": 247340 }, { "epoch": 2.74, "learning_rate": 4.3536111849292824e-06, "loss": 0.574, "step": 247345 }, { "epoch": 2.74, "learning_rate": 4.3526884577907694e-06, "loss": 0.5455, "step": 247350 }, { "epoch": 2.74, "learning_rate": 4.351765730652257e-06, "loss": 0.5978, "step": 247355 }, { "epoch": 2.74, "learning_rate": 4.350843003513745e-06, "loss": 0.5446, "step": 247360 }, { "epoch": 2.74, "learning_rate": 4.349920276375233e-06, "loss": 0.535, "step": 247365 }, { "epoch": 2.74, "learning_rate": 4.348997549236721e-06, "loss": 0.6015, "step": 247370 }, { "epoch": 2.74, "learning_rate": 4.348074822098208e-06, "loss": 0.5865, "step": 247375 }, { "epoch": 2.74, "learning_rate": 4.347152094959696e-06, "loss": 0.6043, "step": 247380 }, { "epoch": 2.74, "learning_rate": 4.346229367821183e-06, "loss": 0.5956, "step": 247385 }, { "epoch": 2.74, "learning_rate": 4.3453066406826705e-06, "loss": 0.5678, "step": 247390 }, { "epoch": 2.74, "learning_rate": 4.344383913544158e-06, "loss": 0.5958, "step": 247395 }, { "epoch": 2.74, "learning_rate": 4.343461186405646e-06, "loss": 0.574, "step": 247400 }, { "epoch": 2.74, "learning_rate": 4.342538459267133e-06, "loss": 0.5908, "step": 247405 }, { "epoch": 2.74, "learning_rate": 4.341615732128621e-06, "loss": 0.574, "step": 247410 }, { "epoch": 2.74, "learning_rate": 4.340693004990109e-06, "loss": 0.6091, "step": 247415 }, { "epoch": 2.74, "learning_rate": 4.339770277851596e-06, "loss": 0.5838, "step": 247420 }, { "epoch": 2.74, "learning_rate": 4.338847550713084e-06, "loss": 0.5992, "step": 247425 }, { "epoch": 2.74, "learning_rate": 4.337924823574571e-06, "loss": 0.6017, "step": 247430 }, { "epoch": 2.74, "learning_rate": 4.337002096436059e-06, "loss": 0.6013, "step": 247435 }, { "epoch": 2.74, "learning_rate": 4.3360793692975465e-06, "loss": 0.5915, "step": 247440 }, { "epoch": 2.74, "learning_rate": 4.335156642159034e-06, "loss": 0.6129, "step": 247445 }, { "epoch": 2.74, "learning_rate": 4.334233915020522e-06, "loss": 0.5818, "step": 247450 }, { "epoch": 2.74, "learning_rate": 4.333311187882009e-06, "loss": 0.5755, "step": 247455 }, { "epoch": 2.74, "learning_rate": 4.332388460743496e-06, "loss": 0.6163, "step": 247460 }, { "epoch": 2.74, "learning_rate": 4.331465733604984e-06, "loss": 0.5878, "step": 247465 }, { "epoch": 2.74, "learning_rate": 4.330543006466472e-06, "loss": 0.5457, "step": 247470 }, { "epoch": 2.74, "learning_rate": 4.32962027932796e-06, "loss": 0.5961, "step": 247475 }, { "epoch": 2.74, "learning_rate": 4.3286975521894476e-06, "loss": 0.6054, "step": 247480 }, { "epoch": 2.74, "learning_rate": 4.3277748250509346e-06, "loss": 0.5855, "step": 247485 }, { "epoch": 2.74, "learning_rate": 4.326852097912422e-06, "loss": 0.5688, "step": 247490 }, { "epoch": 2.74, "learning_rate": 4.325929370773909e-06, "loss": 0.5175, "step": 247495 }, { "epoch": 2.74, "learning_rate": 4.325006643635397e-06, "loss": 0.5438, "step": 247500 }, { "epoch": 2.74, "learning_rate": 4.324083916496885e-06, "loss": 0.5807, "step": 247505 }, { "epoch": 2.74, "learning_rate": 4.323161189358373e-06, "loss": 0.5409, "step": 247510 }, { "epoch": 2.74, "learning_rate": 4.32223846221986e-06, "loss": 0.5525, "step": 247515 }, { "epoch": 2.74, "learning_rate": 4.321315735081348e-06, "loss": 0.6235, "step": 247520 }, { "epoch": 2.74, "learning_rate": 4.320393007942836e-06, "loss": 0.5734, "step": 247525 }, { "epoch": 2.74, "learning_rate": 4.3194702808043235e-06, "loss": 0.5898, "step": 247530 }, { "epoch": 2.74, "learning_rate": 4.3185475536658105e-06, "loss": 0.5829, "step": 247535 }, { "epoch": 2.74, "learning_rate": 4.317624826527298e-06, "loss": 0.6068, "step": 247540 }, { "epoch": 2.74, "learning_rate": 4.316702099388785e-06, "loss": 0.6076, "step": 247545 }, { "epoch": 2.74, "learning_rate": 4.315779372250273e-06, "loss": 0.5912, "step": 247550 }, { "epoch": 2.74, "learning_rate": 4.314856645111761e-06, "loss": 0.5749, "step": 247555 }, { "epoch": 2.74, "learning_rate": 4.313933917973249e-06, "loss": 0.5522, "step": 247560 }, { "epoch": 2.74, "learning_rate": 4.313011190834737e-06, "loss": 0.6306, "step": 247565 }, { "epoch": 2.74, "learning_rate": 4.312088463696223e-06, "loss": 0.5417, "step": 247570 }, { "epoch": 2.74, "learning_rate": 4.311165736557711e-06, "loss": 0.5962, "step": 247575 }, { "epoch": 2.74, "learning_rate": 4.310243009419199e-06, "loss": 0.5874, "step": 247580 }, { "epoch": 2.74, "learning_rate": 4.3093202822806865e-06, "loss": 0.5706, "step": 247585 }, { "epoch": 2.74, "learning_rate": 4.308397555142174e-06, "loss": 0.5792, "step": 247590 }, { "epoch": 2.74, "learning_rate": 4.307474828003661e-06, "loss": 0.5985, "step": 247595 }, { "epoch": 2.74, "learning_rate": 4.306552100865149e-06, "loss": 0.5316, "step": 247600 }, { "epoch": 2.74, "learning_rate": 4.305629373726637e-06, "loss": 0.5831, "step": 247605 }, { "epoch": 2.74, "learning_rate": 4.304706646588124e-06, "loss": 0.5534, "step": 247610 }, { "epoch": 2.74, "learning_rate": 4.303783919449612e-06, "loss": 0.6163, "step": 247615 }, { "epoch": 2.74, "learning_rate": 4.3028611923111e-06, "loss": 0.5198, "step": 247620 }, { "epoch": 2.74, "learning_rate": 4.301938465172587e-06, "loss": 0.5161, "step": 247625 }, { "epoch": 2.74, "learning_rate": 4.3010157380340746e-06, "loss": 0.581, "step": 247630 }, { "epoch": 2.74, "learning_rate": 4.300093010895562e-06, "loss": 0.5633, "step": 247635 }, { "epoch": 2.74, "learning_rate": 4.29917028375705e-06, "loss": 0.5721, "step": 247640 }, { "epoch": 2.74, "learning_rate": 4.298247556618538e-06, "loss": 0.616, "step": 247645 }, { "epoch": 2.74, "learning_rate": 4.297324829480025e-06, "loss": 0.5741, "step": 247650 }, { "epoch": 2.74, "learning_rate": 4.296402102341512e-06, "loss": 0.5492, "step": 247655 }, { "epoch": 2.74, "learning_rate": 4.295479375203e-06, "loss": 0.583, "step": 247660 }, { "epoch": 2.74, "learning_rate": 4.294556648064488e-06, "loss": 0.616, "step": 247665 }, { "epoch": 2.74, "learning_rate": 4.293633920925976e-06, "loss": 0.6101, "step": 247670 }, { "epoch": 2.74, "learning_rate": 4.2927111937874635e-06, "loss": 0.6363, "step": 247675 }, { "epoch": 2.74, "learning_rate": 4.2917884666489505e-06, "loss": 0.569, "step": 247680 }, { "epoch": 2.74, "learning_rate": 4.2908657395104375e-06, "loss": 0.6511, "step": 247685 }, { "epoch": 2.74, "learning_rate": 4.289943012371925e-06, "loss": 0.5458, "step": 247690 }, { "epoch": 2.74, "learning_rate": 4.289020285233413e-06, "loss": 0.6333, "step": 247695 }, { "epoch": 2.74, "learning_rate": 4.288097558094901e-06, "loss": 0.5769, "step": 247700 }, { "epoch": 2.74, "learning_rate": 4.287174830956388e-06, "loss": 0.5473, "step": 247705 }, { "epoch": 2.74, "learning_rate": 4.286252103817876e-06, "loss": 0.679, "step": 247710 }, { "epoch": 2.74, "learning_rate": 4.285329376679364e-06, "loss": 0.6208, "step": 247715 }, { "epoch": 2.74, "learning_rate": 4.284406649540852e-06, "loss": 0.592, "step": 247720 }, { "epoch": 2.74, "learning_rate": 4.283483922402339e-06, "loss": 0.5869, "step": 247725 }, { "epoch": 2.74, "learning_rate": 4.2825611952638264e-06, "loss": 0.6208, "step": 247730 }, { "epoch": 2.74, "learning_rate": 4.2816384681253134e-06, "loss": 0.6152, "step": 247735 }, { "epoch": 2.74, "learning_rate": 4.280715740986801e-06, "loss": 0.6096, "step": 247740 }, { "epoch": 2.74, "learning_rate": 4.279793013848289e-06, "loss": 0.5653, "step": 247745 }, { "epoch": 2.74, "learning_rate": 4.278870286709777e-06, "loss": 0.5689, "step": 247750 }, { "epoch": 2.74, "learning_rate": 4.277947559571265e-06, "loss": 0.6148, "step": 247755 }, { "epoch": 2.74, "learning_rate": 4.277024832432752e-06, "loss": 0.5598, "step": 247760 }, { "epoch": 2.74, "learning_rate": 4.276102105294239e-06, "loss": 0.5844, "step": 247765 }, { "epoch": 2.74, "learning_rate": 4.275179378155727e-06, "loss": 0.5734, "step": 247770 }, { "epoch": 2.74, "learning_rate": 4.2742566510172145e-06, "loss": 0.5527, "step": 247775 }, { "epoch": 2.74, "learning_rate": 4.273333923878702e-06, "loss": 0.5535, "step": 247780 }, { "epoch": 2.74, "learning_rate": 4.27241119674019e-06, "loss": 0.6025, "step": 247785 }, { "epoch": 2.74, "learning_rate": 4.271488469601677e-06, "loss": 0.5896, "step": 247790 }, { "epoch": 2.74, "learning_rate": 4.270565742463165e-06, "loss": 0.5745, "step": 247795 }, { "epoch": 2.74, "learning_rate": 4.269643015324652e-06, "loss": 0.5383, "step": 247800 }, { "epoch": 2.74, "learning_rate": 4.26872028818614e-06, "loss": 0.5683, "step": 247805 }, { "epoch": 2.74, "learning_rate": 4.267797561047628e-06, "loss": 0.5928, "step": 247810 }, { "epoch": 2.74, "learning_rate": 4.266874833909116e-06, "loss": 0.565, "step": 247815 }, { "epoch": 2.74, "learning_rate": 4.265952106770603e-06, "loss": 0.5417, "step": 247820 }, { "epoch": 2.74, "learning_rate": 4.2650293796320905e-06, "loss": 0.5539, "step": 247825 }, { "epoch": 2.74, "learning_rate": 4.264106652493578e-06, "loss": 0.5831, "step": 247830 }, { "epoch": 2.74, "learning_rate": 4.263183925355066e-06, "loss": 0.6333, "step": 247835 }, { "epoch": 2.74, "learning_rate": 4.262261198216553e-06, "loss": 0.5624, "step": 247840 }, { "epoch": 2.74, "learning_rate": 4.26133847107804e-06, "loss": 0.6302, "step": 247845 }, { "epoch": 2.74, "learning_rate": 4.260415743939528e-06, "loss": 0.5639, "step": 247850 }, { "epoch": 2.74, "learning_rate": 4.259493016801016e-06, "loss": 0.5567, "step": 247855 }, { "epoch": 2.74, "learning_rate": 4.258570289662504e-06, "loss": 0.5738, "step": 247860 }, { "epoch": 2.74, "learning_rate": 4.257647562523992e-06, "loss": 0.5944, "step": 247865 }, { "epoch": 2.74, "learning_rate": 4.256724835385479e-06, "loss": 0.5651, "step": 247870 }, { "epoch": 2.74, "learning_rate": 4.255802108246966e-06, "loss": 0.5399, "step": 247875 }, { "epoch": 2.74, "learning_rate": 4.2548793811084534e-06, "loss": 0.5676, "step": 247880 }, { "epoch": 2.74, "learning_rate": 4.253956653969941e-06, "loss": 0.5563, "step": 247885 }, { "epoch": 2.74, "learning_rate": 4.253033926831429e-06, "loss": 0.5656, "step": 247890 }, { "epoch": 2.74, "learning_rate": 4.252111199692917e-06, "loss": 0.5547, "step": 247895 }, { "epoch": 2.74, "learning_rate": 4.251188472554404e-06, "loss": 0.6315, "step": 247900 }, { "epoch": 2.74, "learning_rate": 4.250265745415892e-06, "loss": 0.5827, "step": 247905 }, { "epoch": 2.75, "learning_rate": 4.24934301827738e-06, "loss": 0.6113, "step": 247910 }, { "epoch": 2.75, "learning_rate": 4.248420291138867e-06, "loss": 0.5645, "step": 247915 }, { "epoch": 2.75, "learning_rate": 4.2474975640003545e-06, "loss": 0.5713, "step": 247920 }, { "epoch": 2.75, "learning_rate": 4.246574836861842e-06, "loss": 0.6256, "step": 247925 }, { "epoch": 2.75, "learning_rate": 4.245652109723329e-06, "loss": 0.5835, "step": 247930 }, { "epoch": 2.75, "learning_rate": 4.244729382584817e-06, "loss": 0.5643, "step": 247935 }, { "epoch": 2.75, "learning_rate": 4.243806655446305e-06, "loss": 0.5855, "step": 247940 }, { "epoch": 2.75, "learning_rate": 4.242883928307793e-06, "loss": 0.6365, "step": 247945 }, { "epoch": 2.75, "learning_rate": 4.24196120116928e-06, "loss": 0.5348, "step": 247950 }, { "epoch": 2.75, "learning_rate": 4.241038474030768e-06, "loss": 0.5941, "step": 247955 }, { "epoch": 2.75, "learning_rate": 4.240115746892255e-06, "loss": 0.5577, "step": 247960 }, { "epoch": 2.75, "learning_rate": 4.239193019753743e-06, "loss": 0.5711, "step": 247965 }, { "epoch": 2.75, "learning_rate": 4.2382702926152305e-06, "loss": 0.5823, "step": 247970 }, { "epoch": 2.75, "learning_rate": 4.237347565476718e-06, "loss": 0.5633, "step": 247975 }, { "epoch": 2.75, "learning_rate": 4.236424838338205e-06, "loss": 0.5894, "step": 247980 }, { "epoch": 2.75, "learning_rate": 4.235502111199693e-06, "loss": 0.5605, "step": 247985 }, { "epoch": 2.75, "learning_rate": 4.23457938406118e-06, "loss": 0.5716, "step": 247990 }, { "epoch": 2.75, "learning_rate": 4.233656656922668e-06, "loss": 0.5274, "step": 247995 }, { "epoch": 2.75, "learning_rate": 4.232733929784156e-06, "loss": 0.5665, "step": 248000 }, { "epoch": 2.75, "eval_loss": 0.5578925609588623, "eval_runtime": 69.7874, "eval_samples_per_second": 28.658, "eval_steps_per_second": 14.329, "step": 248000 }, { "epoch": 2.75, "learning_rate": 4.231811202645644e-06, "loss": 0.5687, "step": 248005 }, { "epoch": 2.75, "learning_rate": 4.230888475507131e-06, "loss": 0.5507, "step": 248010 }, { "epoch": 2.75, "learning_rate": 4.2299657483686186e-06, "loss": 0.5449, "step": 248015 }, { "epoch": 2.75, "learning_rate": 4.229043021230106e-06, "loss": 0.5873, "step": 248020 }, { "epoch": 2.75, "learning_rate": 4.228120294091594e-06, "loss": 0.5571, "step": 248025 }, { "epoch": 2.75, "learning_rate": 4.227197566953081e-06, "loss": 0.6264, "step": 248030 }, { "epoch": 2.75, "learning_rate": 4.226274839814569e-06, "loss": 0.5556, "step": 248035 }, { "epoch": 2.75, "learning_rate": 4.225352112676056e-06, "loss": 0.6344, "step": 248040 }, { "epoch": 2.75, "learning_rate": 4.224429385537544e-06, "loss": 0.5912, "step": 248045 }, { "epoch": 2.75, "learning_rate": 4.223506658399032e-06, "loss": 0.6086, "step": 248050 }, { "epoch": 2.75, "learning_rate": 4.22258393126052e-06, "loss": 0.6199, "step": 248055 }, { "epoch": 2.75, "learning_rate": 4.2216612041220075e-06, "loss": 0.5824, "step": 248060 }, { "epoch": 2.75, "learning_rate": 4.2207384769834945e-06, "loss": 0.6364, "step": 248065 }, { "epoch": 2.75, "learning_rate": 4.2198157498449815e-06, "loss": 0.5893, "step": 248070 }, { "epoch": 2.75, "learning_rate": 4.218893022706469e-06, "loss": 0.5857, "step": 248075 }, { "epoch": 2.75, "learning_rate": 4.217970295567957e-06, "loss": 0.5348, "step": 248080 }, { "epoch": 2.75, "learning_rate": 4.217047568429445e-06, "loss": 0.5553, "step": 248085 }, { "epoch": 2.75, "learning_rate": 4.216124841290933e-06, "loss": 0.5585, "step": 248090 }, { "epoch": 2.75, "learning_rate": 4.21520211415242e-06, "loss": 0.5778, "step": 248095 }, { "epoch": 2.75, "learning_rate": 4.214279387013908e-06, "loss": 0.621, "step": 248100 }, { "epoch": 2.75, "learning_rate": 4.213356659875395e-06, "loss": 0.5681, "step": 248105 }, { "epoch": 2.75, "learning_rate": 4.212433932736883e-06, "loss": 0.6002, "step": 248110 }, { "epoch": 2.75, "learning_rate": 4.2115112055983705e-06, "loss": 0.5516, "step": 248115 }, { "epoch": 2.75, "learning_rate": 4.2105884784598575e-06, "loss": 0.5708, "step": 248120 }, { "epoch": 2.75, "learning_rate": 4.209665751321345e-06, "loss": 0.5782, "step": 248125 }, { "epoch": 2.75, "learning_rate": 4.208743024182833e-06, "loss": 0.5999, "step": 248130 }, { "epoch": 2.75, "learning_rate": 4.207820297044321e-06, "loss": 0.6211, "step": 248135 }, { "epoch": 2.75, "learning_rate": 4.206897569905808e-06, "loss": 0.6095, "step": 248140 }, { "epoch": 2.75, "learning_rate": 4.205974842767296e-06, "loss": 0.6067, "step": 248145 }, { "epoch": 2.75, "learning_rate": 4.205052115628783e-06, "loss": 0.5545, "step": 248150 }, { "epoch": 2.75, "learning_rate": 4.204129388490271e-06, "loss": 0.5384, "step": 248155 }, { "epoch": 2.75, "learning_rate": 4.2032066613517586e-06, "loss": 0.5505, "step": 248160 }, { "epoch": 2.75, "learning_rate": 4.202283934213246e-06, "loss": 0.6131, "step": 248165 }, { "epoch": 2.75, "learning_rate": 4.201361207074734e-06, "loss": 0.5652, "step": 248170 }, { "epoch": 2.75, "learning_rate": 4.200438479936221e-06, "loss": 0.5167, "step": 248175 }, { "epoch": 2.75, "learning_rate": 4.199515752797708e-06, "loss": 0.5703, "step": 248180 }, { "epoch": 2.75, "learning_rate": 4.198593025659196e-06, "loss": 0.5664, "step": 248185 }, { "epoch": 2.75, "learning_rate": 4.197670298520684e-06, "loss": 0.5974, "step": 248190 }, { "epoch": 2.75, "learning_rate": 4.196747571382172e-06, "loss": 0.6261, "step": 248195 }, { "epoch": 2.75, "learning_rate": 4.19582484424366e-06, "loss": 0.559, "step": 248200 }, { "epoch": 2.75, "learning_rate": 4.194902117105147e-06, "loss": 0.5384, "step": 248205 }, { "epoch": 2.75, "learning_rate": 4.1939793899666345e-06, "loss": 0.5482, "step": 248210 }, { "epoch": 2.75, "learning_rate": 4.193056662828122e-06, "loss": 0.576, "step": 248215 }, { "epoch": 2.75, "learning_rate": 4.192133935689609e-06, "loss": 0.5742, "step": 248220 }, { "epoch": 2.75, "learning_rate": 4.191211208551097e-06, "loss": 0.5529, "step": 248225 }, { "epoch": 2.75, "learning_rate": 4.190288481412585e-06, "loss": 0.6191, "step": 248230 }, { "epoch": 2.75, "learning_rate": 4.189365754274072e-06, "loss": 0.5966, "step": 248235 }, { "epoch": 2.75, "learning_rate": 4.18844302713556e-06, "loss": 0.579, "step": 248240 }, { "epoch": 2.75, "learning_rate": 4.187520299997048e-06, "loss": 0.6311, "step": 248245 }, { "epoch": 2.75, "learning_rate": 4.186597572858536e-06, "loss": 0.5539, "step": 248250 }, { "epoch": 2.75, "learning_rate": 4.185674845720023e-06, "loss": 0.5479, "step": 248255 }, { "epoch": 2.75, "learning_rate": 4.18475211858151e-06, "loss": 0.5971, "step": 248260 }, { "epoch": 2.75, "learning_rate": 4.1838293914429975e-06, "loss": 0.6018, "step": 248265 }, { "epoch": 2.75, "learning_rate": 4.182906664304485e-06, "loss": 0.5676, "step": 248270 }, { "epoch": 2.75, "learning_rate": 4.181983937165973e-06, "loss": 0.5623, "step": 248275 }, { "epoch": 2.75, "learning_rate": 4.181061210027461e-06, "loss": 0.6313, "step": 248280 }, { "epoch": 2.75, "learning_rate": 4.180138482888948e-06, "loss": 0.5734, "step": 248285 }, { "epoch": 2.75, "learning_rate": 4.179215755750436e-06, "loss": 0.5704, "step": 248290 }, { "epoch": 2.75, "learning_rate": 4.178293028611923e-06, "loss": 0.646, "step": 248295 }, { "epoch": 2.75, "learning_rate": 4.177370301473411e-06, "loss": 0.5765, "step": 248300 }, { "epoch": 2.75, "learning_rate": 4.1764475743348986e-06, "loss": 0.6072, "step": 248305 }, { "epoch": 2.75, "learning_rate": 4.175524847196386e-06, "loss": 0.627, "step": 248310 }, { "epoch": 2.75, "learning_rate": 4.174602120057873e-06, "loss": 0.6226, "step": 248315 }, { "epoch": 2.75, "learning_rate": 4.173679392919361e-06, "loss": 0.6157, "step": 248320 }, { "epoch": 2.75, "learning_rate": 4.172756665780849e-06, "loss": 0.5487, "step": 248325 }, { "epoch": 2.75, "learning_rate": 4.171833938642336e-06, "loss": 0.5877, "step": 248330 }, { "epoch": 2.75, "learning_rate": 4.170911211503824e-06, "loss": 0.5786, "step": 248335 }, { "epoch": 2.75, "learning_rate": 4.169988484365312e-06, "loss": 0.5798, "step": 248340 }, { "epoch": 2.75, "learning_rate": 4.169065757226799e-06, "loss": 0.5826, "step": 248345 }, { "epoch": 2.75, "learning_rate": 4.168143030088287e-06, "loss": 0.5797, "step": 248350 }, { "epoch": 2.75, "learning_rate": 4.1672203029497745e-06, "loss": 0.6148, "step": 248355 }, { "epoch": 2.75, "learning_rate": 4.166297575811262e-06, "loss": 0.5787, "step": 248360 }, { "epoch": 2.75, "learning_rate": 4.16537484867275e-06, "loss": 0.6239, "step": 248365 }, { "epoch": 2.75, "learning_rate": 4.164452121534237e-06, "loss": 0.5485, "step": 248370 }, { "epoch": 2.75, "learning_rate": 4.163529394395724e-06, "loss": 0.5721, "step": 248375 }, { "epoch": 2.75, "learning_rate": 4.162606667257212e-06, "loss": 0.6302, "step": 248380 }, { "epoch": 2.75, "learning_rate": 4.1616839401187e-06, "loss": 0.6485, "step": 248385 }, { "epoch": 2.75, "learning_rate": 4.160761212980188e-06, "loss": 0.6125, "step": 248390 }, { "epoch": 2.75, "learning_rate": 4.159838485841675e-06, "loss": 0.6272, "step": 248395 }, { "epoch": 2.75, "learning_rate": 4.158915758703163e-06, "loss": 0.5683, "step": 248400 }, { "epoch": 2.75, "learning_rate": 4.15799303156465e-06, "loss": 0.5922, "step": 248405 }, { "epoch": 2.75, "learning_rate": 4.1570703044261374e-06, "loss": 0.5692, "step": 248410 }, { "epoch": 2.75, "learning_rate": 4.156147577287625e-06, "loss": 0.5765, "step": 248415 }, { "epoch": 2.75, "learning_rate": 4.155224850149113e-06, "loss": 0.5796, "step": 248420 }, { "epoch": 2.75, "learning_rate": 4.1543021230106e-06, "loss": 0.543, "step": 248425 }, { "epoch": 2.75, "learning_rate": 4.153379395872088e-06, "loss": 0.5562, "step": 248430 }, { "epoch": 2.75, "learning_rate": 4.152456668733576e-06, "loss": 0.6135, "step": 248435 }, { "epoch": 2.75, "learning_rate": 4.151533941595064e-06, "loss": 0.6006, "step": 248440 }, { "epoch": 2.75, "learning_rate": 4.150611214456551e-06, "loss": 0.5523, "step": 248445 }, { "epoch": 2.75, "learning_rate": 4.1496884873180385e-06, "loss": 0.5951, "step": 248450 }, { "epoch": 2.75, "learning_rate": 4.1487657601795255e-06, "loss": 0.6508, "step": 248455 }, { "epoch": 2.75, "learning_rate": 4.147843033041013e-06, "loss": 0.5303, "step": 248460 }, { "epoch": 2.75, "learning_rate": 4.146920305902501e-06, "loss": 0.555, "step": 248465 }, { "epoch": 2.75, "learning_rate": 4.145997578763989e-06, "loss": 0.6348, "step": 248470 }, { "epoch": 2.75, "learning_rate": 4.145074851625477e-06, "loss": 0.6004, "step": 248475 }, { "epoch": 2.75, "learning_rate": 4.144152124486964e-06, "loss": 0.6047, "step": 248480 }, { "epoch": 2.75, "learning_rate": 4.143229397348451e-06, "loss": 0.5531, "step": 248485 }, { "epoch": 2.75, "learning_rate": 4.142306670209939e-06, "loss": 0.5612, "step": 248490 }, { "epoch": 2.75, "learning_rate": 4.141383943071427e-06, "loss": 0.5685, "step": 248495 }, { "epoch": 2.75, "learning_rate": 4.1404612159329145e-06, "loss": 0.6052, "step": 248500 }, { "epoch": 2.75, "learning_rate": 4.139538488794402e-06, "loss": 0.5985, "step": 248505 }, { "epoch": 2.75, "learning_rate": 4.138615761655889e-06, "loss": 0.5288, "step": 248510 }, { "epoch": 2.75, "learning_rate": 4.137693034517377e-06, "loss": 0.5377, "step": 248515 }, { "epoch": 2.75, "learning_rate": 4.136770307378864e-06, "loss": 0.6269, "step": 248520 }, { "epoch": 2.75, "learning_rate": 4.135847580240352e-06, "loss": 0.5966, "step": 248525 }, { "epoch": 2.75, "learning_rate": 4.13492485310184e-06, "loss": 0.6095, "step": 248530 }, { "epoch": 2.75, "learning_rate": 4.134002125963327e-06, "loss": 0.5457, "step": 248535 }, { "epoch": 2.75, "learning_rate": 4.133079398824815e-06, "loss": 0.6, "step": 248540 }, { "epoch": 2.75, "learning_rate": 4.132156671686303e-06, "loss": 0.5535, "step": 248545 }, { "epoch": 2.75, "learning_rate": 4.1312339445477904e-06, "loss": 0.5833, "step": 248550 }, { "epoch": 2.75, "learning_rate": 4.130311217409278e-06, "loss": 0.6225, "step": 248555 }, { "epoch": 2.75, "learning_rate": 4.129388490270765e-06, "loss": 0.5227, "step": 248560 }, { "epoch": 2.75, "learning_rate": 4.128465763132252e-06, "loss": 0.5528, "step": 248565 }, { "epoch": 2.75, "learning_rate": 4.12754303599374e-06, "loss": 0.5931, "step": 248570 }, { "epoch": 2.75, "learning_rate": 4.126620308855228e-06, "loss": 0.6206, "step": 248575 }, { "epoch": 2.75, "learning_rate": 4.125697581716716e-06, "loss": 0.5433, "step": 248580 }, { "epoch": 2.75, "learning_rate": 4.124774854578204e-06, "loss": 0.577, "step": 248585 }, { "epoch": 2.75, "learning_rate": 4.123852127439691e-06, "loss": 0.5861, "step": 248590 }, { "epoch": 2.75, "learning_rate": 4.122929400301178e-06, "loss": 0.5772, "step": 248595 }, { "epoch": 2.75, "learning_rate": 4.1220066731626655e-06, "loss": 0.62, "step": 248600 }, { "epoch": 2.75, "learning_rate": 4.121083946024153e-06, "loss": 0.5739, "step": 248605 }, { "epoch": 2.75, "learning_rate": 4.120161218885641e-06, "loss": 0.5931, "step": 248610 }, { "epoch": 2.75, "learning_rate": 4.119238491747129e-06, "loss": 0.6045, "step": 248615 }, { "epoch": 2.75, "learning_rate": 4.118315764608616e-06, "loss": 0.6308, "step": 248620 }, { "epoch": 2.75, "learning_rate": 4.117393037470104e-06, "loss": 0.5197, "step": 248625 }, { "epoch": 2.75, "learning_rate": 4.116470310331592e-06, "loss": 0.5074, "step": 248630 }, { "epoch": 2.75, "learning_rate": 4.115547583193079e-06, "loss": 0.5619, "step": 248635 }, { "epoch": 2.75, "learning_rate": 4.114624856054567e-06, "loss": 0.6093, "step": 248640 }, { "epoch": 2.75, "learning_rate": 4.1137021289160545e-06, "loss": 0.6277, "step": 248645 }, { "epoch": 2.75, "learning_rate": 4.1127794017775415e-06, "loss": 0.5896, "step": 248650 }, { "epoch": 2.75, "learning_rate": 4.111856674639029e-06, "loss": 0.5737, "step": 248655 }, { "epoch": 2.75, "learning_rate": 4.110933947500517e-06, "loss": 0.6379, "step": 248660 }, { "epoch": 2.75, "learning_rate": 4.110011220362005e-06, "loss": 0.583, "step": 248665 }, { "epoch": 2.75, "learning_rate": 4.109088493223492e-06, "loss": 0.5419, "step": 248670 }, { "epoch": 2.75, "learning_rate": 4.108165766084979e-06, "loss": 0.5883, "step": 248675 }, { "epoch": 2.75, "learning_rate": 4.107243038946467e-06, "loss": 0.5191, "step": 248680 }, { "epoch": 2.75, "learning_rate": 4.106320311807955e-06, "loss": 0.5916, "step": 248685 }, { "epoch": 2.75, "learning_rate": 4.1053975846694426e-06, "loss": 0.5922, "step": 248690 }, { "epoch": 2.75, "learning_rate": 4.10447485753093e-06, "loss": 0.6014, "step": 248695 }, { "epoch": 2.75, "learning_rate": 4.103552130392417e-06, "loss": 0.5641, "step": 248700 }, { "epoch": 2.75, "learning_rate": 4.102629403253905e-06, "loss": 0.5674, "step": 248705 }, { "epoch": 2.75, "learning_rate": 4.101706676115392e-06, "loss": 0.5453, "step": 248710 }, { "epoch": 2.75, "learning_rate": 4.10078394897688e-06, "loss": 0.5954, "step": 248715 }, { "epoch": 2.75, "learning_rate": 4.099861221838368e-06, "loss": 0.6134, "step": 248720 }, { "epoch": 2.75, "learning_rate": 4.098938494699856e-06, "loss": 0.5182, "step": 248725 }, { "epoch": 2.75, "learning_rate": 4.098015767561343e-06, "loss": 0.6051, "step": 248730 }, { "epoch": 2.75, "learning_rate": 4.097093040422831e-06, "loss": 0.5403, "step": 248735 }, { "epoch": 2.75, "learning_rate": 4.0961703132843185e-06, "loss": 0.535, "step": 248740 }, { "epoch": 2.75, "learning_rate": 4.095247586145806e-06, "loss": 0.5762, "step": 248745 }, { "epoch": 2.75, "learning_rate": 4.094324859007293e-06, "loss": 0.5635, "step": 248750 }, { "epoch": 2.75, "learning_rate": 4.093402131868781e-06, "loss": 0.5602, "step": 248755 }, { "epoch": 2.75, "learning_rate": 4.092479404730268e-06, "loss": 0.5311, "step": 248760 }, { "epoch": 2.75, "learning_rate": 4.091556677591756e-06, "loss": 0.5524, "step": 248765 }, { "epoch": 2.75, "learning_rate": 4.090633950453244e-06, "loss": 0.543, "step": 248770 }, { "epoch": 2.75, "learning_rate": 4.089711223314732e-06, "loss": 0.5852, "step": 248775 }, { "epoch": 2.75, "learning_rate": 4.08878849617622e-06, "loss": 0.5137, "step": 248780 }, { "epoch": 2.75, "learning_rate": 4.087865769037706e-06, "loss": 0.5896, "step": 248785 }, { "epoch": 2.75, "learning_rate": 4.086943041899194e-06, "loss": 0.5546, "step": 248790 }, { "epoch": 2.75, "learning_rate": 4.0860203147606815e-06, "loss": 0.5788, "step": 248795 }, { "epoch": 2.75, "learning_rate": 4.085097587622169e-06, "loss": 0.5769, "step": 248800 }, { "epoch": 2.75, "learning_rate": 4.084174860483657e-06, "loss": 0.5687, "step": 248805 }, { "epoch": 2.76, "learning_rate": 4.083252133345144e-06, "loss": 0.5349, "step": 248810 }, { "epoch": 2.76, "learning_rate": 4.082329406206632e-06, "loss": 0.5587, "step": 248815 }, { "epoch": 2.76, "learning_rate": 4.08140667906812e-06, "loss": 0.556, "step": 248820 }, { "epoch": 2.76, "learning_rate": 4.080483951929607e-06, "loss": 0.5948, "step": 248825 }, { "epoch": 2.76, "learning_rate": 4.079561224791095e-06, "loss": 0.5452, "step": 248830 }, { "epoch": 2.76, "learning_rate": 4.0786384976525826e-06, "loss": 0.5297, "step": 248835 }, { "epoch": 2.76, "learning_rate": 4.0777157705140696e-06, "loss": 0.639, "step": 248840 }, { "epoch": 2.76, "learning_rate": 4.076793043375557e-06, "loss": 0.543, "step": 248845 }, { "epoch": 2.76, "learning_rate": 4.075870316237045e-06, "loss": 0.5643, "step": 248850 }, { "epoch": 2.76, "learning_rate": 4.074947589098533e-06, "loss": 0.5927, "step": 248855 }, { "epoch": 2.76, "learning_rate": 4.074024861960021e-06, "loss": 0.6071, "step": 248860 }, { "epoch": 2.76, "learning_rate": 4.073102134821508e-06, "loss": 0.5661, "step": 248865 }, { "epoch": 2.76, "learning_rate": 4.072179407682995e-06, "loss": 0.6252, "step": 248870 }, { "epoch": 2.76, "learning_rate": 4.071256680544483e-06, "loss": 0.598, "step": 248875 }, { "epoch": 2.76, "learning_rate": 4.070333953405971e-06, "loss": 0.5504, "step": 248880 }, { "epoch": 2.76, "learning_rate": 4.0694112262674585e-06, "loss": 0.5617, "step": 248885 }, { "epoch": 2.76, "learning_rate": 4.068488499128946e-06, "loss": 0.5686, "step": 248890 }, { "epoch": 2.76, "learning_rate": 4.067565771990433e-06, "loss": 0.5496, "step": 248895 }, { "epoch": 2.76, "learning_rate": 4.06664304485192e-06, "loss": 0.6318, "step": 248900 }, { "epoch": 2.76, "learning_rate": 4.065720317713408e-06, "loss": 0.5871, "step": 248905 }, { "epoch": 2.76, "learning_rate": 4.064797590574896e-06, "loss": 0.5935, "step": 248910 }, { "epoch": 2.76, "learning_rate": 4.063874863436384e-06, "loss": 0.5663, "step": 248915 }, { "epoch": 2.76, "learning_rate": 4.062952136297872e-06, "loss": 0.599, "step": 248920 }, { "epoch": 2.76, "learning_rate": 4.062029409159359e-06, "loss": 0.6034, "step": 248925 }, { "epoch": 2.76, "learning_rate": 4.061106682020847e-06, "loss": 0.6238, "step": 248930 }, { "epoch": 2.76, "learning_rate": 4.0601839548823344e-06, "loss": 0.6157, "step": 248935 }, { "epoch": 2.76, "learning_rate": 4.0592612277438215e-06, "loss": 0.577, "step": 248940 }, { "epoch": 2.76, "learning_rate": 4.058338500605309e-06, "loss": 0.5727, "step": 248945 }, { "epoch": 2.76, "learning_rate": 4.057415773466796e-06, "loss": 0.5575, "step": 248950 }, { "epoch": 2.76, "learning_rate": 4.056493046328284e-06, "loss": 0.5632, "step": 248955 }, { "epoch": 2.76, "learning_rate": 4.055570319189772e-06, "loss": 0.5728, "step": 248960 }, { "epoch": 2.76, "learning_rate": 4.05464759205126e-06, "loss": 0.5977, "step": 248965 }, { "epoch": 2.76, "learning_rate": 4.053724864912748e-06, "loss": 0.5721, "step": 248970 }, { "epoch": 2.76, "learning_rate": 4.052802137774235e-06, "loss": 0.5575, "step": 248975 }, { "epoch": 2.76, "learning_rate": 4.051879410635722e-06, "loss": 0.551, "step": 248980 }, { "epoch": 2.76, "learning_rate": 4.0509566834972095e-06, "loss": 0.558, "step": 248985 }, { "epoch": 2.76, "learning_rate": 4.050033956358697e-06, "loss": 0.641, "step": 248990 }, { "epoch": 2.76, "learning_rate": 4.049111229220185e-06, "loss": 0.572, "step": 248995 }, { "epoch": 2.76, "learning_rate": 4.048188502081673e-06, "loss": 0.5409, "step": 249000 }, { "epoch": 2.76, "eval_loss": 0.5412405133247375, "eval_runtime": 69.6952, "eval_samples_per_second": 28.696, "eval_steps_per_second": 14.348, "step": 249000 }, { "epoch": 2.76, "learning_rate": 4.04726577494316e-06, "loss": 0.5624, "step": 249005 }, { "epoch": 2.76, "learning_rate": 4.046343047804648e-06, "loss": 0.5891, "step": 249010 }, { "epoch": 2.76, "learning_rate": 4.045420320666135e-06, "loss": 0.5635, "step": 249015 }, { "epoch": 2.76, "learning_rate": 4.044497593527623e-06, "loss": 0.5548, "step": 249020 }, { "epoch": 2.76, "learning_rate": 4.043574866389111e-06, "loss": 0.6137, "step": 249025 }, { "epoch": 2.76, "learning_rate": 4.0426521392505985e-06, "loss": 0.557, "step": 249030 }, { "epoch": 2.76, "learning_rate": 4.0417294121120855e-06, "loss": 0.6203, "step": 249035 }, { "epoch": 2.76, "learning_rate": 4.040806684973573e-06, "loss": 0.5075, "step": 249040 }, { "epoch": 2.76, "learning_rate": 4.039883957835061e-06, "loss": 0.5775, "step": 249045 }, { "epoch": 2.76, "learning_rate": 4.038961230696548e-06, "loss": 0.5791, "step": 249050 }, { "epoch": 2.76, "learning_rate": 4.038038503558036e-06, "loss": 0.5756, "step": 249055 }, { "epoch": 2.76, "learning_rate": 4.037115776419523e-06, "loss": 0.594, "step": 249060 }, { "epoch": 2.76, "learning_rate": 4.036193049281011e-06, "loss": 0.5843, "step": 249065 }, { "epoch": 2.76, "learning_rate": 4.035270322142499e-06, "loss": 0.6191, "step": 249070 }, { "epoch": 2.76, "learning_rate": 4.034347595003987e-06, "loss": 0.5194, "step": 249075 }, { "epoch": 2.76, "learning_rate": 4.0334248678654744e-06, "loss": 0.5615, "step": 249080 }, { "epoch": 2.76, "learning_rate": 4.0325021407269614e-06, "loss": 0.5923, "step": 249085 }, { "epoch": 2.76, "learning_rate": 4.0315794135884484e-06, "loss": 0.6103, "step": 249090 }, { "epoch": 2.76, "learning_rate": 4.030656686449936e-06, "loss": 0.5031, "step": 249095 }, { "epoch": 2.76, "learning_rate": 4.029733959311424e-06, "loss": 0.5978, "step": 249100 }, { "epoch": 2.76, "learning_rate": 4.028811232172912e-06, "loss": 0.5668, "step": 249105 }, { "epoch": 2.76, "learning_rate": 4.0278885050344e-06, "loss": 0.5984, "step": 249110 }, { "epoch": 2.76, "learning_rate": 4.026965777895887e-06, "loss": 0.6184, "step": 249115 }, { "epoch": 2.76, "learning_rate": 4.026043050757375e-06, "loss": 0.6121, "step": 249120 }, { "epoch": 2.76, "learning_rate": 4.0251203236188625e-06, "loss": 0.606, "step": 249125 }, { "epoch": 2.76, "learning_rate": 4.0241975964803495e-06, "loss": 0.5793, "step": 249130 }, { "epoch": 2.76, "learning_rate": 4.023274869341837e-06, "loss": 0.6058, "step": 249135 }, { "epoch": 2.76, "learning_rate": 4.022352142203325e-06, "loss": 0.567, "step": 249140 }, { "epoch": 2.76, "learning_rate": 4.021429415064812e-06, "loss": 0.5634, "step": 249145 }, { "epoch": 2.76, "learning_rate": 4.0205066879263e-06, "loss": 0.5704, "step": 249150 }, { "epoch": 2.76, "learning_rate": 4.019583960787788e-06, "loss": 0.5846, "step": 249155 }, { "epoch": 2.76, "learning_rate": 4.018661233649276e-06, "loss": 0.5811, "step": 249160 }, { "epoch": 2.76, "learning_rate": 4.017738506510763e-06, "loss": 0.6046, "step": 249165 }, { "epoch": 2.76, "learning_rate": 4.016815779372251e-06, "loss": 0.5568, "step": 249170 }, { "epoch": 2.76, "learning_rate": 4.015893052233738e-06, "loss": 0.558, "step": 249175 }, { "epoch": 2.76, "learning_rate": 4.0149703250952255e-06, "loss": 0.5932, "step": 249180 }, { "epoch": 2.76, "learning_rate": 4.014047597956713e-06, "loss": 0.5985, "step": 249185 }, { "epoch": 2.76, "learning_rate": 4.013124870818201e-06, "loss": 0.5982, "step": 249190 }, { "epoch": 2.76, "learning_rate": 4.012202143679689e-06, "loss": 0.5659, "step": 249195 }, { "epoch": 2.76, "learning_rate": 4.011279416541176e-06, "loss": 0.604, "step": 249200 }, { "epoch": 2.76, "learning_rate": 4.010356689402663e-06, "loss": 0.5724, "step": 249205 }, { "epoch": 2.76, "learning_rate": 4.009433962264151e-06, "loss": 0.571, "step": 249210 }, { "epoch": 2.76, "learning_rate": 4.008511235125639e-06, "loss": 0.5702, "step": 249215 }, { "epoch": 2.76, "learning_rate": 4.007588507987127e-06, "loss": 0.5861, "step": 249220 }, { "epoch": 2.76, "learning_rate": 4.006665780848614e-06, "loss": 0.5522, "step": 249225 }, { "epoch": 2.76, "learning_rate": 4.0057430537101014e-06, "loss": 0.5937, "step": 249230 }, { "epoch": 2.76, "learning_rate": 4.004820326571589e-06, "loss": 0.5524, "step": 249235 }, { "epoch": 2.76, "learning_rate": 4.003897599433076e-06, "loss": 0.592, "step": 249240 }, { "epoch": 2.76, "learning_rate": 4.002974872294564e-06, "loss": 0.6422, "step": 249245 }, { "epoch": 2.76, "learning_rate": 4.002052145156052e-06, "loss": 0.6062, "step": 249250 }, { "epoch": 2.76, "learning_rate": 4.001129418017539e-06, "loss": 0.6434, "step": 249255 }, { "epoch": 2.76, "learning_rate": 4.000206690879027e-06, "loss": 0.5443, "step": 249260 }, { "epoch": 2.76, "learning_rate": 3.999283963740515e-06, "loss": 0.5886, "step": 249265 }, { "epoch": 2.76, "learning_rate": 3.9983612366020025e-06, "loss": 0.667, "step": 249270 }, { "epoch": 2.76, "learning_rate": 3.99743850946349e-06, "loss": 0.5883, "step": 249275 }, { "epoch": 2.76, "learning_rate": 3.996515782324977e-06, "loss": 0.5844, "step": 249280 }, { "epoch": 2.76, "learning_rate": 3.995593055186464e-06, "loss": 0.5535, "step": 249285 }, { "epoch": 2.76, "learning_rate": 3.994670328047952e-06, "loss": 0.5507, "step": 249290 }, { "epoch": 2.76, "learning_rate": 3.99374760090944e-06, "loss": 0.5931, "step": 249295 }, { "epoch": 2.76, "learning_rate": 3.992824873770928e-06, "loss": 0.5515, "step": 249300 }, { "epoch": 2.76, "learning_rate": 3.991902146632416e-06, "loss": 0.5927, "step": 249305 }, { "epoch": 2.76, "learning_rate": 3.990979419493903e-06, "loss": 0.5519, "step": 249310 }, { "epoch": 2.76, "learning_rate": 3.990056692355391e-06, "loss": 0.5908, "step": 249315 }, { "epoch": 2.76, "learning_rate": 3.989133965216878e-06, "loss": 0.5481, "step": 249320 }, { "epoch": 2.76, "learning_rate": 3.9882112380783655e-06, "loss": 0.5687, "step": 249325 }, { "epoch": 2.76, "learning_rate": 3.987288510939853e-06, "loss": 0.5536, "step": 249330 }, { "epoch": 2.76, "learning_rate": 3.98636578380134e-06, "loss": 0.574, "step": 249335 }, { "epoch": 2.76, "learning_rate": 3.985443056662828e-06, "loss": 0.5802, "step": 249340 }, { "epoch": 2.76, "learning_rate": 3.984520329524316e-06, "loss": 0.5632, "step": 249345 }, { "epoch": 2.76, "learning_rate": 3.983597602385804e-06, "loss": 0.6041, "step": 249350 }, { "epoch": 2.76, "learning_rate": 3.982674875247291e-06, "loss": 0.5594, "step": 249355 }, { "epoch": 2.76, "learning_rate": 3.981752148108779e-06, "loss": 0.6132, "step": 249360 }, { "epoch": 2.76, "learning_rate": 3.980829420970266e-06, "loss": 0.538, "step": 249365 }, { "epoch": 2.76, "learning_rate": 3.9799066938317536e-06, "loss": 0.5522, "step": 249370 }, { "epoch": 2.76, "learning_rate": 3.978983966693241e-06, "loss": 0.5997, "step": 249375 }, { "epoch": 2.76, "learning_rate": 3.978061239554729e-06, "loss": 0.5988, "step": 249380 }, { "epoch": 2.76, "learning_rate": 3.977138512416217e-06, "loss": 0.5416, "step": 249385 }, { "epoch": 2.76, "learning_rate": 3.976215785277704e-06, "loss": 0.5847, "step": 249390 }, { "epoch": 2.76, "learning_rate": 3.975293058139191e-06, "loss": 0.5555, "step": 249395 }, { "epoch": 2.76, "learning_rate": 3.974370331000679e-06, "loss": 0.5528, "step": 249400 }, { "epoch": 2.76, "learning_rate": 3.973447603862167e-06, "loss": 0.5762, "step": 249405 }, { "epoch": 2.76, "learning_rate": 3.972524876723655e-06, "loss": 0.5996, "step": 249410 }, { "epoch": 2.76, "learning_rate": 3.9716021495851425e-06, "loss": 0.5461, "step": 249415 }, { "epoch": 2.76, "learning_rate": 3.9706794224466295e-06, "loss": 0.5992, "step": 249420 }, { "epoch": 2.76, "learning_rate": 3.969756695308117e-06, "loss": 0.6317, "step": 249425 }, { "epoch": 2.76, "learning_rate": 3.968833968169604e-06, "loss": 0.5182, "step": 249430 }, { "epoch": 2.76, "learning_rate": 3.967911241031092e-06, "loss": 0.5761, "step": 249435 }, { "epoch": 2.76, "learning_rate": 3.96698851389258e-06, "loss": 0.6029, "step": 249440 }, { "epoch": 2.76, "learning_rate": 3.966065786754068e-06, "loss": 0.5506, "step": 249445 }, { "epoch": 2.76, "learning_rate": 3.965143059615555e-06, "loss": 0.6119, "step": 249450 }, { "epoch": 2.76, "learning_rate": 3.964220332477043e-06, "loss": 0.571, "step": 249455 }, { "epoch": 2.76, "learning_rate": 3.963297605338531e-06, "loss": 0.5186, "step": 249460 }, { "epoch": 2.76, "learning_rate": 3.9623748782000185e-06, "loss": 0.5709, "step": 249465 }, { "epoch": 2.76, "learning_rate": 3.9614521510615055e-06, "loss": 0.6015, "step": 249470 }, { "epoch": 2.76, "learning_rate": 3.9605294239229925e-06, "loss": 0.6047, "step": 249475 }, { "epoch": 2.76, "learning_rate": 3.95960669678448e-06, "loss": 0.547, "step": 249480 }, { "epoch": 2.76, "learning_rate": 3.958683969645968e-06, "loss": 0.5869, "step": 249485 }, { "epoch": 2.76, "learning_rate": 3.957761242507456e-06, "loss": 0.5677, "step": 249490 }, { "epoch": 2.76, "learning_rate": 3.956838515368944e-06, "loss": 0.5068, "step": 249495 }, { "epoch": 2.76, "learning_rate": 3.955915788230431e-06, "loss": 0.5886, "step": 249500 }, { "epoch": 2.76, "learning_rate": 3.954993061091919e-06, "loss": 0.5989, "step": 249505 }, { "epoch": 2.76, "learning_rate": 3.954070333953406e-06, "loss": 0.5878, "step": 249510 }, { "epoch": 2.76, "learning_rate": 3.9531476068148936e-06, "loss": 0.5908, "step": 249515 }, { "epoch": 2.76, "learning_rate": 3.952224879676381e-06, "loss": 0.5312, "step": 249520 }, { "epoch": 2.76, "learning_rate": 3.951302152537869e-06, "loss": 0.6036, "step": 249525 }, { "epoch": 2.76, "learning_rate": 3.950379425399356e-06, "loss": 0.5409, "step": 249530 }, { "epoch": 2.76, "learning_rate": 3.949456698260844e-06, "loss": 0.6129, "step": 249535 }, { "epoch": 2.76, "learning_rate": 3.948533971122332e-06, "loss": 0.5588, "step": 249540 }, { "epoch": 2.76, "learning_rate": 3.947611243983819e-06, "loss": 0.5699, "step": 249545 }, { "epoch": 2.76, "learning_rate": 3.946688516845307e-06, "loss": 0.6292, "step": 249550 }, { "epoch": 2.76, "learning_rate": 3.945765789706795e-06, "loss": 0.615, "step": 249555 }, { "epoch": 2.76, "learning_rate": 3.944843062568282e-06, "loss": 0.5697, "step": 249560 }, { "epoch": 2.76, "learning_rate": 3.9439203354297695e-06, "loss": 0.6111, "step": 249565 }, { "epoch": 2.76, "learning_rate": 3.942997608291257e-06, "loss": 0.5239, "step": 249570 }, { "epoch": 2.76, "learning_rate": 3.942074881152745e-06, "loss": 0.5424, "step": 249575 }, { "epoch": 2.76, "learning_rate": 3.941152154014233e-06, "loss": 0.5716, "step": 249580 }, { "epoch": 2.76, "learning_rate": 3.94022942687572e-06, "loss": 0.5202, "step": 249585 }, { "epoch": 2.76, "learning_rate": 3.939306699737207e-06, "loss": 0.6498, "step": 249590 }, { "epoch": 2.76, "learning_rate": 3.938383972598695e-06, "loss": 0.5373, "step": 249595 }, { "epoch": 2.76, "learning_rate": 3.937461245460183e-06, "loss": 0.5665, "step": 249600 }, { "epoch": 2.76, "learning_rate": 3.936538518321671e-06, "loss": 0.5716, "step": 249605 }, { "epoch": 2.76, "learning_rate": 3.935615791183158e-06, "loss": 0.5714, "step": 249610 }, { "epoch": 2.76, "learning_rate": 3.9346930640446454e-06, "loss": 0.5428, "step": 249615 }, { "epoch": 2.76, "learning_rate": 3.9337703369061324e-06, "loss": 0.6139, "step": 249620 }, { "epoch": 2.76, "learning_rate": 3.93284760976762e-06, "loss": 0.5826, "step": 249625 }, { "epoch": 2.76, "learning_rate": 3.931924882629108e-06, "loss": 0.5541, "step": 249630 }, { "epoch": 2.76, "learning_rate": 3.931002155490596e-06, "loss": 0.5985, "step": 249635 }, { "epoch": 2.76, "learning_rate": 3.930079428352083e-06, "loss": 0.5731, "step": 249640 }, { "epoch": 2.76, "learning_rate": 3.929156701213571e-06, "loss": 0.5159, "step": 249645 }, { "epoch": 2.76, "learning_rate": 3.928233974075059e-06, "loss": 0.605, "step": 249650 }, { "epoch": 2.76, "learning_rate": 3.9273112469365465e-06, "loss": 0.5563, "step": 249655 }, { "epoch": 2.76, "learning_rate": 3.9263885197980335e-06, "loss": 0.5698, "step": 249660 }, { "epoch": 2.76, "learning_rate": 3.925465792659521e-06, "loss": 0.5776, "step": 249665 }, { "epoch": 2.76, "learning_rate": 3.924543065521008e-06, "loss": 0.5769, "step": 249670 }, { "epoch": 2.76, "learning_rate": 3.923620338382496e-06, "loss": 0.5693, "step": 249675 }, { "epoch": 2.76, "learning_rate": 3.922697611243984e-06, "loss": 0.5041, "step": 249680 }, { "epoch": 2.76, "learning_rate": 3.921774884105472e-06, "loss": 0.5852, "step": 249685 }, { "epoch": 2.76, "learning_rate": 3.92085215696696e-06, "loss": 0.5556, "step": 249690 }, { "epoch": 2.76, "learning_rate": 3.919929429828447e-06, "loss": 0.6041, "step": 249695 }, { "epoch": 2.76, "learning_rate": 3.919006702689934e-06, "loss": 0.6291, "step": 249700 }, { "epoch": 2.76, "learning_rate": 3.918083975551422e-06, "loss": 0.5695, "step": 249705 }, { "epoch": 2.76, "learning_rate": 3.9171612484129095e-06, "loss": 0.5986, "step": 249710 }, { "epoch": 2.77, "learning_rate": 3.916238521274397e-06, "loss": 0.6106, "step": 249715 }, { "epoch": 2.77, "learning_rate": 3.915315794135885e-06, "loss": 0.5437, "step": 249720 }, { "epoch": 2.77, "learning_rate": 3.914393066997372e-06, "loss": 0.5522, "step": 249725 }, { "epoch": 2.77, "learning_rate": 3.91347033985886e-06, "loss": 0.5773, "step": 249730 }, { "epoch": 2.77, "learning_rate": 3.912547612720347e-06, "loss": 0.5219, "step": 249735 }, { "epoch": 2.77, "learning_rate": 3.911624885581835e-06, "loss": 0.5688, "step": 249740 }, { "epoch": 2.77, "learning_rate": 3.910702158443323e-06, "loss": 0.5791, "step": 249745 }, { "epoch": 2.77, "learning_rate": 3.90977943130481e-06, "loss": 0.565, "step": 249750 }, { "epoch": 2.77, "learning_rate": 3.908856704166298e-06, "loss": 0.5681, "step": 249755 }, { "epoch": 2.77, "learning_rate": 3.9079339770277854e-06, "loss": 0.5693, "step": 249760 }, { "epoch": 2.77, "learning_rate": 3.907011249889273e-06, "loss": 0.5768, "step": 249765 }, { "epoch": 2.77, "learning_rate": 3.906088522750761e-06, "loss": 0.5712, "step": 249770 }, { "epoch": 2.77, "learning_rate": 3.905165795612248e-06, "loss": 0.6749, "step": 249775 }, { "epoch": 2.77, "learning_rate": 3.904243068473735e-06, "loss": 0.5217, "step": 249780 }, { "epoch": 2.77, "learning_rate": 3.903320341335223e-06, "loss": 0.5658, "step": 249785 }, { "epoch": 2.77, "learning_rate": 3.902397614196711e-06, "loss": 0.5552, "step": 249790 }, { "epoch": 2.77, "learning_rate": 3.901474887058199e-06, "loss": 0.593, "step": 249795 }, { "epoch": 2.77, "learning_rate": 3.9005521599196865e-06, "loss": 0.6034, "step": 249800 }, { "epoch": 2.77, "learning_rate": 3.8996294327811735e-06, "loss": 0.6351, "step": 249805 }, { "epoch": 2.77, "learning_rate": 3.8987067056426605e-06, "loss": 0.5522, "step": 249810 }, { "epoch": 2.77, "learning_rate": 3.897783978504148e-06, "loss": 0.5759, "step": 249815 }, { "epoch": 2.77, "learning_rate": 3.896861251365636e-06, "loss": 0.5582, "step": 249820 }, { "epoch": 2.77, "learning_rate": 3.895938524227124e-06, "loss": 0.5418, "step": 249825 }, { "epoch": 2.77, "learning_rate": 3.895015797088612e-06, "loss": 0.6068, "step": 249830 }, { "epoch": 2.77, "learning_rate": 3.894093069950099e-06, "loss": 0.6468, "step": 249835 }, { "epoch": 2.77, "learning_rate": 3.893170342811587e-06, "loss": 0.6051, "step": 249840 }, { "epoch": 2.77, "learning_rate": 3.892247615673075e-06, "loss": 0.6075, "step": 249845 }, { "epoch": 2.77, "learning_rate": 3.891324888534562e-06, "loss": 0.5807, "step": 249850 }, { "epoch": 2.77, "learning_rate": 3.8904021613960495e-06, "loss": 0.5684, "step": 249855 }, { "epoch": 2.77, "learning_rate": 3.889479434257537e-06, "loss": 0.6042, "step": 249860 }, { "epoch": 2.77, "learning_rate": 3.888556707119024e-06, "loss": 0.6455, "step": 249865 }, { "epoch": 2.77, "learning_rate": 3.887633979980512e-06, "loss": 0.5751, "step": 249870 }, { "epoch": 2.77, "learning_rate": 3.886711252842e-06, "loss": 0.5891, "step": 249875 }, { "epoch": 2.77, "learning_rate": 3.885788525703488e-06, "loss": 0.6197, "step": 249880 }, { "epoch": 2.77, "learning_rate": 3.884865798564975e-06, "loss": 0.5568, "step": 249885 }, { "epoch": 2.77, "learning_rate": 3.883943071426462e-06, "loss": 0.5079, "step": 249890 }, { "epoch": 2.77, "learning_rate": 3.88302034428795e-06, "loss": 0.6338, "step": 249895 }, { "epoch": 2.77, "learning_rate": 3.8820976171494376e-06, "loss": 0.5442, "step": 249900 }, { "epoch": 2.77, "learning_rate": 3.881174890010925e-06, "loss": 0.5448, "step": 249905 }, { "epoch": 2.77, "learning_rate": 3.880252162872413e-06, "loss": 0.5555, "step": 249910 }, { "epoch": 2.77, "learning_rate": 3.8793294357339e-06, "loss": 0.5691, "step": 249915 }, { "epoch": 2.77, "learning_rate": 3.878406708595388e-06, "loss": 0.5487, "step": 249920 }, { "epoch": 2.77, "learning_rate": 3.877483981456875e-06, "loss": 0.6275, "step": 249925 }, { "epoch": 2.77, "learning_rate": 3.876561254318363e-06, "loss": 0.5681, "step": 249930 }, { "epoch": 2.77, "learning_rate": 3.875638527179851e-06, "loss": 0.5735, "step": 249935 }, { "epoch": 2.77, "learning_rate": 3.874715800041339e-06, "loss": 0.5758, "step": 249940 }, { "epoch": 2.77, "learning_rate": 3.873793072902826e-06, "loss": 0.5318, "step": 249945 }, { "epoch": 2.77, "learning_rate": 3.8728703457643135e-06, "loss": 0.5933, "step": 249950 }, { "epoch": 2.77, "learning_rate": 3.871947618625801e-06, "loss": 0.5939, "step": 249955 }, { "epoch": 2.77, "learning_rate": 3.871024891487289e-06, "loss": 0.5646, "step": 249960 }, { "epoch": 2.77, "learning_rate": 3.870102164348776e-06, "loss": 0.5841, "step": 249965 }, { "epoch": 2.77, "learning_rate": 3.869179437210264e-06, "loss": 0.5711, "step": 249970 }, { "epoch": 2.77, "learning_rate": 3.868256710071751e-06, "loss": 0.5461, "step": 249975 }, { "epoch": 2.77, "learning_rate": 3.867333982933239e-06, "loss": 0.5622, "step": 249980 }, { "epoch": 2.77, "learning_rate": 3.866411255794727e-06, "loss": 0.56, "step": 249985 }, { "epoch": 2.77, "learning_rate": 3.865488528656215e-06, "loss": 0.5783, "step": 249990 }, { "epoch": 2.77, "learning_rate": 3.8645658015177025e-06, "loss": 0.6505, "step": 249995 }, { "epoch": 2.77, "learning_rate": 3.8636430743791895e-06, "loss": 0.6051, "step": 250000 }, { "epoch": 2.77, "eval_loss": 0.5446848273277283, "eval_runtime": 69.6144, "eval_samples_per_second": 28.73, "eval_steps_per_second": 14.365, "step": 250000 }, { "epoch": 2.77, "learning_rate": 3.8627203472406765e-06, "loss": 0.57, "step": 250005 }, { "epoch": 2.77, "learning_rate": 3.861797620102164e-06, "loss": 0.5464, "step": 250010 }, { "epoch": 2.77, "learning_rate": 3.860874892963652e-06, "loss": 0.5551, "step": 250015 }, { "epoch": 2.77, "learning_rate": 3.85995216582514e-06, "loss": 0.6107, "step": 250020 }, { "epoch": 2.77, "learning_rate": 3.859029438686627e-06, "loss": 0.6457, "step": 250025 }, { "epoch": 2.77, "learning_rate": 3.858106711548115e-06, "loss": 0.5344, "step": 250030 }, { "epoch": 2.77, "learning_rate": 3.857183984409603e-06, "loss": 0.5862, "step": 250035 }, { "epoch": 2.77, "learning_rate": 3.85626125727109e-06, "loss": 0.5835, "step": 250040 }, { "epoch": 2.77, "learning_rate": 3.8553385301325776e-06, "loss": 0.5812, "step": 250045 }, { "epoch": 2.77, "learning_rate": 3.854415802994065e-06, "loss": 0.5712, "step": 250050 }, { "epoch": 2.77, "learning_rate": 3.853493075855552e-06, "loss": 0.594, "step": 250055 }, { "epoch": 2.77, "learning_rate": 3.85257034871704e-06, "loss": 0.5539, "step": 250060 }, { "epoch": 2.77, "learning_rate": 3.851647621578528e-06, "loss": 0.6179, "step": 250065 }, { "epoch": 2.77, "learning_rate": 3.850724894440016e-06, "loss": 0.5502, "step": 250070 }, { "epoch": 2.77, "learning_rate": 3.849802167301503e-06, "loss": 0.6117, "step": 250075 }, { "epoch": 2.77, "learning_rate": 3.848879440162991e-06, "loss": 0.6073, "step": 250080 }, { "epoch": 2.77, "learning_rate": 3.847956713024478e-06, "loss": 0.5892, "step": 250085 }, { "epoch": 2.77, "learning_rate": 3.847033985885966e-06, "loss": 0.6043, "step": 250090 }, { "epoch": 2.77, "learning_rate": 3.8461112587474535e-06, "loss": 0.5583, "step": 250095 }, { "epoch": 2.77, "learning_rate": 3.845188531608941e-06, "loss": 0.583, "step": 250100 }, { "epoch": 2.77, "learning_rate": 3.844265804470429e-06, "loss": 0.6067, "step": 250105 }, { "epoch": 2.77, "learning_rate": 3.843343077331916e-06, "loss": 0.5724, "step": 250110 }, { "epoch": 2.77, "learning_rate": 3.842420350193403e-06, "loss": 0.5716, "step": 250115 }, { "epoch": 2.77, "learning_rate": 3.841497623054891e-06, "loss": 0.5941, "step": 250120 }, { "epoch": 2.77, "learning_rate": 3.840574895916379e-06, "loss": 0.5435, "step": 250125 }, { "epoch": 2.77, "learning_rate": 3.839652168777867e-06, "loss": 0.5307, "step": 250130 }, { "epoch": 2.77, "learning_rate": 3.838729441639355e-06, "loss": 0.553, "step": 250135 }, { "epoch": 2.77, "learning_rate": 3.837806714500842e-06, "loss": 0.6079, "step": 250140 }, { "epoch": 2.77, "learning_rate": 3.8368839873623295e-06, "loss": 0.571, "step": 250145 }, { "epoch": 2.77, "learning_rate": 3.835961260223817e-06, "loss": 0.5915, "step": 250150 }, { "epoch": 2.77, "learning_rate": 3.835038533085304e-06, "loss": 0.5936, "step": 250155 }, { "epoch": 2.77, "learning_rate": 3.834115805946792e-06, "loss": 0.5116, "step": 250160 }, { "epoch": 2.77, "learning_rate": 3.833193078808279e-06, "loss": 0.5447, "step": 250165 }, { "epoch": 2.77, "learning_rate": 3.832270351669767e-06, "loss": 0.5416, "step": 250170 }, { "epoch": 2.77, "learning_rate": 3.831347624531255e-06, "loss": 0.5425, "step": 250175 }, { "epoch": 2.77, "learning_rate": 3.830424897392743e-06, "loss": 0.5886, "step": 250180 }, { "epoch": 2.77, "learning_rate": 3.8295021702542305e-06, "loss": 0.5468, "step": 250185 }, { "epoch": 2.77, "learning_rate": 3.8285794431157176e-06, "loss": 0.5743, "step": 250190 }, { "epoch": 2.77, "learning_rate": 3.8276567159772046e-06, "loss": 0.5658, "step": 250195 }, { "epoch": 2.77, "learning_rate": 3.826733988838692e-06, "loss": 0.5554, "step": 250200 }, { "epoch": 2.77, "learning_rate": 3.82581126170018e-06, "loss": 0.6159, "step": 250205 }, { "epoch": 2.77, "learning_rate": 3.824888534561668e-06, "loss": 0.5772, "step": 250210 }, { "epoch": 2.77, "learning_rate": 3.823965807423156e-06, "loss": 0.596, "step": 250215 }, { "epoch": 2.77, "learning_rate": 3.823043080284643e-06, "loss": 0.5667, "step": 250220 }, { "epoch": 2.77, "learning_rate": 3.822120353146131e-06, "loss": 0.5327, "step": 250225 }, { "epoch": 2.77, "learning_rate": 3.821197626007618e-06, "loss": 0.5154, "step": 250230 }, { "epoch": 2.77, "learning_rate": 3.820274898869106e-06, "loss": 0.5765, "step": 250235 }, { "epoch": 2.77, "learning_rate": 3.8193521717305935e-06, "loss": 0.5844, "step": 250240 }, { "epoch": 2.77, "learning_rate": 3.818429444592081e-06, "loss": 0.5939, "step": 250245 }, { "epoch": 2.77, "learning_rate": 3.817506717453568e-06, "loss": 0.5917, "step": 250250 }, { "epoch": 2.77, "learning_rate": 3.816583990315056e-06, "loss": 0.5899, "step": 250255 }, { "epoch": 2.77, "learning_rate": 3.815661263176544e-06, "loss": 0.5794, "step": 250260 }, { "epoch": 2.77, "learning_rate": 3.814738536038031e-06, "loss": 0.6058, "step": 250265 }, { "epoch": 2.77, "learning_rate": 3.8138158088995185e-06, "loss": 0.5641, "step": 250270 }, { "epoch": 2.77, "learning_rate": 3.8128930817610063e-06, "loss": 0.5291, "step": 250275 }, { "epoch": 2.77, "learning_rate": 3.811970354622494e-06, "loss": 0.5732, "step": 250280 }, { "epoch": 2.77, "learning_rate": 3.8110476274839816e-06, "loss": 0.5733, "step": 250285 }, { "epoch": 2.77, "learning_rate": 3.8101249003454694e-06, "loss": 0.5613, "step": 250290 }, { "epoch": 2.77, "learning_rate": 3.809202173206957e-06, "loss": 0.6213, "step": 250295 }, { "epoch": 2.77, "learning_rate": 3.8082794460684447e-06, "loss": 0.6282, "step": 250300 }, { "epoch": 2.77, "learning_rate": 3.8073567189299317e-06, "loss": 0.5701, "step": 250305 }, { "epoch": 2.77, "learning_rate": 3.806433991791419e-06, "loss": 0.5895, "step": 250310 }, { "epoch": 2.77, "learning_rate": 3.805511264652907e-06, "loss": 0.6035, "step": 250315 }, { "epoch": 2.77, "learning_rate": 3.804588537514395e-06, "loss": 0.5372, "step": 250320 }, { "epoch": 2.77, "learning_rate": 3.8036658103758823e-06, "loss": 0.5416, "step": 250325 }, { "epoch": 2.77, "learning_rate": 3.80274308323737e-06, "loss": 0.5429, "step": 250330 }, { "epoch": 2.77, "learning_rate": 3.8018203560988575e-06, "loss": 0.5734, "step": 250335 }, { "epoch": 2.77, "learning_rate": 3.8008976289603454e-06, "loss": 0.5768, "step": 250340 }, { "epoch": 2.77, "learning_rate": 3.7999749018218324e-06, "loss": 0.6143, "step": 250345 }, { "epoch": 2.77, "learning_rate": 3.7990521746833202e-06, "loss": 0.5959, "step": 250350 }, { "epoch": 2.77, "learning_rate": 3.7981294475448077e-06, "loss": 0.65, "step": 250355 }, { "epoch": 2.77, "learning_rate": 3.7972067204062955e-06, "loss": 0.4994, "step": 250360 }, { "epoch": 2.77, "learning_rate": 3.796283993267783e-06, "loss": 0.5885, "step": 250365 }, { "epoch": 2.77, "learning_rate": 3.7953612661292708e-06, "loss": 0.5951, "step": 250370 }, { "epoch": 2.77, "learning_rate": 3.7944385389907586e-06, "loss": 0.5549, "step": 250375 }, { "epoch": 2.77, "learning_rate": 3.7935158118522452e-06, "loss": 0.5769, "step": 250380 }, { "epoch": 2.77, "learning_rate": 3.792593084713733e-06, "loss": 0.5766, "step": 250385 }, { "epoch": 2.77, "learning_rate": 3.791670357575221e-06, "loss": 0.5558, "step": 250390 }, { "epoch": 2.77, "learning_rate": 3.7907476304367083e-06, "loss": 0.5852, "step": 250395 }, { "epoch": 2.77, "learning_rate": 3.789824903298196e-06, "loss": 0.6146, "step": 250400 }, { "epoch": 2.77, "learning_rate": 3.7889021761596836e-06, "loss": 0.6124, "step": 250405 }, { "epoch": 2.77, "learning_rate": 3.7879794490211715e-06, "loss": 0.6622, "step": 250410 }, { "epoch": 2.77, "learning_rate": 3.7870567218826593e-06, "loss": 0.5687, "step": 250415 }, { "epoch": 2.77, "learning_rate": 3.7861339947441463e-06, "loss": 0.6516, "step": 250420 }, { "epoch": 2.77, "learning_rate": 3.7852112676056337e-06, "loss": 0.6201, "step": 250425 }, { "epoch": 2.77, "learning_rate": 3.7842885404671216e-06, "loss": 0.5372, "step": 250430 }, { "epoch": 2.77, "learning_rate": 3.783365813328609e-06, "loss": 0.5442, "step": 250435 }, { "epoch": 2.77, "learning_rate": 3.782443086190097e-06, "loss": 0.5911, "step": 250440 }, { "epoch": 2.77, "learning_rate": 3.7815203590515847e-06, "loss": 0.6074, "step": 250445 }, { "epoch": 2.77, "learning_rate": 3.780597631913072e-06, "loss": 0.6219, "step": 250450 }, { "epoch": 2.77, "learning_rate": 3.779674904774559e-06, "loss": 0.5594, "step": 250455 }, { "epoch": 2.77, "learning_rate": 3.778752177636047e-06, "loss": 0.589, "step": 250460 }, { "epoch": 2.77, "learning_rate": 3.7778294504975344e-06, "loss": 0.5746, "step": 250465 }, { "epoch": 2.77, "learning_rate": 3.7769067233590223e-06, "loss": 0.5631, "step": 250470 }, { "epoch": 2.77, "learning_rate": 3.7759839962205097e-06, "loss": 0.6073, "step": 250475 }, { "epoch": 2.77, "learning_rate": 3.7750612690819975e-06, "loss": 0.5782, "step": 250480 }, { "epoch": 2.77, "learning_rate": 3.7741385419434854e-06, "loss": 0.5763, "step": 250485 }, { "epoch": 2.77, "learning_rate": 3.773215814804973e-06, "loss": 0.5565, "step": 250490 }, { "epoch": 2.77, "learning_rate": 3.77229308766646e-06, "loss": 0.5191, "step": 250495 }, { "epoch": 2.77, "learning_rate": 3.7713703605279477e-06, "loss": 0.6091, "step": 250500 }, { "epoch": 2.77, "learning_rate": 3.770447633389435e-06, "loss": 0.5161, "step": 250505 }, { "epoch": 2.77, "learning_rate": 3.769524906250923e-06, "loss": 0.6158, "step": 250510 }, { "epoch": 2.77, "learning_rate": 3.7686021791124108e-06, "loss": 0.5417, "step": 250515 }, { "epoch": 2.77, "learning_rate": 3.767679451973898e-06, "loss": 0.5563, "step": 250520 }, { "epoch": 2.77, "learning_rate": 3.766756724835386e-06, "loss": 0.6043, "step": 250525 }, { "epoch": 2.77, "learning_rate": 3.7658339976968735e-06, "loss": 0.5569, "step": 250530 }, { "epoch": 2.77, "learning_rate": 3.7649112705583605e-06, "loss": 0.6434, "step": 250535 }, { "epoch": 2.77, "learning_rate": 3.7639885434198483e-06, "loss": 0.5573, "step": 250540 }, { "epoch": 2.77, "learning_rate": 3.7630658162813357e-06, "loss": 0.6025, "step": 250545 }, { "epoch": 2.77, "learning_rate": 3.7621430891428236e-06, "loss": 0.5656, "step": 250550 }, { "epoch": 2.77, "learning_rate": 3.7612203620043114e-06, "loss": 0.522, "step": 250555 }, { "epoch": 2.77, "learning_rate": 3.760297634865799e-06, "loss": 0.5946, "step": 250560 }, { "epoch": 2.77, "learning_rate": 3.7593749077272867e-06, "loss": 0.5456, "step": 250565 }, { "epoch": 2.77, "learning_rate": 3.7584521805887737e-06, "loss": 0.5933, "step": 250570 }, { "epoch": 2.77, "learning_rate": 3.757529453450261e-06, "loss": 0.616, "step": 250575 }, { "epoch": 2.77, "learning_rate": 3.756606726311749e-06, "loss": 0.5485, "step": 250580 }, { "epoch": 2.77, "learning_rate": 3.7556839991732364e-06, "loss": 0.6027, "step": 250585 }, { "epoch": 2.77, "learning_rate": 3.7547612720347243e-06, "loss": 0.5705, "step": 250590 }, { "epoch": 2.77, "learning_rate": 3.753838544896212e-06, "loss": 0.5418, "step": 250595 }, { "epoch": 2.77, "learning_rate": 3.7529158177576995e-06, "loss": 0.5434, "step": 250600 }, { "epoch": 2.77, "learning_rate": 3.7519930906191874e-06, "loss": 0.6367, "step": 250605 }, { "epoch": 2.77, "learning_rate": 3.7510703634806744e-06, "loss": 0.6091, "step": 250610 }, { "epoch": 2.77, "learning_rate": 3.750147636342162e-06, "loss": 0.5718, "step": 250615 }, { "epoch": 2.78, "learning_rate": 3.7492249092036497e-06, "loss": 0.6141, "step": 250620 }, { "epoch": 2.78, "learning_rate": 3.7483021820651375e-06, "loss": 0.5772, "step": 250625 }, { "epoch": 2.78, "learning_rate": 3.747379454926625e-06, "loss": 0.5943, "step": 250630 }, { "epoch": 2.78, "learning_rate": 3.746456727788113e-06, "loss": 0.5654, "step": 250635 }, { "epoch": 2.78, "learning_rate": 3.7455340006496002e-06, "loss": 0.5505, "step": 250640 }, { "epoch": 2.78, "learning_rate": 3.7446112735110872e-06, "loss": 0.5509, "step": 250645 }, { "epoch": 2.78, "learning_rate": 3.743688546372575e-06, "loss": 0.557, "step": 250650 }, { "epoch": 2.78, "learning_rate": 3.7427658192340625e-06, "loss": 0.612, "step": 250655 }, { "epoch": 2.78, "learning_rate": 3.7418430920955503e-06, "loss": 0.5392, "step": 250660 }, { "epoch": 2.78, "learning_rate": 3.740920364957038e-06, "loss": 0.5667, "step": 250665 }, { "epoch": 2.78, "learning_rate": 3.7399976378185256e-06, "loss": 0.5502, "step": 250670 }, { "epoch": 2.78, "learning_rate": 3.7390749106800135e-06, "loss": 0.5657, "step": 250675 }, { "epoch": 2.78, "learning_rate": 3.738152183541501e-06, "loss": 0.586, "step": 250680 }, { "epoch": 2.78, "learning_rate": 3.737229456402988e-06, "loss": 0.5204, "step": 250685 }, { "epoch": 2.78, "learning_rate": 3.7363067292644757e-06, "loss": 0.5528, "step": 250690 }, { "epoch": 2.78, "learning_rate": 3.7353840021259636e-06, "loss": 0.5948, "step": 250695 }, { "epoch": 2.78, "learning_rate": 3.734461274987451e-06, "loss": 0.6099, "step": 250700 }, { "epoch": 2.78, "learning_rate": 3.733538547848939e-06, "loss": 0.5922, "step": 250705 }, { "epoch": 2.78, "learning_rate": 3.7326158207104263e-06, "loss": 0.5916, "step": 250710 }, { "epoch": 2.78, "learning_rate": 3.731693093571914e-06, "loss": 0.5487, "step": 250715 }, { "epoch": 2.78, "learning_rate": 3.730770366433401e-06, "loss": 0.5705, "step": 250720 }, { "epoch": 2.78, "learning_rate": 3.7298476392948886e-06, "loss": 0.5123, "step": 250725 }, { "epoch": 2.78, "learning_rate": 3.7289249121563764e-06, "loss": 0.5876, "step": 250730 }, { "epoch": 2.78, "learning_rate": 3.7280021850178643e-06, "loss": 0.5721, "step": 250735 }, { "epoch": 2.78, "learning_rate": 3.7270794578793517e-06, "loss": 0.604, "step": 250740 }, { "epoch": 2.78, "learning_rate": 3.7261567307408395e-06, "loss": 0.5802, "step": 250745 }, { "epoch": 2.78, "learning_rate": 3.725234003602327e-06, "loss": 0.5811, "step": 250750 }, { "epoch": 2.78, "learning_rate": 3.724311276463815e-06, "loss": 0.5161, "step": 250755 }, { "epoch": 2.78, "learning_rate": 3.723388549325302e-06, "loss": 0.598, "step": 250760 }, { "epoch": 2.78, "learning_rate": 3.7224658221867897e-06, "loss": 0.5113, "step": 250765 }, { "epoch": 2.78, "learning_rate": 3.721543095048277e-06, "loss": 0.56, "step": 250770 }, { "epoch": 2.78, "learning_rate": 3.720620367909765e-06, "loss": 0.555, "step": 250775 }, { "epoch": 2.78, "learning_rate": 3.7196976407712524e-06, "loss": 0.5795, "step": 250780 }, { "epoch": 2.78, "learning_rate": 3.71877491363274e-06, "loss": 0.5558, "step": 250785 }, { "epoch": 2.78, "learning_rate": 3.7178521864942276e-06, "loss": 0.5948, "step": 250790 }, { "epoch": 2.78, "learning_rate": 3.7169294593557155e-06, "loss": 0.5957, "step": 250795 }, { "epoch": 2.78, "learning_rate": 3.7160067322172025e-06, "loss": 0.5938, "step": 250800 }, { "epoch": 2.78, "learning_rate": 3.7150840050786903e-06, "loss": 0.5923, "step": 250805 }, { "epoch": 2.78, "learning_rate": 3.7141612779401778e-06, "loss": 0.5657, "step": 250810 }, { "epoch": 2.78, "learning_rate": 3.7132385508016656e-06, "loss": 0.5655, "step": 250815 }, { "epoch": 2.78, "learning_rate": 3.712315823663153e-06, "loss": 0.6095, "step": 250820 }, { "epoch": 2.78, "learning_rate": 3.711393096524641e-06, "loss": 0.5858, "step": 250825 }, { "epoch": 2.78, "learning_rate": 3.7104703693861287e-06, "loss": 0.5324, "step": 250830 }, { "epoch": 2.78, "learning_rate": 3.7095476422476157e-06, "loss": 0.5853, "step": 250835 }, { "epoch": 2.78, "learning_rate": 3.708624915109103e-06, "loss": 0.5784, "step": 250840 }, { "epoch": 2.78, "learning_rate": 3.707702187970591e-06, "loss": 0.5557, "step": 250845 }, { "epoch": 2.78, "learning_rate": 3.7067794608320784e-06, "loss": 0.5464, "step": 250850 }, { "epoch": 2.78, "learning_rate": 3.7058567336935663e-06, "loss": 0.5813, "step": 250855 }, { "epoch": 2.78, "learning_rate": 3.7049340065550537e-06, "loss": 0.5931, "step": 250860 }, { "epoch": 2.78, "learning_rate": 3.7040112794165415e-06, "loss": 0.6051, "step": 250865 }, { "epoch": 2.78, "learning_rate": 3.7030885522780294e-06, "loss": 0.5651, "step": 250870 }, { "epoch": 2.78, "learning_rate": 3.7021658251395164e-06, "loss": 0.6615, "step": 250875 }, { "epoch": 2.78, "learning_rate": 3.701243098001004e-06, "loss": 0.5404, "step": 250880 }, { "epoch": 2.78, "learning_rate": 3.7003203708624917e-06, "loss": 0.5494, "step": 250885 }, { "epoch": 2.78, "learning_rate": 3.699397643723979e-06, "loss": 0.5191, "step": 250890 }, { "epoch": 2.78, "learning_rate": 3.698474916585467e-06, "loss": 0.587, "step": 250895 }, { "epoch": 2.78, "learning_rate": 3.697552189446955e-06, "loss": 0.5893, "step": 250900 }, { "epoch": 2.78, "learning_rate": 3.6966294623084422e-06, "loss": 0.552, "step": 250905 }, { "epoch": 2.78, "learning_rate": 3.6957067351699292e-06, "loss": 0.5827, "step": 250910 }, { "epoch": 2.78, "learning_rate": 3.694784008031417e-06, "loss": 0.595, "step": 250915 }, { "epoch": 2.78, "learning_rate": 3.6938612808929045e-06, "loss": 0.6452, "step": 250920 }, { "epoch": 2.78, "learning_rate": 3.6929385537543923e-06, "loss": 0.5691, "step": 250925 }, { "epoch": 2.78, "learning_rate": 3.6920158266158798e-06, "loss": 0.6098, "step": 250930 }, { "epoch": 2.78, "learning_rate": 3.6910930994773676e-06, "loss": 0.5841, "step": 250935 }, { "epoch": 2.78, "learning_rate": 3.6901703723388555e-06, "loss": 0.5677, "step": 250940 }, { "epoch": 2.78, "learning_rate": 3.689247645200343e-06, "loss": 0.5627, "step": 250945 }, { "epoch": 2.78, "learning_rate": 3.68832491806183e-06, "loss": 0.5501, "step": 250950 }, { "epoch": 2.78, "learning_rate": 3.6874021909233177e-06, "loss": 0.5559, "step": 250955 }, { "epoch": 2.78, "learning_rate": 3.686479463784805e-06, "loss": 0.5788, "step": 250960 }, { "epoch": 2.78, "learning_rate": 3.685556736646293e-06, "loss": 0.5668, "step": 250965 }, { "epoch": 2.78, "learning_rate": 3.684634009507781e-06, "loss": 0.5225, "step": 250970 }, { "epoch": 2.78, "learning_rate": 3.6837112823692683e-06, "loss": 0.6698, "step": 250975 }, { "epoch": 2.78, "learning_rate": 3.682788555230756e-06, "loss": 0.5861, "step": 250980 }, { "epoch": 2.78, "learning_rate": 3.6818658280922436e-06, "loss": 0.5224, "step": 250985 }, { "epoch": 2.78, "learning_rate": 3.6809431009537306e-06, "loss": 0.6168, "step": 250990 }, { "epoch": 2.78, "learning_rate": 3.6800203738152184e-06, "loss": 0.5976, "step": 250995 }, { "epoch": 2.78, "learning_rate": 3.679097646676706e-06, "loss": 0.5866, "step": 251000 }, { "epoch": 2.78, "eval_loss": 0.5534600019454956, "eval_runtime": 69.6917, "eval_samples_per_second": 28.698, "eval_steps_per_second": 14.349, "step": 251000 }, { "epoch": 2.78, "learning_rate": 3.6781749195381937e-06, "loss": 0.5543, "step": 251005 }, { "epoch": 2.78, "learning_rate": 3.6772521923996815e-06, "loss": 0.5426, "step": 251010 }, { "epoch": 2.78, "learning_rate": 3.676329465261169e-06, "loss": 0.5088, "step": 251015 }, { "epoch": 2.78, "learning_rate": 3.675406738122657e-06, "loss": 0.556, "step": 251020 }, { "epoch": 2.78, "learning_rate": 3.674484010984144e-06, "loss": 0.5992, "step": 251025 }, { "epoch": 2.78, "learning_rate": 3.6735612838456312e-06, "loss": 0.6166, "step": 251030 }, { "epoch": 2.78, "learning_rate": 3.672638556707119e-06, "loss": 0.5693, "step": 251035 }, { "epoch": 2.78, "learning_rate": 3.671715829568607e-06, "loss": 0.6066, "step": 251040 }, { "epoch": 2.78, "learning_rate": 3.6707931024300944e-06, "loss": 0.552, "step": 251045 }, { "epoch": 2.78, "learning_rate": 3.669870375291582e-06, "loss": 0.6052, "step": 251050 }, { "epoch": 2.78, "learning_rate": 3.6689476481530696e-06, "loss": 0.5437, "step": 251055 }, { "epoch": 2.78, "learning_rate": 3.6680249210145575e-06, "loss": 0.5739, "step": 251060 }, { "epoch": 2.78, "learning_rate": 3.6671021938760445e-06, "loss": 0.523, "step": 251065 }, { "epoch": 2.78, "learning_rate": 3.666179466737532e-06, "loss": 0.5143, "step": 251070 }, { "epoch": 2.78, "learning_rate": 3.6652567395990198e-06, "loss": 0.5864, "step": 251075 }, { "epoch": 2.78, "learning_rate": 3.6643340124605076e-06, "loss": 0.5794, "step": 251080 }, { "epoch": 2.78, "learning_rate": 3.663411285321995e-06, "loss": 0.5626, "step": 251085 }, { "epoch": 2.78, "learning_rate": 3.662488558183483e-06, "loss": 0.5477, "step": 251090 }, { "epoch": 2.78, "learning_rate": 3.6615658310449703e-06, "loss": 0.535, "step": 251095 }, { "epoch": 2.78, "learning_rate": 3.6606431039064573e-06, "loss": 0.5252, "step": 251100 }, { "epoch": 2.78, "learning_rate": 3.659720376767945e-06, "loss": 0.5735, "step": 251105 }, { "epoch": 2.78, "learning_rate": 3.658797649629433e-06, "loss": 0.6167, "step": 251110 }, { "epoch": 2.78, "learning_rate": 3.6578749224909204e-06, "loss": 0.5668, "step": 251115 }, { "epoch": 2.78, "learning_rate": 3.6569521953524083e-06, "loss": 0.5675, "step": 251120 }, { "epoch": 2.78, "learning_rate": 3.6560294682138957e-06, "loss": 0.5933, "step": 251125 }, { "epoch": 2.78, "learning_rate": 3.6551067410753836e-06, "loss": 0.5853, "step": 251130 }, { "epoch": 2.78, "learning_rate": 3.654184013936871e-06, "loss": 0.6321, "step": 251135 }, { "epoch": 2.78, "learning_rate": 3.653261286798358e-06, "loss": 0.5594, "step": 251140 }, { "epoch": 2.78, "learning_rate": 3.652338559659846e-06, "loss": 0.5313, "step": 251145 }, { "epoch": 2.78, "learning_rate": 3.6514158325213337e-06, "loss": 0.5541, "step": 251150 }, { "epoch": 2.78, "learning_rate": 3.650493105382821e-06, "loss": 0.4884, "step": 251155 }, { "epoch": 2.78, "learning_rate": 3.649570378244309e-06, "loss": 0.5573, "step": 251160 }, { "epoch": 2.78, "learning_rate": 3.6486476511057964e-06, "loss": 0.5245, "step": 251165 }, { "epoch": 2.78, "learning_rate": 3.6477249239672842e-06, "loss": 0.5827, "step": 251170 }, { "epoch": 2.78, "learning_rate": 3.646802196828772e-06, "loss": 0.5849, "step": 251175 }, { "epoch": 2.78, "learning_rate": 3.645879469690259e-06, "loss": 0.5346, "step": 251180 }, { "epoch": 2.78, "learning_rate": 3.6449567425517465e-06, "loss": 0.635, "step": 251185 }, { "epoch": 2.78, "learning_rate": 3.6440340154132343e-06, "loss": 0.5901, "step": 251190 }, { "epoch": 2.78, "learning_rate": 3.6431112882747218e-06, "loss": 0.5995, "step": 251195 }, { "epoch": 2.78, "learning_rate": 3.6421885611362096e-06, "loss": 0.5619, "step": 251200 }, { "epoch": 2.78, "learning_rate": 3.641265833997697e-06, "loss": 0.5361, "step": 251205 }, { "epoch": 2.78, "learning_rate": 3.640343106859185e-06, "loss": 0.6258, "step": 251210 }, { "epoch": 2.78, "learning_rate": 3.639420379720672e-06, "loss": 0.6104, "step": 251215 }, { "epoch": 2.78, "learning_rate": 3.6384976525821597e-06, "loss": 0.5963, "step": 251220 }, { "epoch": 2.78, "learning_rate": 3.637574925443647e-06, "loss": 0.6588, "step": 251225 }, { "epoch": 2.78, "learning_rate": 3.636652198305135e-06, "loss": 0.5784, "step": 251230 }, { "epoch": 2.78, "learning_rate": 3.6357294711666224e-06, "loss": 0.551, "step": 251235 }, { "epoch": 2.78, "learning_rate": 3.6348067440281103e-06, "loss": 0.5507, "step": 251240 }, { "epoch": 2.78, "learning_rate": 3.633884016889598e-06, "loss": 0.5684, "step": 251245 }, { "epoch": 2.78, "learning_rate": 3.6329612897510856e-06, "loss": 0.5791, "step": 251250 }, { "epoch": 2.78, "learning_rate": 3.6320385626125726e-06, "loss": 0.5953, "step": 251255 }, { "epoch": 2.78, "learning_rate": 3.6311158354740604e-06, "loss": 0.5512, "step": 251260 }, { "epoch": 2.78, "learning_rate": 3.630193108335548e-06, "loss": 0.571, "step": 251265 }, { "epoch": 2.78, "learning_rate": 3.6292703811970357e-06, "loss": 0.5524, "step": 251270 }, { "epoch": 2.78, "learning_rate": 3.628347654058523e-06, "loss": 0.5833, "step": 251275 }, { "epoch": 2.78, "learning_rate": 3.627424926920011e-06, "loss": 0.5713, "step": 251280 }, { "epoch": 2.78, "learning_rate": 3.626502199781499e-06, "loss": 0.5849, "step": 251285 }, { "epoch": 2.78, "learning_rate": 3.625579472642986e-06, "loss": 0.5944, "step": 251290 }, { "epoch": 2.78, "learning_rate": 3.6246567455044732e-06, "loss": 0.583, "step": 251295 }, { "epoch": 2.78, "learning_rate": 3.623734018365961e-06, "loss": 0.6254, "step": 251300 }, { "epoch": 2.78, "learning_rate": 3.6228112912274485e-06, "loss": 0.5594, "step": 251305 }, { "epoch": 2.78, "learning_rate": 3.6218885640889364e-06, "loss": 0.6029, "step": 251310 }, { "epoch": 2.78, "learning_rate": 3.620965836950424e-06, "loss": 0.6053, "step": 251315 }, { "epoch": 2.78, "learning_rate": 3.6200431098119116e-06, "loss": 0.6183, "step": 251320 }, { "epoch": 2.78, "learning_rate": 3.6191203826733995e-06, "loss": 0.5956, "step": 251325 }, { "epoch": 2.78, "learning_rate": 3.6181976555348865e-06, "loss": 0.6005, "step": 251330 }, { "epoch": 2.78, "learning_rate": 3.617274928396374e-06, "loss": 0.559, "step": 251335 }, { "epoch": 2.78, "learning_rate": 3.6163522012578618e-06, "loss": 0.58, "step": 251340 }, { "epoch": 2.78, "learning_rate": 3.615429474119349e-06, "loss": 0.5412, "step": 251345 }, { "epoch": 2.78, "learning_rate": 3.614506746980837e-06, "loss": 0.5594, "step": 251350 }, { "epoch": 2.78, "learning_rate": 3.613584019842325e-06, "loss": 0.582, "step": 251355 }, { "epoch": 2.78, "learning_rate": 3.6126612927038123e-06, "loss": 0.6023, "step": 251360 }, { "epoch": 2.78, "learning_rate": 3.6117385655652993e-06, "loss": 0.5241, "step": 251365 }, { "epoch": 2.78, "learning_rate": 3.610815838426787e-06, "loss": 0.5318, "step": 251370 }, { "epoch": 2.78, "learning_rate": 3.6098931112882746e-06, "loss": 0.5391, "step": 251375 }, { "epoch": 2.78, "learning_rate": 3.6089703841497624e-06, "loss": 0.5859, "step": 251380 }, { "epoch": 2.78, "learning_rate": 3.6080476570112503e-06, "loss": 0.6045, "step": 251385 }, { "epoch": 2.78, "learning_rate": 3.6071249298727377e-06, "loss": 0.6289, "step": 251390 }, { "epoch": 2.78, "learning_rate": 3.6062022027342256e-06, "loss": 0.5913, "step": 251395 }, { "epoch": 2.78, "learning_rate": 3.605279475595713e-06, "loss": 0.5615, "step": 251400 }, { "epoch": 2.78, "learning_rate": 3.6043567484572e-06, "loss": 0.5756, "step": 251405 }, { "epoch": 2.78, "learning_rate": 3.603434021318688e-06, "loss": 0.6208, "step": 251410 }, { "epoch": 2.78, "learning_rate": 3.6025112941801753e-06, "loss": 0.5909, "step": 251415 }, { "epoch": 2.78, "learning_rate": 3.601588567041663e-06, "loss": 0.5437, "step": 251420 }, { "epoch": 2.78, "learning_rate": 3.600665839903151e-06, "loss": 0.5801, "step": 251425 }, { "epoch": 2.78, "learning_rate": 3.5997431127646384e-06, "loss": 0.565, "step": 251430 }, { "epoch": 2.78, "learning_rate": 3.5988203856261262e-06, "loss": 0.5785, "step": 251435 }, { "epoch": 2.78, "learning_rate": 3.5978976584876137e-06, "loss": 0.5926, "step": 251440 }, { "epoch": 2.78, "learning_rate": 3.5969749313491007e-06, "loss": 0.5553, "step": 251445 }, { "epoch": 2.78, "learning_rate": 3.5960522042105885e-06, "loss": 0.5824, "step": 251450 }, { "epoch": 2.78, "learning_rate": 3.5951294770720764e-06, "loss": 0.5712, "step": 251455 }, { "epoch": 2.78, "learning_rate": 3.5942067499335638e-06, "loss": 0.5843, "step": 251460 }, { "epoch": 2.78, "learning_rate": 3.5932840227950516e-06, "loss": 0.5803, "step": 251465 }, { "epoch": 2.78, "learning_rate": 3.592361295656539e-06, "loss": 0.5873, "step": 251470 }, { "epoch": 2.78, "learning_rate": 3.591438568518027e-06, "loss": 0.6164, "step": 251475 }, { "epoch": 2.78, "learning_rate": 3.590515841379514e-06, "loss": 0.577, "step": 251480 }, { "epoch": 2.78, "learning_rate": 3.5895931142410013e-06, "loss": 0.5475, "step": 251485 }, { "epoch": 2.78, "learning_rate": 3.588670387102489e-06, "loss": 0.5712, "step": 251490 }, { "epoch": 2.78, "learning_rate": 3.587747659963977e-06, "loss": 0.5958, "step": 251495 }, { "epoch": 2.78, "learning_rate": 3.5868249328254644e-06, "loss": 0.6145, "step": 251500 }, { "epoch": 2.78, "learning_rate": 3.5859022056869523e-06, "loss": 0.6247, "step": 251505 }, { "epoch": 2.78, "learning_rate": 3.5849794785484397e-06, "loss": 0.6299, "step": 251510 }, { "epoch": 2.78, "learning_rate": 3.5840567514099276e-06, "loss": 0.5382, "step": 251515 }, { "epoch": 2.79, "learning_rate": 3.5831340242714146e-06, "loss": 0.5791, "step": 251520 }, { "epoch": 2.79, "learning_rate": 3.5822112971329024e-06, "loss": 0.6033, "step": 251525 }, { "epoch": 2.79, "learning_rate": 3.58128856999439e-06, "loss": 0.5853, "step": 251530 }, { "epoch": 2.79, "learning_rate": 3.5803658428558777e-06, "loss": 0.5376, "step": 251535 }, { "epoch": 2.79, "learning_rate": 3.579443115717365e-06, "loss": 0.5348, "step": 251540 }, { "epoch": 2.79, "learning_rate": 3.578520388578853e-06, "loss": 0.5983, "step": 251545 }, { "epoch": 2.79, "learning_rate": 3.5775976614403404e-06, "loss": 0.537, "step": 251550 }, { "epoch": 2.79, "learning_rate": 3.5766749343018274e-06, "loss": 0.5676, "step": 251555 }, { "epoch": 2.79, "learning_rate": 3.5757522071633152e-06, "loss": 0.5726, "step": 251560 }, { "epoch": 2.79, "learning_rate": 3.574829480024803e-06, "loss": 0.602, "step": 251565 }, { "epoch": 2.79, "learning_rate": 3.5739067528862905e-06, "loss": 0.5866, "step": 251570 }, { "epoch": 2.79, "learning_rate": 3.5729840257477784e-06, "loss": 0.5598, "step": 251575 }, { "epoch": 2.79, "learning_rate": 3.572061298609266e-06, "loss": 0.5387, "step": 251580 }, { "epoch": 2.79, "learning_rate": 3.5711385714707536e-06, "loss": 0.5664, "step": 251585 }, { "epoch": 2.79, "learning_rate": 3.5702158443322415e-06, "loss": 0.5534, "step": 251590 }, { "epoch": 2.79, "learning_rate": 3.569293117193728e-06, "loss": 0.5336, "step": 251595 }, { "epoch": 2.79, "learning_rate": 3.568370390055216e-06, "loss": 0.5606, "step": 251600 }, { "epoch": 2.79, "learning_rate": 3.5674476629167038e-06, "loss": 0.6103, "step": 251605 }, { "epoch": 2.79, "learning_rate": 3.566524935778191e-06, "loss": 0.5986, "step": 251610 }, { "epoch": 2.79, "learning_rate": 3.565602208639679e-06, "loss": 0.5541, "step": 251615 }, { "epoch": 2.79, "learning_rate": 3.5646794815011665e-06, "loss": 0.5742, "step": 251620 }, { "epoch": 2.79, "learning_rate": 3.5637567543626543e-06, "loss": 0.5546, "step": 251625 }, { "epoch": 2.79, "learning_rate": 3.562834027224142e-06, "loss": 0.5242, "step": 251630 }, { "epoch": 2.79, "learning_rate": 3.561911300085629e-06, "loss": 0.6209, "step": 251635 }, { "epoch": 2.79, "learning_rate": 3.5609885729471166e-06, "loss": 0.5745, "step": 251640 }, { "epoch": 2.79, "learning_rate": 3.5600658458086044e-06, "loss": 0.6161, "step": 251645 }, { "epoch": 2.79, "learning_rate": 3.559143118670092e-06, "loss": 0.6, "step": 251650 }, { "epoch": 2.79, "learning_rate": 3.5582203915315797e-06, "loss": 0.5601, "step": 251655 }, { "epoch": 2.79, "learning_rate": 3.5572976643930676e-06, "loss": 0.5419, "step": 251660 }, { "epoch": 2.79, "learning_rate": 3.556374937254555e-06, "loss": 0.5944, "step": 251665 }, { "epoch": 2.79, "learning_rate": 3.555452210116042e-06, "loss": 0.5847, "step": 251670 }, { "epoch": 2.79, "learning_rate": 3.55452948297753e-06, "loss": 0.6391, "step": 251675 }, { "epoch": 2.79, "learning_rate": 3.5536067558390173e-06, "loss": 0.5862, "step": 251680 }, { "epoch": 2.79, "learning_rate": 3.552684028700505e-06, "loss": 0.6019, "step": 251685 }, { "epoch": 2.79, "learning_rate": 3.5517613015619925e-06, "loss": 0.6073, "step": 251690 }, { "epoch": 2.79, "learning_rate": 3.5508385744234804e-06, "loss": 0.5852, "step": 251695 }, { "epoch": 2.79, "learning_rate": 3.5499158472849682e-06, "loss": 0.5561, "step": 251700 }, { "epoch": 2.79, "learning_rate": 3.5489931201464557e-06, "loss": 0.568, "step": 251705 }, { "epoch": 2.79, "learning_rate": 3.5480703930079427e-06, "loss": 0.609, "step": 251710 }, { "epoch": 2.79, "learning_rate": 3.5471476658694305e-06, "loss": 0.5717, "step": 251715 }, { "epoch": 2.79, "learning_rate": 3.546224938730918e-06, "loss": 0.5678, "step": 251720 }, { "epoch": 2.79, "learning_rate": 3.5453022115924058e-06, "loss": 0.6056, "step": 251725 }, { "epoch": 2.79, "learning_rate": 3.5443794844538936e-06, "loss": 0.5574, "step": 251730 }, { "epoch": 2.79, "learning_rate": 3.543456757315381e-06, "loss": 0.5427, "step": 251735 }, { "epoch": 2.79, "learning_rate": 3.542534030176869e-06, "loss": 0.5676, "step": 251740 }, { "epoch": 2.79, "learning_rate": 3.541611303038356e-06, "loss": 0.5967, "step": 251745 }, { "epoch": 2.79, "learning_rate": 3.5406885758998433e-06, "loss": 0.5583, "step": 251750 }, { "epoch": 2.79, "learning_rate": 3.539765848761331e-06, "loss": 0.5579, "step": 251755 }, { "epoch": 2.79, "learning_rate": 3.5388431216228186e-06, "loss": 0.5897, "step": 251760 }, { "epoch": 2.79, "learning_rate": 3.5379203944843065e-06, "loss": 0.6391, "step": 251765 }, { "epoch": 2.79, "learning_rate": 3.5369976673457943e-06, "loss": 0.6135, "step": 251770 }, { "epoch": 2.79, "learning_rate": 3.5360749402072817e-06, "loss": 0.6319, "step": 251775 }, { "epoch": 2.79, "learning_rate": 3.5351522130687696e-06, "loss": 0.6168, "step": 251780 }, { "epoch": 2.79, "learning_rate": 3.5342294859302566e-06, "loss": 0.5973, "step": 251785 }, { "epoch": 2.79, "learning_rate": 3.533306758791744e-06, "loss": 0.5706, "step": 251790 }, { "epoch": 2.79, "learning_rate": 3.532384031653232e-06, "loss": 0.4987, "step": 251795 }, { "epoch": 2.79, "learning_rate": 3.5314613045147197e-06, "loss": 0.5583, "step": 251800 }, { "epoch": 2.79, "learning_rate": 3.530538577376207e-06, "loss": 0.5553, "step": 251805 }, { "epoch": 2.79, "learning_rate": 3.529615850237695e-06, "loss": 0.5515, "step": 251810 }, { "epoch": 2.79, "learning_rate": 3.5286931230991824e-06, "loss": 0.5777, "step": 251815 }, { "epoch": 2.79, "learning_rate": 3.5277703959606702e-06, "loss": 0.5379, "step": 251820 }, { "epoch": 2.79, "learning_rate": 3.5268476688221572e-06, "loss": 0.5732, "step": 251825 }, { "epoch": 2.79, "learning_rate": 3.5259249416836447e-06, "loss": 0.6093, "step": 251830 }, { "epoch": 2.79, "learning_rate": 3.5250022145451325e-06, "loss": 0.6019, "step": 251835 }, { "epoch": 2.79, "learning_rate": 3.5240794874066204e-06, "loss": 0.5947, "step": 251840 }, { "epoch": 2.79, "learning_rate": 3.523156760268108e-06, "loss": 0.568, "step": 251845 }, { "epoch": 2.79, "learning_rate": 3.5222340331295956e-06, "loss": 0.572, "step": 251850 }, { "epoch": 2.79, "learning_rate": 3.521311305991083e-06, "loss": 0.5755, "step": 251855 }, { "epoch": 2.79, "learning_rate": 3.52038857885257e-06, "loss": 0.5452, "step": 251860 }, { "epoch": 2.79, "learning_rate": 3.519465851714058e-06, "loss": 0.5454, "step": 251865 }, { "epoch": 2.79, "learning_rate": 3.5185431245755453e-06, "loss": 0.6357, "step": 251870 }, { "epoch": 2.79, "learning_rate": 3.517620397437033e-06, "loss": 0.6, "step": 251875 }, { "epoch": 2.79, "learning_rate": 3.516697670298521e-06, "loss": 0.615, "step": 251880 }, { "epoch": 2.79, "learning_rate": 3.5157749431600085e-06, "loss": 0.5461, "step": 251885 }, { "epoch": 2.79, "learning_rate": 3.5148522160214963e-06, "loss": 0.6247, "step": 251890 }, { "epoch": 2.79, "learning_rate": 3.5139294888829837e-06, "loss": 0.5483, "step": 251895 }, { "epoch": 2.79, "learning_rate": 3.5130067617444707e-06, "loss": 0.5762, "step": 251900 }, { "epoch": 2.79, "learning_rate": 3.5120840346059586e-06, "loss": 0.5754, "step": 251905 }, { "epoch": 2.79, "learning_rate": 3.5111613074674464e-06, "loss": 0.6318, "step": 251910 }, { "epoch": 2.79, "learning_rate": 3.510238580328934e-06, "loss": 0.583, "step": 251915 }, { "epoch": 2.79, "learning_rate": 3.5093158531904217e-06, "loss": 0.5721, "step": 251920 }, { "epoch": 2.79, "learning_rate": 3.508393126051909e-06, "loss": 0.5755, "step": 251925 }, { "epoch": 2.79, "learning_rate": 3.507470398913397e-06, "loss": 0.5563, "step": 251930 }, { "epoch": 2.79, "learning_rate": 3.506547671774884e-06, "loss": 0.5594, "step": 251935 }, { "epoch": 2.79, "learning_rate": 3.5056249446363714e-06, "loss": 0.5998, "step": 251940 }, { "epoch": 2.79, "learning_rate": 3.5047022174978593e-06, "loss": 0.5729, "step": 251945 }, { "epoch": 2.79, "learning_rate": 3.503779490359347e-06, "loss": 0.5981, "step": 251950 }, { "epoch": 2.79, "learning_rate": 3.5028567632208345e-06, "loss": 0.5419, "step": 251955 }, { "epoch": 2.79, "learning_rate": 3.5019340360823224e-06, "loss": 0.6566, "step": 251960 }, { "epoch": 2.79, "learning_rate": 3.50101130894381e-06, "loss": 0.5627, "step": 251965 }, { "epoch": 2.79, "learning_rate": 3.5000885818052977e-06, "loss": 0.557, "step": 251970 }, { "epoch": 2.79, "learning_rate": 3.4991658546667847e-06, "loss": 0.6241, "step": 251975 }, { "epoch": 2.79, "learning_rate": 3.4982431275282725e-06, "loss": 0.5416, "step": 251980 }, { "epoch": 2.79, "learning_rate": 3.49732040038976e-06, "loss": 0.5692, "step": 251985 }, { "epoch": 2.79, "learning_rate": 3.4963976732512478e-06, "loss": 0.5675, "step": 251990 }, { "epoch": 2.79, "learning_rate": 3.495474946112735e-06, "loss": 0.5886, "step": 251995 }, { "epoch": 2.79, "learning_rate": 3.494552218974223e-06, "loss": 0.5348, "step": 252000 }, { "epoch": 2.79, "eval_loss": 0.5377010703086853, "eval_runtime": 69.753, "eval_samples_per_second": 28.673, "eval_steps_per_second": 14.336, "step": 252000 }, { "epoch": 2.79, "learning_rate": 3.493629491835711e-06, "loss": 0.6542, "step": 252005 }, { "epoch": 2.79, "learning_rate": 3.4927067646971983e-06, "loss": 0.5697, "step": 252010 }, { "epoch": 2.79, "learning_rate": 3.4917840375586853e-06, "loss": 0.5746, "step": 252015 }, { "epoch": 2.79, "learning_rate": 3.490861310420173e-06, "loss": 0.5752, "step": 252020 }, { "epoch": 2.79, "learning_rate": 3.4899385832816606e-06, "loss": 0.5748, "step": 252025 }, { "epoch": 2.79, "learning_rate": 3.4890158561431485e-06, "loss": 0.5955, "step": 252030 }, { "epoch": 2.79, "learning_rate": 3.488093129004636e-06, "loss": 0.5437, "step": 252035 }, { "epoch": 2.79, "learning_rate": 3.4871704018661237e-06, "loss": 0.5107, "step": 252040 }, { "epoch": 2.79, "learning_rate": 3.4862476747276116e-06, "loss": 0.5921, "step": 252045 }, { "epoch": 2.79, "learning_rate": 3.4853249475890986e-06, "loss": 0.5896, "step": 252050 }, { "epoch": 2.79, "learning_rate": 3.484402220450586e-06, "loss": 0.5659, "step": 252055 }, { "epoch": 2.79, "learning_rate": 3.483479493312074e-06, "loss": 0.5503, "step": 252060 }, { "epoch": 2.79, "learning_rate": 3.4825567661735613e-06, "loss": 0.5443, "step": 252065 }, { "epoch": 2.79, "learning_rate": 3.481634039035049e-06, "loss": 0.6377, "step": 252070 }, { "epoch": 2.79, "learning_rate": 3.4807113118965366e-06, "loss": 0.5886, "step": 252075 }, { "epoch": 2.79, "learning_rate": 3.4797885847580244e-06, "loss": 0.5896, "step": 252080 }, { "epoch": 2.79, "learning_rate": 3.4788658576195122e-06, "loss": 0.5785, "step": 252085 }, { "epoch": 2.79, "learning_rate": 3.4779431304809993e-06, "loss": 0.5529, "step": 252090 }, { "epoch": 2.79, "learning_rate": 3.4770204033424867e-06, "loss": 0.5528, "step": 252095 }, { "epoch": 2.79, "learning_rate": 3.4760976762039745e-06, "loss": 0.6057, "step": 252100 }, { "epoch": 2.79, "learning_rate": 3.475174949065462e-06, "loss": 0.5387, "step": 252105 }, { "epoch": 2.79, "learning_rate": 3.47425222192695e-06, "loss": 0.6298, "step": 252110 }, { "epoch": 2.79, "learning_rate": 3.4733294947884376e-06, "loss": 0.6144, "step": 252115 }, { "epoch": 2.79, "learning_rate": 3.472406767649925e-06, "loss": 0.5983, "step": 252120 }, { "epoch": 2.79, "learning_rate": 3.471484040511412e-06, "loss": 0.5578, "step": 252125 }, { "epoch": 2.79, "learning_rate": 3.4705613133729e-06, "loss": 0.6177, "step": 252130 }, { "epoch": 2.79, "learning_rate": 3.4696385862343873e-06, "loss": 0.5617, "step": 252135 }, { "epoch": 2.79, "learning_rate": 3.468715859095875e-06, "loss": 0.5685, "step": 252140 }, { "epoch": 2.79, "learning_rate": 3.4677931319573626e-06, "loss": 0.6002, "step": 252145 }, { "epoch": 2.79, "learning_rate": 3.4668704048188505e-06, "loss": 0.5535, "step": 252150 }, { "epoch": 2.79, "learning_rate": 3.4659476776803383e-06, "loss": 0.6076, "step": 252155 }, { "epoch": 2.79, "learning_rate": 3.4650249505418257e-06, "loss": 0.5835, "step": 252160 }, { "epoch": 2.79, "learning_rate": 3.4641022234033127e-06, "loss": 0.5575, "step": 252165 }, { "epoch": 2.79, "learning_rate": 3.4631794962648006e-06, "loss": 0.5586, "step": 252170 }, { "epoch": 2.79, "learning_rate": 3.462256769126288e-06, "loss": 0.6047, "step": 252175 }, { "epoch": 2.79, "learning_rate": 3.461334041987776e-06, "loss": 0.5737, "step": 252180 }, { "epoch": 2.79, "learning_rate": 3.4604113148492637e-06, "loss": 0.5968, "step": 252185 }, { "epoch": 2.79, "learning_rate": 3.459488587710751e-06, "loss": 0.5862, "step": 252190 }, { "epoch": 2.79, "learning_rate": 3.458565860572239e-06, "loss": 0.5642, "step": 252195 }, { "epoch": 2.79, "learning_rate": 3.457643133433726e-06, "loss": 0.5784, "step": 252200 }, { "epoch": 2.79, "learning_rate": 3.4567204062952134e-06, "loss": 0.5401, "step": 252205 }, { "epoch": 2.79, "learning_rate": 3.4557976791567013e-06, "loss": 0.5515, "step": 252210 }, { "epoch": 2.79, "learning_rate": 3.4548749520181887e-06, "loss": 0.5687, "step": 252215 }, { "epoch": 2.79, "learning_rate": 3.4539522248796765e-06, "loss": 0.5574, "step": 252220 }, { "epoch": 2.79, "learning_rate": 3.4530294977411644e-06, "loss": 0.569, "step": 252225 }, { "epoch": 2.79, "learning_rate": 3.452106770602652e-06, "loss": 0.5335, "step": 252230 }, { "epoch": 2.79, "learning_rate": 3.4511840434641397e-06, "loss": 0.5572, "step": 252235 }, { "epoch": 2.79, "learning_rate": 3.4502613163256267e-06, "loss": 0.5664, "step": 252240 }, { "epoch": 2.79, "learning_rate": 3.449338589187114e-06, "loss": 0.5876, "step": 252245 }, { "epoch": 2.79, "learning_rate": 3.448415862048602e-06, "loss": 0.5648, "step": 252250 }, { "epoch": 2.79, "learning_rate": 3.4474931349100898e-06, "loss": 0.5869, "step": 252255 }, { "epoch": 2.79, "learning_rate": 3.4465704077715772e-06, "loss": 0.5843, "step": 252260 }, { "epoch": 2.79, "learning_rate": 3.445647680633065e-06, "loss": 0.5194, "step": 252265 }, { "epoch": 2.79, "learning_rate": 3.4447249534945525e-06, "loss": 0.5677, "step": 252270 }, { "epoch": 2.79, "learning_rate": 3.4438022263560403e-06, "loss": 0.6264, "step": 252275 }, { "epoch": 2.79, "learning_rate": 3.4428794992175273e-06, "loss": 0.6074, "step": 252280 }, { "epoch": 2.79, "learning_rate": 3.4419567720790148e-06, "loss": 0.6304, "step": 252285 }, { "epoch": 2.79, "learning_rate": 3.4410340449405026e-06, "loss": 0.5732, "step": 252290 }, { "epoch": 2.79, "learning_rate": 3.4401113178019905e-06, "loss": 0.585, "step": 252295 }, { "epoch": 2.79, "learning_rate": 3.439188590663478e-06, "loss": 0.587, "step": 252300 }, { "epoch": 2.79, "learning_rate": 3.4382658635249657e-06, "loss": 0.5859, "step": 252305 }, { "epoch": 2.79, "learning_rate": 3.437343136386453e-06, "loss": 0.5728, "step": 252310 }, { "epoch": 2.79, "learning_rate": 3.43642040924794e-06, "loss": 0.5242, "step": 252315 }, { "epoch": 2.79, "learning_rate": 3.435497682109428e-06, "loss": 0.5888, "step": 252320 }, { "epoch": 2.79, "learning_rate": 3.434574954970916e-06, "loss": 0.5594, "step": 252325 }, { "epoch": 2.79, "learning_rate": 3.4336522278324033e-06, "loss": 0.5866, "step": 252330 }, { "epoch": 2.79, "learning_rate": 3.432729500693891e-06, "loss": 0.5575, "step": 252335 }, { "epoch": 2.79, "learning_rate": 3.4318067735553786e-06, "loss": 0.5876, "step": 252340 }, { "epoch": 2.79, "learning_rate": 3.4308840464168664e-06, "loss": 0.5814, "step": 252345 }, { "epoch": 2.79, "learning_rate": 3.429961319278354e-06, "loss": 0.6096, "step": 252350 }, { "epoch": 2.79, "learning_rate": 3.429038592139841e-06, "loss": 0.5228, "step": 252355 }, { "epoch": 2.79, "learning_rate": 3.4281158650013287e-06, "loss": 0.5926, "step": 252360 }, { "epoch": 2.79, "learning_rate": 3.4271931378628165e-06, "loss": 0.5725, "step": 252365 }, { "epoch": 2.79, "learning_rate": 3.426270410724304e-06, "loss": 0.5209, "step": 252370 }, { "epoch": 2.79, "learning_rate": 3.425347683585792e-06, "loss": 0.6075, "step": 252375 }, { "epoch": 2.79, "learning_rate": 3.4244249564472792e-06, "loss": 0.5978, "step": 252380 }, { "epoch": 2.79, "learning_rate": 3.423502229308767e-06, "loss": 0.5446, "step": 252385 }, { "epoch": 2.79, "learning_rate": 3.422579502170254e-06, "loss": 0.6116, "step": 252390 }, { "epoch": 2.79, "learning_rate": 3.421656775031742e-06, "loss": 0.5472, "step": 252395 }, { "epoch": 2.79, "learning_rate": 3.4207340478932294e-06, "loss": 0.5643, "step": 252400 }, { "epoch": 2.79, "learning_rate": 3.419811320754717e-06, "loss": 0.5587, "step": 252405 }, { "epoch": 2.79, "learning_rate": 3.4188885936162046e-06, "loss": 0.5702, "step": 252410 }, { "epoch": 2.79, "learning_rate": 3.4179658664776925e-06, "loss": 0.5882, "step": 252415 }, { "epoch": 2.79, "learning_rate": 3.41704313933918e-06, "loss": 0.634, "step": 252420 }, { "epoch": 2.8, "learning_rate": 3.4161204122006677e-06, "loss": 0.5841, "step": 252425 }, { "epoch": 2.8, "learning_rate": 3.4151976850621548e-06, "loss": 0.6132, "step": 252430 }, { "epoch": 2.8, "learning_rate": 3.4142749579236426e-06, "loss": 0.5735, "step": 252435 }, { "epoch": 2.8, "learning_rate": 3.41335223078513e-06, "loss": 0.5864, "step": 252440 }, { "epoch": 2.8, "learning_rate": 3.412429503646618e-06, "loss": 0.5939, "step": 252445 }, { "epoch": 2.8, "learning_rate": 3.4115067765081053e-06, "loss": 0.55, "step": 252450 }, { "epoch": 2.8, "learning_rate": 3.410584049369593e-06, "loss": 0.5711, "step": 252455 }, { "epoch": 2.8, "learning_rate": 3.409661322231081e-06, "loss": 0.5592, "step": 252460 }, { "epoch": 2.8, "learning_rate": 3.4087385950925684e-06, "loss": 0.5939, "step": 252465 }, { "epoch": 2.8, "learning_rate": 3.4078158679540554e-06, "loss": 0.5701, "step": 252470 }, { "epoch": 2.8, "learning_rate": 3.4068931408155433e-06, "loss": 0.5128, "step": 252475 }, { "epoch": 2.8, "learning_rate": 3.4059704136770307e-06, "loss": 0.5884, "step": 252480 }, { "epoch": 2.8, "learning_rate": 3.4050476865385185e-06, "loss": 0.5752, "step": 252485 }, { "epoch": 2.8, "learning_rate": 3.404124959400006e-06, "loss": 0.5612, "step": 252490 }, { "epoch": 2.8, "learning_rate": 3.403202232261494e-06, "loss": 0.5721, "step": 252495 }, { "epoch": 2.8, "learning_rate": 3.4022795051229817e-06, "loss": 0.5524, "step": 252500 }, { "epoch": 2.8, "learning_rate": 3.4013567779844687e-06, "loss": 0.6248, "step": 252505 }, { "epoch": 2.8, "learning_rate": 3.400434050845956e-06, "loss": 0.5864, "step": 252510 }, { "epoch": 2.8, "learning_rate": 3.399511323707444e-06, "loss": 0.5797, "step": 252515 }, { "epoch": 2.8, "learning_rate": 3.3985885965689314e-06, "loss": 0.5924, "step": 252520 }, { "epoch": 2.8, "learning_rate": 3.3976658694304192e-06, "loss": 0.592, "step": 252525 }, { "epoch": 2.8, "learning_rate": 3.396743142291907e-06, "loss": 0.5767, "step": 252530 }, { "epoch": 2.8, "learning_rate": 3.3958204151533945e-06, "loss": 0.6219, "step": 252535 }, { "epoch": 2.8, "learning_rate": 3.3948976880148823e-06, "loss": 0.5336, "step": 252540 }, { "epoch": 2.8, "learning_rate": 3.3939749608763693e-06, "loss": 0.5917, "step": 252545 }, { "epoch": 2.8, "learning_rate": 3.3930522337378568e-06, "loss": 0.5589, "step": 252550 }, { "epoch": 2.8, "learning_rate": 3.3921295065993446e-06, "loss": 0.6351, "step": 252555 }, { "epoch": 2.8, "learning_rate": 3.391206779460832e-06, "loss": 0.5836, "step": 252560 }, { "epoch": 2.8, "learning_rate": 3.39028405232232e-06, "loss": 0.5941, "step": 252565 }, { "epoch": 2.8, "learning_rate": 3.3893613251838077e-06, "loss": 0.5635, "step": 252570 }, { "epoch": 2.8, "learning_rate": 3.388438598045295e-06, "loss": 0.5638, "step": 252575 }, { "epoch": 2.8, "learning_rate": 3.387515870906782e-06, "loss": 0.5743, "step": 252580 }, { "epoch": 2.8, "learning_rate": 3.38659314376827e-06, "loss": 0.6374, "step": 252585 }, { "epoch": 2.8, "learning_rate": 3.3856704166297574e-06, "loss": 0.5676, "step": 252590 }, { "epoch": 2.8, "learning_rate": 3.3847476894912453e-06, "loss": 0.632, "step": 252595 }, { "epoch": 2.8, "learning_rate": 3.383824962352733e-06, "loss": 0.5948, "step": 252600 }, { "epoch": 2.8, "learning_rate": 3.3829022352142206e-06, "loss": 0.5514, "step": 252605 }, { "epoch": 2.8, "learning_rate": 3.3819795080757084e-06, "loss": 0.6133, "step": 252610 }, { "epoch": 2.8, "learning_rate": 3.381056780937196e-06, "loss": 0.5529, "step": 252615 }, { "epoch": 2.8, "learning_rate": 3.380134053798683e-06, "loss": 0.5892, "step": 252620 }, { "epoch": 2.8, "learning_rate": 3.3792113266601707e-06, "loss": 0.5582, "step": 252625 }, { "epoch": 2.8, "learning_rate": 3.378288599521658e-06, "loss": 0.6005, "step": 252630 }, { "epoch": 2.8, "learning_rate": 3.377365872383146e-06, "loss": 0.5802, "step": 252635 }, { "epoch": 2.8, "learning_rate": 3.376443145244634e-06, "loss": 0.5895, "step": 252640 }, { "epoch": 2.8, "learning_rate": 3.3755204181061212e-06, "loss": 0.6098, "step": 252645 }, { "epoch": 2.8, "learning_rate": 3.374597690967609e-06, "loss": 0.5741, "step": 252650 }, { "epoch": 2.8, "learning_rate": 3.3736749638290965e-06, "loss": 0.5438, "step": 252655 }, { "epoch": 2.8, "learning_rate": 3.3727522366905835e-06, "loss": 0.5349, "step": 252660 }, { "epoch": 2.8, "learning_rate": 3.3718295095520714e-06, "loss": 0.5776, "step": 252665 }, { "epoch": 2.8, "learning_rate": 3.370906782413559e-06, "loss": 0.5786, "step": 252670 }, { "epoch": 2.8, "learning_rate": 3.3699840552750466e-06, "loss": 0.5448, "step": 252675 }, { "epoch": 2.8, "learning_rate": 3.3690613281365345e-06, "loss": 0.5935, "step": 252680 }, { "epoch": 2.8, "learning_rate": 3.368138600998022e-06, "loss": 0.5827, "step": 252685 }, { "epoch": 2.8, "learning_rate": 3.3672158738595098e-06, "loss": 0.5502, "step": 252690 }, { "epoch": 2.8, "learning_rate": 3.3662931467209968e-06, "loss": 0.5802, "step": 252695 }, { "epoch": 2.8, "learning_rate": 3.365370419582484e-06, "loss": 0.5217, "step": 252700 }, { "epoch": 2.8, "learning_rate": 3.364447692443972e-06, "loss": 0.5736, "step": 252705 }, { "epoch": 2.8, "learning_rate": 3.36352496530546e-06, "loss": 0.5731, "step": 252710 }, { "epoch": 2.8, "learning_rate": 3.3626022381669473e-06, "loss": 0.5197, "step": 252715 }, { "epoch": 2.8, "learning_rate": 3.361679511028435e-06, "loss": 0.6251, "step": 252720 }, { "epoch": 2.8, "learning_rate": 3.3607567838899226e-06, "loss": 0.6682, "step": 252725 }, { "epoch": 2.8, "learning_rate": 3.3598340567514104e-06, "loss": 0.6364, "step": 252730 }, { "epoch": 2.8, "learning_rate": 3.3589113296128974e-06, "loss": 0.6423, "step": 252735 }, { "epoch": 2.8, "learning_rate": 3.3579886024743853e-06, "loss": 0.5958, "step": 252740 }, { "epoch": 2.8, "learning_rate": 3.3570658753358727e-06, "loss": 0.5782, "step": 252745 }, { "epoch": 2.8, "learning_rate": 3.3561431481973605e-06, "loss": 0.594, "step": 252750 }, { "epoch": 2.8, "learning_rate": 3.355220421058848e-06, "loss": 0.5703, "step": 252755 }, { "epoch": 2.8, "learning_rate": 3.354297693920336e-06, "loss": 0.5397, "step": 252760 }, { "epoch": 2.8, "learning_rate": 3.3533749667818232e-06, "loss": 0.6293, "step": 252765 }, { "epoch": 2.8, "learning_rate": 3.3524522396433102e-06, "loss": 0.5338, "step": 252770 }, { "epoch": 2.8, "learning_rate": 3.351529512504798e-06, "loss": 0.6201, "step": 252775 }, { "epoch": 2.8, "learning_rate": 3.350606785366286e-06, "loss": 0.5544, "step": 252780 }, { "epoch": 2.8, "learning_rate": 3.3496840582277734e-06, "loss": 0.606, "step": 252785 }, { "epoch": 2.8, "learning_rate": 3.3487613310892612e-06, "loss": 0.5929, "step": 252790 }, { "epoch": 2.8, "learning_rate": 3.3478386039507486e-06, "loss": 0.5847, "step": 252795 }, { "epoch": 2.8, "learning_rate": 3.3469158768122365e-06, "loss": 0.558, "step": 252800 }, { "epoch": 2.8, "learning_rate": 3.3459931496737243e-06, "loss": 0.597, "step": 252805 }, { "epoch": 2.8, "learning_rate": 3.3450704225352113e-06, "loss": 0.5868, "step": 252810 }, { "epoch": 2.8, "learning_rate": 3.3441476953966988e-06, "loss": 0.6355, "step": 252815 }, { "epoch": 2.8, "learning_rate": 3.3432249682581866e-06, "loss": 0.5972, "step": 252820 }, { "epoch": 2.8, "learning_rate": 3.342302241119674e-06, "loss": 0.5955, "step": 252825 }, { "epoch": 2.8, "learning_rate": 3.341379513981162e-06, "loss": 0.5813, "step": 252830 }, { "epoch": 2.8, "learning_rate": 3.3404567868426493e-06, "loss": 0.5716, "step": 252835 }, { "epoch": 2.8, "learning_rate": 3.339534059704137e-06, "loss": 0.5964, "step": 252840 }, { "epoch": 2.8, "learning_rate": 3.338611332565625e-06, "loss": 0.5919, "step": 252845 }, { "epoch": 2.8, "learning_rate": 3.337688605427112e-06, "loss": 0.553, "step": 252850 }, { "epoch": 2.8, "learning_rate": 3.3367658782885994e-06, "loss": 0.5386, "step": 252855 }, { "epoch": 2.8, "learning_rate": 3.3358431511500873e-06, "loss": 0.6315, "step": 252860 }, { "epoch": 2.8, "learning_rate": 3.3349204240115747e-06, "loss": 0.581, "step": 252865 }, { "epoch": 2.8, "learning_rate": 3.3339976968730626e-06, "loss": 0.5651, "step": 252870 }, { "epoch": 2.8, "learning_rate": 3.3330749697345504e-06, "loss": 0.5741, "step": 252875 }, { "epoch": 2.8, "learning_rate": 3.332152242596038e-06, "loss": 0.6137, "step": 252880 }, { "epoch": 2.8, "learning_rate": 3.331229515457525e-06, "loss": 0.5487, "step": 252885 }, { "epoch": 2.8, "learning_rate": 3.3303067883190127e-06, "loss": 0.5666, "step": 252890 }, { "epoch": 2.8, "learning_rate": 3.3293840611805e-06, "loss": 0.5696, "step": 252895 }, { "epoch": 2.8, "learning_rate": 3.328461334041988e-06, "loss": 0.5652, "step": 252900 }, { "epoch": 2.8, "learning_rate": 3.3275386069034754e-06, "loss": 0.7108, "step": 252905 }, { "epoch": 2.8, "learning_rate": 3.3266158797649632e-06, "loss": 0.5827, "step": 252910 }, { "epoch": 2.8, "learning_rate": 3.325693152626451e-06, "loss": 0.545, "step": 252915 }, { "epoch": 2.8, "learning_rate": 3.3247704254879385e-06, "loss": 0.6426, "step": 252920 }, { "epoch": 2.8, "learning_rate": 3.3238476983494255e-06, "loss": 0.5813, "step": 252925 }, { "epoch": 2.8, "learning_rate": 3.3229249712109134e-06, "loss": 0.5414, "step": 252930 }, { "epoch": 2.8, "learning_rate": 3.3220022440724008e-06, "loss": 0.5627, "step": 252935 }, { "epoch": 2.8, "learning_rate": 3.3210795169338886e-06, "loss": 0.6124, "step": 252940 }, { "epoch": 2.8, "learning_rate": 3.3201567897953765e-06, "loss": 0.5979, "step": 252945 }, { "epoch": 2.8, "learning_rate": 3.319234062656864e-06, "loss": 0.5502, "step": 252950 }, { "epoch": 2.8, "learning_rate": 3.3183113355183518e-06, "loss": 0.605, "step": 252955 }, { "epoch": 2.8, "learning_rate": 3.3173886083798388e-06, "loss": 0.5896, "step": 252960 }, { "epoch": 2.8, "learning_rate": 3.316465881241326e-06, "loss": 0.5567, "step": 252965 }, { "epoch": 2.8, "learning_rate": 3.315543154102814e-06, "loss": 0.5837, "step": 252970 }, { "epoch": 2.8, "learning_rate": 3.3146204269643015e-06, "loss": 0.6232, "step": 252975 }, { "epoch": 2.8, "learning_rate": 3.3136976998257893e-06, "loss": 0.6316, "step": 252980 }, { "epoch": 2.8, "learning_rate": 3.312774972687277e-06, "loss": 0.5881, "step": 252985 }, { "epoch": 2.8, "learning_rate": 3.3118522455487646e-06, "loss": 0.5484, "step": 252990 }, { "epoch": 2.8, "learning_rate": 3.3109295184102524e-06, "loss": 0.6106, "step": 252995 }, { "epoch": 2.8, "learning_rate": 3.3100067912717394e-06, "loss": 0.5606, "step": 253000 }, { "epoch": 2.8, "eval_loss": 0.5524073243141174, "eval_runtime": 69.7059, "eval_samples_per_second": 28.692, "eval_steps_per_second": 14.346, "step": 253000 }, { "epoch": 2.8, "learning_rate": 3.309084064133227e-06, "loss": 0.5561, "step": 253005 }, { "epoch": 2.8, "learning_rate": 3.3081613369947147e-06, "loss": 0.58, "step": 253010 }, { "epoch": 2.8, "learning_rate": 3.3072386098562026e-06, "loss": 0.583, "step": 253015 }, { "epoch": 2.8, "learning_rate": 3.30631588271769e-06, "loss": 0.5591, "step": 253020 }, { "epoch": 2.8, "learning_rate": 3.305393155579178e-06, "loss": 0.5245, "step": 253025 }, { "epoch": 2.8, "learning_rate": 3.3044704284406653e-06, "loss": 0.5718, "step": 253030 }, { "epoch": 2.8, "learning_rate": 3.3035477013021523e-06, "loss": 0.561, "step": 253035 }, { "epoch": 2.8, "learning_rate": 3.30262497416364e-06, "loss": 0.5782, "step": 253040 }, { "epoch": 2.8, "learning_rate": 3.3017022470251275e-06, "loss": 0.5678, "step": 253045 }, { "epoch": 2.8, "learning_rate": 3.3007795198866154e-06, "loss": 0.4924, "step": 253050 }, { "epoch": 2.8, "learning_rate": 3.2998567927481032e-06, "loss": 0.5721, "step": 253055 }, { "epoch": 2.8, "learning_rate": 3.2989340656095906e-06, "loss": 0.5684, "step": 253060 }, { "epoch": 2.8, "learning_rate": 3.2980113384710785e-06, "loss": 0.5696, "step": 253065 }, { "epoch": 2.8, "learning_rate": 3.297088611332566e-06, "loss": 0.574, "step": 253070 }, { "epoch": 2.8, "learning_rate": 3.296165884194053e-06, "loss": 0.5763, "step": 253075 }, { "epoch": 2.8, "learning_rate": 3.2952431570555408e-06, "loss": 0.6175, "step": 253080 }, { "epoch": 2.8, "learning_rate": 3.2943204299170286e-06, "loss": 0.6179, "step": 253085 }, { "epoch": 2.8, "learning_rate": 3.293397702778516e-06, "loss": 0.5457, "step": 253090 }, { "epoch": 2.8, "learning_rate": 3.292474975640004e-06, "loss": 0.6131, "step": 253095 }, { "epoch": 2.8, "learning_rate": 3.2915522485014913e-06, "loss": 0.5843, "step": 253100 }, { "epoch": 2.8, "learning_rate": 3.290629521362979e-06, "loss": 0.5594, "step": 253105 }, { "epoch": 2.8, "learning_rate": 3.2897067942244666e-06, "loss": 0.5456, "step": 253110 }, { "epoch": 2.8, "learning_rate": 3.2887840670859536e-06, "loss": 0.6259, "step": 253115 }, { "epoch": 2.8, "learning_rate": 3.2878613399474414e-06, "loss": 0.6103, "step": 253120 }, { "epoch": 2.8, "learning_rate": 3.2869386128089293e-06, "loss": 0.58, "step": 253125 }, { "epoch": 2.8, "learning_rate": 3.2860158856704167e-06, "loss": 0.5997, "step": 253130 }, { "epoch": 2.8, "learning_rate": 3.2850931585319046e-06, "loss": 0.6175, "step": 253135 }, { "epoch": 2.8, "learning_rate": 3.284170431393392e-06, "loss": 0.5622, "step": 253140 }, { "epoch": 2.8, "learning_rate": 3.28324770425488e-06, "loss": 0.5272, "step": 253145 }, { "epoch": 2.8, "learning_rate": 3.282324977116367e-06, "loss": 0.6141, "step": 253150 }, { "epoch": 2.8, "learning_rate": 3.2814022499778543e-06, "loss": 0.586, "step": 253155 }, { "epoch": 2.8, "learning_rate": 3.280479522839342e-06, "loss": 0.5771, "step": 253160 }, { "epoch": 2.8, "learning_rate": 3.27955679570083e-06, "loss": 0.5897, "step": 253165 }, { "epoch": 2.8, "learning_rate": 3.2786340685623174e-06, "loss": 0.5907, "step": 253170 }, { "epoch": 2.8, "learning_rate": 3.2777113414238052e-06, "loss": 0.574, "step": 253175 }, { "epoch": 2.8, "learning_rate": 3.2767886142852927e-06, "loss": 0.576, "step": 253180 }, { "epoch": 2.8, "learning_rate": 3.2758658871467805e-06, "loss": 0.5683, "step": 253185 }, { "epoch": 2.8, "learning_rate": 3.2749431600082675e-06, "loss": 0.5581, "step": 253190 }, { "epoch": 2.8, "learning_rate": 3.2740204328697554e-06, "loss": 0.5832, "step": 253195 }, { "epoch": 2.8, "learning_rate": 3.273097705731243e-06, "loss": 0.5752, "step": 253200 }, { "epoch": 2.8, "learning_rate": 3.2721749785927306e-06, "loss": 0.5603, "step": 253205 }, { "epoch": 2.8, "learning_rate": 3.271252251454218e-06, "loss": 0.5521, "step": 253210 }, { "epoch": 2.8, "learning_rate": 3.270329524315706e-06, "loss": 0.5927, "step": 253215 }, { "epoch": 2.8, "learning_rate": 3.2694067971771938e-06, "loss": 0.5591, "step": 253220 }, { "epoch": 2.8, "learning_rate": 3.2684840700386803e-06, "loss": 0.5399, "step": 253225 }, { "epoch": 2.8, "learning_rate": 3.267561342900168e-06, "loss": 0.6221, "step": 253230 }, { "epoch": 2.8, "learning_rate": 3.266638615761656e-06, "loss": 0.565, "step": 253235 }, { "epoch": 2.8, "learning_rate": 3.2657158886231435e-06, "loss": 0.6131, "step": 253240 }, { "epoch": 2.8, "learning_rate": 3.2647931614846313e-06, "loss": 0.537, "step": 253245 }, { "epoch": 2.8, "learning_rate": 3.2638704343461187e-06, "loss": 0.576, "step": 253250 }, { "epoch": 2.8, "learning_rate": 3.2629477072076066e-06, "loss": 0.6067, "step": 253255 }, { "epoch": 2.8, "learning_rate": 3.2620249800690944e-06, "loss": 0.5789, "step": 253260 }, { "epoch": 2.8, "learning_rate": 3.2611022529305814e-06, "loss": 0.5906, "step": 253265 }, { "epoch": 2.8, "learning_rate": 3.260179525792069e-06, "loss": 0.5757, "step": 253270 }, { "epoch": 2.8, "learning_rate": 3.2592567986535567e-06, "loss": 0.6003, "step": 253275 }, { "epoch": 2.8, "learning_rate": 3.258334071515044e-06, "loss": 0.5852, "step": 253280 }, { "epoch": 2.8, "learning_rate": 3.257411344376532e-06, "loss": 0.5669, "step": 253285 }, { "epoch": 2.8, "learning_rate": 3.25648861723802e-06, "loss": 0.5903, "step": 253290 }, { "epoch": 2.8, "learning_rate": 3.2555658900995073e-06, "loss": 0.5963, "step": 253295 }, { "epoch": 2.8, "learning_rate": 3.254643162960995e-06, "loss": 0.5303, "step": 253300 }, { "epoch": 2.8, "learning_rate": 3.253720435822482e-06, "loss": 0.5539, "step": 253305 }, { "epoch": 2.8, "learning_rate": 3.2527977086839695e-06, "loss": 0.6157, "step": 253310 }, { "epoch": 2.8, "learning_rate": 3.2518749815454574e-06, "loss": 0.5471, "step": 253315 }, { "epoch": 2.8, "learning_rate": 3.250952254406945e-06, "loss": 0.5492, "step": 253320 }, { "epoch": 2.8, "learning_rate": 3.2500295272684327e-06, "loss": 0.58, "step": 253325 }, { "epoch": 2.81, "learning_rate": 3.2491068001299205e-06, "loss": 0.5781, "step": 253330 }, { "epoch": 2.81, "learning_rate": 3.248184072991408e-06, "loss": 0.5739, "step": 253335 }, { "epoch": 2.81, "learning_rate": 3.247261345852895e-06, "loss": 0.6071, "step": 253340 }, { "epoch": 2.81, "learning_rate": 3.2463386187143828e-06, "loss": 0.5357, "step": 253345 }, { "epoch": 2.81, "learning_rate": 3.24541589157587e-06, "loss": 0.5489, "step": 253350 }, { "epoch": 2.81, "learning_rate": 3.244493164437358e-06, "loss": 0.5335, "step": 253355 }, { "epoch": 2.81, "learning_rate": 3.2435704372988455e-06, "loss": 0.574, "step": 253360 }, { "epoch": 2.81, "learning_rate": 3.2426477101603333e-06, "loss": 0.5366, "step": 253365 }, { "epoch": 2.81, "learning_rate": 3.241724983021821e-06, "loss": 0.5794, "step": 253370 }, { "epoch": 2.81, "learning_rate": 3.2408022558833086e-06, "loss": 0.5438, "step": 253375 }, { "epoch": 2.81, "learning_rate": 3.2398795287447956e-06, "loss": 0.5666, "step": 253380 }, { "epoch": 2.81, "learning_rate": 3.2389568016062834e-06, "loss": 0.5517, "step": 253385 }, { "epoch": 2.81, "learning_rate": 3.238034074467771e-06, "loss": 0.6125, "step": 253390 }, { "epoch": 2.81, "learning_rate": 3.2371113473292587e-06, "loss": 0.5983, "step": 253395 }, { "epoch": 2.81, "learning_rate": 3.2361886201907466e-06, "loss": 0.5789, "step": 253400 }, { "epoch": 2.81, "learning_rate": 3.235265893052234e-06, "loss": 0.5346, "step": 253405 }, { "epoch": 2.81, "learning_rate": 3.234343165913722e-06, "loss": 0.5595, "step": 253410 }, { "epoch": 2.81, "learning_rate": 3.233420438775209e-06, "loss": 0.5293, "step": 253415 }, { "epoch": 2.81, "learning_rate": 3.2324977116366963e-06, "loss": 0.5471, "step": 253420 }, { "epoch": 2.81, "learning_rate": 3.231574984498184e-06, "loss": 0.615, "step": 253425 }, { "epoch": 2.81, "learning_rate": 3.2306522573596715e-06, "loss": 0.5867, "step": 253430 }, { "epoch": 2.81, "learning_rate": 3.2297295302211594e-06, "loss": 0.5671, "step": 253435 }, { "epoch": 2.81, "learning_rate": 3.2288068030826472e-06, "loss": 0.5898, "step": 253440 }, { "epoch": 2.81, "learning_rate": 3.2278840759441347e-06, "loss": 0.5521, "step": 253445 }, { "epoch": 2.81, "learning_rate": 3.2269613488056225e-06, "loss": 0.5467, "step": 253450 }, { "epoch": 2.81, "learning_rate": 3.2260386216671095e-06, "loss": 0.5765, "step": 253455 }, { "epoch": 2.81, "learning_rate": 3.225115894528597e-06, "loss": 0.6331, "step": 253460 }, { "epoch": 2.81, "learning_rate": 3.224193167390085e-06, "loss": 0.574, "step": 253465 }, { "epoch": 2.81, "learning_rate": 3.2232704402515726e-06, "loss": 0.6417, "step": 253470 }, { "epoch": 2.81, "learning_rate": 3.22234771311306e-06, "loss": 0.5894, "step": 253475 }, { "epoch": 2.81, "learning_rate": 3.221424985974548e-06, "loss": 0.5382, "step": 253480 }, { "epoch": 2.81, "learning_rate": 3.2205022588360353e-06, "loss": 0.5757, "step": 253485 }, { "epoch": 2.81, "learning_rate": 3.219579531697523e-06, "loss": 0.5602, "step": 253490 }, { "epoch": 2.81, "learning_rate": 3.21865680455901e-06, "loss": 0.5584, "step": 253495 }, { "epoch": 2.81, "learning_rate": 3.2177340774204976e-06, "loss": 0.5704, "step": 253500 }, { "epoch": 2.81, "learning_rate": 3.2168113502819855e-06, "loss": 0.6091, "step": 253505 }, { "epoch": 2.81, "learning_rate": 3.2158886231434733e-06, "loss": 0.5599, "step": 253510 }, { "epoch": 2.81, "learning_rate": 3.2149658960049607e-06, "loss": 0.5672, "step": 253515 }, { "epoch": 2.81, "learning_rate": 3.2140431688664486e-06, "loss": 0.5764, "step": 253520 }, { "epoch": 2.81, "learning_rate": 3.213120441727936e-06, "loss": 0.5198, "step": 253525 }, { "epoch": 2.81, "learning_rate": 3.212197714589423e-06, "loss": 0.6102, "step": 253530 }, { "epoch": 2.81, "learning_rate": 3.211274987450911e-06, "loss": 0.5878, "step": 253535 }, { "epoch": 2.81, "learning_rate": 3.2103522603123987e-06, "loss": 0.6063, "step": 253540 }, { "epoch": 2.81, "learning_rate": 3.209429533173886e-06, "loss": 0.56, "step": 253545 }, { "epoch": 2.81, "learning_rate": 3.208506806035374e-06, "loss": 0.5674, "step": 253550 }, { "epoch": 2.81, "learning_rate": 3.2075840788968614e-06, "loss": 0.5523, "step": 253555 }, { "epoch": 2.81, "learning_rate": 3.2066613517583493e-06, "loss": 0.5825, "step": 253560 }, { "epoch": 2.81, "learning_rate": 3.205738624619837e-06, "loss": 0.554, "step": 253565 }, { "epoch": 2.81, "learning_rate": 3.2048158974813237e-06, "loss": 0.6178, "step": 253570 }, { "epoch": 2.81, "learning_rate": 3.2038931703428115e-06, "loss": 0.5342, "step": 253575 }, { "epoch": 2.81, "learning_rate": 3.2029704432042994e-06, "loss": 0.5814, "step": 253580 }, { "epoch": 2.81, "learning_rate": 3.202047716065787e-06, "loss": 0.5299, "step": 253585 }, { "epoch": 2.81, "learning_rate": 3.2011249889272747e-06, "loss": 0.5694, "step": 253590 }, { "epoch": 2.81, "learning_rate": 3.200202261788762e-06, "loss": 0.5699, "step": 253595 }, { "epoch": 2.81, "learning_rate": 3.19927953465025e-06, "loss": 0.6385, "step": 253600 }, { "epoch": 2.81, "learning_rate": 3.198356807511737e-06, "loss": 0.6068, "step": 253605 }, { "epoch": 2.81, "learning_rate": 3.1974340803732248e-06, "loss": 0.5414, "step": 253610 }, { "epoch": 2.81, "learning_rate": 3.196511353234712e-06, "loss": 0.5951, "step": 253615 }, { "epoch": 2.81, "learning_rate": 3.1955886260962e-06, "loss": 0.6388, "step": 253620 }, { "epoch": 2.81, "learning_rate": 3.1946658989576875e-06, "loss": 0.5622, "step": 253625 }, { "epoch": 2.81, "learning_rate": 3.1937431718191753e-06, "loss": 0.556, "step": 253630 }, { "epoch": 2.81, "learning_rate": 3.1928204446806628e-06, "loss": 0.6477, "step": 253635 }, { "epoch": 2.81, "learning_rate": 3.1918977175421506e-06, "loss": 0.5425, "step": 253640 }, { "epoch": 2.81, "learning_rate": 3.1909749904036376e-06, "loss": 0.6017, "step": 253645 }, { "epoch": 2.81, "learning_rate": 3.1900522632651255e-06, "loss": 0.5784, "step": 253650 }, { "epoch": 2.81, "learning_rate": 3.189129536126613e-06, "loss": 0.537, "step": 253655 }, { "epoch": 2.81, "learning_rate": 3.1882068089881007e-06, "loss": 0.5362, "step": 253660 }, { "epoch": 2.81, "learning_rate": 3.187284081849588e-06, "loss": 0.567, "step": 253665 }, { "epoch": 2.81, "learning_rate": 3.186361354711076e-06, "loss": 0.5806, "step": 253670 }, { "epoch": 2.81, "learning_rate": 3.185438627572564e-06, "loss": 0.6107, "step": 253675 }, { "epoch": 2.81, "learning_rate": 3.184515900434051e-06, "loss": 0.5483, "step": 253680 }, { "epoch": 2.81, "learning_rate": 3.1835931732955383e-06, "loss": 0.6175, "step": 253685 }, { "epoch": 2.81, "learning_rate": 3.182670446157026e-06, "loss": 0.5656, "step": 253690 }, { "epoch": 2.81, "learning_rate": 3.1817477190185135e-06, "loss": 0.52, "step": 253695 }, { "epoch": 2.81, "learning_rate": 3.1808249918800014e-06, "loss": 0.5939, "step": 253700 }, { "epoch": 2.81, "learning_rate": 3.179902264741489e-06, "loss": 0.5739, "step": 253705 }, { "epoch": 2.81, "learning_rate": 3.1789795376029767e-06, "loss": 0.5682, "step": 253710 }, { "epoch": 2.81, "learning_rate": 3.1780568104644645e-06, "loss": 0.5605, "step": 253715 }, { "epoch": 2.81, "learning_rate": 3.1771340833259515e-06, "loss": 0.5754, "step": 253720 }, { "epoch": 2.81, "learning_rate": 3.176211356187439e-06, "loss": 0.5782, "step": 253725 }, { "epoch": 2.81, "learning_rate": 3.175288629048927e-06, "loss": 0.5762, "step": 253730 }, { "epoch": 2.81, "learning_rate": 3.1743659019104142e-06, "loss": 0.5657, "step": 253735 }, { "epoch": 2.81, "learning_rate": 3.173443174771902e-06, "loss": 0.5608, "step": 253740 }, { "epoch": 2.81, "learning_rate": 3.17252044763339e-06, "loss": 0.5496, "step": 253745 }, { "epoch": 2.81, "learning_rate": 3.1715977204948773e-06, "loss": 0.5679, "step": 253750 }, { "epoch": 2.81, "learning_rate": 3.170674993356365e-06, "loss": 0.5178, "step": 253755 }, { "epoch": 2.81, "learning_rate": 3.169752266217852e-06, "loss": 0.5654, "step": 253760 }, { "epoch": 2.81, "learning_rate": 3.1688295390793396e-06, "loss": 0.57, "step": 253765 }, { "epoch": 2.81, "learning_rate": 3.1679068119408275e-06, "loss": 0.5065, "step": 253770 }, { "epoch": 2.81, "learning_rate": 3.166984084802315e-06, "loss": 0.5725, "step": 253775 }, { "epoch": 2.81, "learning_rate": 3.1660613576638027e-06, "loss": 0.6546, "step": 253780 }, { "epoch": 2.81, "learning_rate": 3.1651386305252906e-06, "loss": 0.6164, "step": 253785 }, { "epoch": 2.81, "learning_rate": 3.164215903386778e-06, "loss": 0.5918, "step": 253790 }, { "epoch": 2.81, "learning_rate": 3.163293176248265e-06, "loss": 0.531, "step": 253795 }, { "epoch": 2.81, "learning_rate": 3.162370449109753e-06, "loss": 0.6102, "step": 253800 }, { "epoch": 2.81, "learning_rate": 3.1614477219712403e-06, "loss": 0.5975, "step": 253805 }, { "epoch": 2.81, "learning_rate": 3.160524994832728e-06, "loss": 0.5297, "step": 253810 }, { "epoch": 2.81, "learning_rate": 3.159602267694216e-06, "loss": 0.564, "step": 253815 }, { "epoch": 2.81, "learning_rate": 3.1586795405557034e-06, "loss": 0.5842, "step": 253820 }, { "epoch": 2.81, "learning_rate": 3.1577568134171913e-06, "loss": 0.5755, "step": 253825 }, { "epoch": 2.81, "learning_rate": 3.1568340862786787e-06, "loss": 0.5768, "step": 253830 }, { "epoch": 2.81, "learning_rate": 3.1559113591401657e-06, "loss": 0.5911, "step": 253835 }, { "epoch": 2.81, "learning_rate": 3.1549886320016535e-06, "loss": 0.5958, "step": 253840 }, { "epoch": 2.81, "learning_rate": 3.154065904863141e-06, "loss": 0.5777, "step": 253845 }, { "epoch": 2.81, "learning_rate": 3.153143177724629e-06, "loss": 0.5515, "step": 253850 }, { "epoch": 2.81, "learning_rate": 3.1522204505861167e-06, "loss": 0.6089, "step": 253855 }, { "epoch": 2.81, "learning_rate": 3.151297723447604e-06, "loss": 0.6022, "step": 253860 }, { "epoch": 2.81, "learning_rate": 3.150374996309092e-06, "loss": 0.5593, "step": 253865 }, { "epoch": 2.81, "learning_rate": 3.149452269170579e-06, "loss": 0.6234, "step": 253870 }, { "epoch": 2.81, "learning_rate": 3.1485295420320664e-06, "loss": 0.5984, "step": 253875 }, { "epoch": 2.81, "learning_rate": 3.147606814893554e-06, "loss": 0.5762, "step": 253880 }, { "epoch": 2.81, "learning_rate": 3.146684087755042e-06, "loss": 0.5839, "step": 253885 }, { "epoch": 2.81, "learning_rate": 3.1457613606165295e-06, "loss": 0.6035, "step": 253890 }, { "epoch": 2.81, "learning_rate": 3.1448386334780173e-06, "loss": 0.5991, "step": 253895 }, { "epoch": 2.81, "learning_rate": 3.1439159063395048e-06, "loss": 0.5929, "step": 253900 }, { "epoch": 2.81, "learning_rate": 3.1429931792009926e-06, "loss": 0.5599, "step": 253905 }, { "epoch": 2.81, "learning_rate": 3.1420704520624796e-06, "loss": 0.5744, "step": 253910 }, { "epoch": 2.81, "learning_rate": 3.141147724923967e-06, "loss": 0.5895, "step": 253915 }, { "epoch": 2.81, "learning_rate": 3.140224997785455e-06, "loss": 0.6061, "step": 253920 }, { "epoch": 2.81, "learning_rate": 3.1393022706469427e-06, "loss": 0.5954, "step": 253925 }, { "epoch": 2.81, "learning_rate": 3.13837954350843e-06, "loss": 0.6307, "step": 253930 }, { "epoch": 2.81, "learning_rate": 3.137456816369918e-06, "loss": 0.5701, "step": 253935 }, { "epoch": 2.81, "learning_rate": 3.1365340892314054e-06, "loss": 0.5535, "step": 253940 }, { "epoch": 2.81, "learning_rate": 3.1356113620928933e-06, "loss": 0.6499, "step": 253945 }, { "epoch": 2.81, "learning_rate": 3.1346886349543803e-06, "loss": 0.5726, "step": 253950 }, { "epoch": 2.81, "learning_rate": 3.133765907815868e-06, "loss": 0.6212, "step": 253955 }, { "epoch": 2.81, "learning_rate": 3.1328431806773556e-06, "loss": 0.5495, "step": 253960 }, { "epoch": 2.81, "learning_rate": 3.1319204535388434e-06, "loss": 0.5943, "step": 253965 }, { "epoch": 2.81, "learning_rate": 3.130997726400331e-06, "loss": 0.5503, "step": 253970 }, { "epoch": 2.81, "learning_rate": 3.1300749992618187e-06, "loss": 0.5288, "step": 253975 }, { "epoch": 2.81, "learning_rate": 3.129152272123306e-06, "loss": 0.6213, "step": 253980 }, { "epoch": 2.81, "learning_rate": 3.128229544984793e-06, "loss": 0.5375, "step": 253985 }, { "epoch": 2.81, "learning_rate": 3.127306817846281e-06, "loss": 0.6552, "step": 253990 }, { "epoch": 2.81, "learning_rate": 3.126384090707769e-06, "loss": 0.5333, "step": 253995 }, { "epoch": 2.81, "learning_rate": 3.1254613635692562e-06, "loss": 0.5142, "step": 254000 }, { "epoch": 2.81, "eval_loss": 0.5440714359283447, "eval_runtime": 69.6754, "eval_samples_per_second": 28.705, "eval_steps_per_second": 14.352, "step": 254000 }, { "epoch": 2.81, "learning_rate": 3.124538636430744e-06, "loss": 0.5455, "step": 254005 }, { "epoch": 2.81, "learning_rate": 3.1236159092922315e-06, "loss": 0.6074, "step": 254010 }, { "epoch": 2.81, "learning_rate": 3.122693182153719e-06, "loss": 0.5852, "step": 254015 }, { "epoch": 2.81, "learning_rate": 3.1217704550152068e-06, "loss": 0.6034, "step": 254020 }, { "epoch": 2.81, "learning_rate": 3.120847727876694e-06, "loss": 0.5947, "step": 254025 }, { "epoch": 2.81, "learning_rate": 3.119925000738182e-06, "loss": 0.5774, "step": 254030 }, { "epoch": 2.81, "learning_rate": 3.1190022735996695e-06, "loss": 0.5765, "step": 254035 }, { "epoch": 2.81, "learning_rate": 3.118079546461157e-06, "loss": 0.5812, "step": 254040 }, { "epoch": 2.81, "learning_rate": 3.1171568193226447e-06, "loss": 0.5967, "step": 254045 }, { "epoch": 2.81, "learning_rate": 3.116234092184132e-06, "loss": 0.6336, "step": 254050 }, { "epoch": 2.81, "learning_rate": 3.1153113650456196e-06, "loss": 0.6138, "step": 254055 }, { "epoch": 2.81, "learning_rate": 3.1143886379071074e-06, "loss": 0.5827, "step": 254060 }, { "epoch": 2.81, "learning_rate": 3.113465910768595e-06, "loss": 0.5883, "step": 254065 }, { "epoch": 2.81, "learning_rate": 3.1125431836300823e-06, "loss": 0.5268, "step": 254070 }, { "epoch": 2.81, "learning_rate": 3.11162045649157e-06, "loss": 0.578, "step": 254075 }, { "epoch": 2.81, "learning_rate": 3.1106977293530576e-06, "loss": 0.5978, "step": 254080 }, { "epoch": 2.81, "learning_rate": 3.1097750022145454e-06, "loss": 0.5441, "step": 254085 }, { "epoch": 2.81, "learning_rate": 3.108852275076033e-06, "loss": 0.5604, "step": 254090 }, { "epoch": 2.81, "learning_rate": 3.1079295479375203e-06, "loss": 0.6079, "step": 254095 }, { "epoch": 2.81, "learning_rate": 3.107006820799008e-06, "loss": 0.5968, "step": 254100 }, { "epoch": 2.81, "learning_rate": 3.106084093660496e-06, "loss": 0.5581, "step": 254105 }, { "epoch": 2.81, "learning_rate": 3.105161366521983e-06, "loss": 0.5737, "step": 254110 }, { "epoch": 2.81, "learning_rate": 3.104238639383471e-06, "loss": 0.6511, "step": 254115 }, { "epoch": 2.81, "learning_rate": 3.1033159122449582e-06, "loss": 0.5972, "step": 254120 }, { "epoch": 2.81, "learning_rate": 3.102393185106446e-06, "loss": 0.5876, "step": 254125 }, { "epoch": 2.81, "learning_rate": 3.1014704579679335e-06, "loss": 0.6048, "step": 254130 }, { "epoch": 2.81, "learning_rate": 3.100547730829421e-06, "loss": 0.6194, "step": 254135 }, { "epoch": 2.81, "learning_rate": 3.099625003690909e-06, "loss": 0.5827, "step": 254140 }, { "epoch": 2.81, "learning_rate": 3.0987022765523962e-06, "loss": 0.5558, "step": 254145 }, { "epoch": 2.81, "learning_rate": 3.0977795494138836e-06, "loss": 0.5261, "step": 254150 }, { "epoch": 2.81, "learning_rate": 3.0968568222753715e-06, "loss": 0.605, "step": 254155 }, { "epoch": 2.81, "learning_rate": 3.0959340951368593e-06, "loss": 0.5448, "step": 254160 }, { "epoch": 2.81, "learning_rate": 3.0950113679983463e-06, "loss": 0.6215, "step": 254165 }, { "epoch": 2.81, "learning_rate": 3.094088640859834e-06, "loss": 0.5862, "step": 254170 }, { "epoch": 2.81, "learning_rate": 3.0931659137213216e-06, "loss": 0.6374, "step": 254175 }, { "epoch": 2.81, "learning_rate": 3.0922431865828095e-06, "loss": 0.5587, "step": 254180 }, { "epoch": 2.81, "learning_rate": 3.091320459444297e-06, "loss": 0.5905, "step": 254185 }, { "epoch": 2.81, "learning_rate": 3.0903977323057843e-06, "loss": 0.5884, "step": 254190 }, { "epoch": 2.81, "learning_rate": 3.089475005167272e-06, "loss": 0.6085, "step": 254195 }, { "epoch": 2.81, "learning_rate": 3.08855227802876e-06, "loss": 0.5908, "step": 254200 }, { "epoch": 2.81, "learning_rate": 3.087629550890247e-06, "loss": 0.6052, "step": 254205 }, { "epoch": 2.81, "learning_rate": 3.086706823751735e-06, "loss": 0.6062, "step": 254210 }, { "epoch": 2.81, "learning_rate": 3.0857840966132227e-06, "loss": 0.5205, "step": 254215 }, { "epoch": 2.81, "learning_rate": 3.08486136947471e-06, "loss": 0.6046, "step": 254220 }, { "epoch": 2.81, "learning_rate": 3.0839386423361976e-06, "loss": 0.5628, "step": 254225 }, { "epoch": 2.82, "learning_rate": 3.0830159151976854e-06, "loss": 0.5907, "step": 254230 }, { "epoch": 2.82, "learning_rate": 3.082093188059173e-06, "loss": 0.6157, "step": 254235 }, { "epoch": 2.82, "learning_rate": 3.0811704609206603e-06, "loss": 0.6081, "step": 254240 }, { "epoch": 2.82, "learning_rate": 3.0802477337821477e-06, "loss": 0.5788, "step": 254245 }, { "epoch": 2.82, "learning_rate": 3.0793250066436355e-06, "loss": 0.6101, "step": 254250 }, { "epoch": 2.82, "learning_rate": 3.0784022795051234e-06, "loss": 0.5646, "step": 254255 }, { "epoch": 2.82, "learning_rate": 3.0774795523666104e-06, "loss": 0.5455, "step": 254260 }, { "epoch": 2.82, "learning_rate": 3.0765568252280982e-06, "loss": 0.5618, "step": 254265 }, { "epoch": 2.82, "learning_rate": 3.075634098089586e-06, "loss": 0.5798, "step": 254270 }, { "epoch": 2.82, "learning_rate": 3.0747113709510735e-06, "loss": 0.5586, "step": 254275 }, { "epoch": 2.82, "learning_rate": 3.073788643812561e-06, "loss": 0.5678, "step": 254280 }, { "epoch": 2.82, "learning_rate": 3.0728659166740488e-06, "loss": 0.5829, "step": 254285 }, { "epoch": 2.82, "learning_rate": 3.071943189535536e-06, "loss": 0.6247, "step": 254290 }, { "epoch": 2.82, "learning_rate": 3.071020462397024e-06, "loss": 0.5938, "step": 254295 }, { "epoch": 2.82, "learning_rate": 3.0700977352585115e-06, "loss": 0.5867, "step": 254300 }, { "epoch": 2.82, "learning_rate": 3.069175008119999e-06, "loss": 0.6291, "step": 254305 }, { "epoch": 2.82, "learning_rate": 3.0682522809814867e-06, "loss": 0.6062, "step": 254310 }, { "epoch": 2.82, "learning_rate": 3.067329553842974e-06, "loss": 0.5948, "step": 254315 }, { "epoch": 2.82, "learning_rate": 3.0664068267044616e-06, "loss": 0.6012, "step": 254320 }, { "epoch": 2.82, "learning_rate": 3.0654840995659494e-06, "loss": 0.6074, "step": 254325 }, { "epoch": 2.82, "learning_rate": 3.064561372427437e-06, "loss": 0.6086, "step": 254330 }, { "epoch": 2.82, "learning_rate": 3.0636386452889243e-06, "loss": 0.574, "step": 254335 }, { "epoch": 2.82, "learning_rate": 3.062715918150412e-06, "loss": 0.5462, "step": 254340 }, { "epoch": 2.82, "learning_rate": 3.0617931910118996e-06, "loss": 0.5729, "step": 254345 }, { "epoch": 2.82, "learning_rate": 3.0608704638733874e-06, "loss": 0.6185, "step": 254350 }, { "epoch": 2.82, "learning_rate": 3.059947736734875e-06, "loss": 0.5866, "step": 254355 }, { "epoch": 2.82, "learning_rate": 3.0590250095963623e-06, "loss": 0.6, "step": 254360 }, { "epoch": 2.82, "learning_rate": 3.05810228245785e-06, "loss": 0.5599, "step": 254365 }, { "epoch": 2.82, "learning_rate": 3.0571795553193375e-06, "loss": 0.5898, "step": 254370 }, { "epoch": 2.82, "learning_rate": 3.056256828180825e-06, "loss": 0.5918, "step": 254375 }, { "epoch": 2.82, "learning_rate": 3.055334101042313e-06, "loss": 0.5609, "step": 254380 }, { "epoch": 2.82, "learning_rate": 3.0544113739038002e-06, "loss": 0.5375, "step": 254385 }, { "epoch": 2.82, "learning_rate": 3.053488646765288e-06, "loss": 0.6192, "step": 254390 }, { "epoch": 2.82, "learning_rate": 3.0525659196267755e-06, "loss": 0.6008, "step": 254395 }, { "epoch": 2.82, "learning_rate": 3.051643192488263e-06, "loss": 0.556, "step": 254400 }, { "epoch": 2.82, "learning_rate": 3.050720465349751e-06, "loss": 0.5912, "step": 254405 }, { "epoch": 2.82, "learning_rate": 3.0497977382112382e-06, "loss": 0.5616, "step": 254410 }, { "epoch": 2.82, "learning_rate": 3.0488750110727256e-06, "loss": 0.6034, "step": 254415 }, { "epoch": 2.82, "learning_rate": 3.0479522839342135e-06, "loss": 0.5925, "step": 254420 }, { "epoch": 2.82, "learning_rate": 3.047029556795701e-06, "loss": 0.6074, "step": 254425 }, { "epoch": 2.82, "learning_rate": 3.0461068296571883e-06, "loss": 0.5165, "step": 254430 }, { "epoch": 2.82, "learning_rate": 3.045184102518676e-06, "loss": 0.5906, "step": 254435 }, { "epoch": 2.82, "learning_rate": 3.0442613753801636e-06, "loss": 0.5286, "step": 254440 }, { "epoch": 2.82, "learning_rate": 3.0433386482416515e-06, "loss": 0.6073, "step": 254445 }, { "epoch": 2.82, "learning_rate": 3.042415921103139e-06, "loss": 0.5961, "step": 254450 }, { "epoch": 2.82, "learning_rate": 3.0414931939646263e-06, "loss": 0.5389, "step": 254455 }, { "epoch": 2.82, "learning_rate": 3.040570466826114e-06, "loss": 0.586, "step": 254460 }, { "epoch": 2.82, "learning_rate": 3.0396477396876016e-06, "loss": 0.5397, "step": 254465 }, { "epoch": 2.82, "learning_rate": 3.038725012549089e-06, "loss": 0.5682, "step": 254470 }, { "epoch": 2.82, "learning_rate": 3.037802285410577e-06, "loss": 0.5796, "step": 254475 }, { "epoch": 2.82, "learning_rate": 3.0368795582720643e-06, "loss": 0.6504, "step": 254480 }, { "epoch": 2.82, "learning_rate": 3.035956831133552e-06, "loss": 0.5922, "step": 254485 }, { "epoch": 2.82, "learning_rate": 3.0350341039950396e-06, "loss": 0.5513, "step": 254490 }, { "epoch": 2.82, "learning_rate": 3.034111376856527e-06, "loss": 0.5784, "step": 254495 }, { "epoch": 2.82, "learning_rate": 3.033188649718015e-06, "loss": 0.5901, "step": 254500 }, { "epoch": 2.82, "learning_rate": 3.0322659225795027e-06, "loss": 0.528, "step": 254505 }, { "epoch": 2.82, "learning_rate": 3.0313431954409897e-06, "loss": 0.5887, "step": 254510 }, { "epoch": 2.82, "learning_rate": 3.0304204683024775e-06, "loss": 0.6184, "step": 254515 }, { "epoch": 2.82, "learning_rate": 3.029497741163965e-06, "loss": 0.537, "step": 254520 }, { "epoch": 2.82, "learning_rate": 3.0285750140254524e-06, "loss": 0.6164, "step": 254525 }, { "epoch": 2.82, "learning_rate": 3.0276522868869402e-06, "loss": 0.5401, "step": 254530 }, { "epoch": 2.82, "learning_rate": 3.0267295597484277e-06, "loss": 0.5715, "step": 254535 }, { "epoch": 2.82, "learning_rate": 3.0258068326099155e-06, "loss": 0.5703, "step": 254540 }, { "epoch": 2.82, "learning_rate": 3.024884105471403e-06, "loss": 0.5706, "step": 254545 }, { "epoch": 2.82, "learning_rate": 3.0239613783328904e-06, "loss": 0.553, "step": 254550 }, { "epoch": 2.82, "learning_rate": 3.023038651194378e-06, "loss": 0.5564, "step": 254555 }, { "epoch": 2.82, "learning_rate": 3.022115924055866e-06, "loss": 0.633, "step": 254560 }, { "epoch": 2.82, "learning_rate": 3.021193196917353e-06, "loss": 0.5525, "step": 254565 }, { "epoch": 2.82, "learning_rate": 3.020270469778841e-06, "loss": 0.5776, "step": 254570 }, { "epoch": 2.82, "learning_rate": 3.0193477426403288e-06, "loss": 0.5633, "step": 254575 }, { "epoch": 2.82, "learning_rate": 3.018425015501816e-06, "loss": 0.5843, "step": 254580 }, { "epoch": 2.82, "learning_rate": 3.0175022883633036e-06, "loss": 0.5688, "step": 254585 }, { "epoch": 2.82, "learning_rate": 3.016579561224791e-06, "loss": 0.5635, "step": 254590 }, { "epoch": 2.82, "learning_rate": 3.015656834086279e-06, "loss": 0.5666, "step": 254595 }, { "epoch": 2.82, "learning_rate": 3.0147341069477667e-06, "loss": 0.5226, "step": 254600 }, { "epoch": 2.82, "learning_rate": 3.0138113798092537e-06, "loss": 0.5017, "step": 254605 }, { "epoch": 2.82, "learning_rate": 3.0128886526707416e-06, "loss": 0.5752, "step": 254610 }, { "epoch": 2.82, "learning_rate": 3.0119659255322294e-06, "loss": 0.5914, "step": 254615 }, { "epoch": 2.82, "learning_rate": 3.0110431983937164e-06, "loss": 0.629, "step": 254620 }, { "epoch": 2.82, "learning_rate": 3.0101204712552043e-06, "loss": 0.581, "step": 254625 }, { "epoch": 2.82, "learning_rate": 3.009197744116692e-06, "loss": 0.6583, "step": 254630 }, { "epoch": 2.82, "learning_rate": 3.0082750169781795e-06, "loss": 0.5426, "step": 254635 }, { "epoch": 2.82, "learning_rate": 3.007352289839667e-06, "loss": 0.5766, "step": 254640 }, { "epoch": 2.82, "learning_rate": 3.006429562701155e-06, "loss": 0.6256, "step": 254645 }, { "epoch": 2.82, "learning_rate": 3.0055068355626422e-06, "loss": 0.5945, "step": 254650 }, { "epoch": 2.82, "learning_rate": 3.00458410842413e-06, "loss": 0.5826, "step": 254655 }, { "epoch": 2.82, "learning_rate": 3.003661381285617e-06, "loss": 0.5388, "step": 254660 }, { "epoch": 2.82, "learning_rate": 3.002738654147105e-06, "loss": 0.5278, "step": 254665 }, { "epoch": 2.82, "learning_rate": 3.001815927008593e-06, "loss": 0.5893, "step": 254670 }, { "epoch": 2.82, "learning_rate": 3.0008931998700802e-06, "loss": 0.5818, "step": 254675 }, { "epoch": 2.82, "learning_rate": 2.9999704727315676e-06, "loss": 0.579, "step": 254680 }, { "epoch": 2.82, "learning_rate": 2.9990477455930555e-06, "loss": 0.5882, "step": 254685 }, { "epoch": 2.82, "learning_rate": 2.998125018454543e-06, "loss": 0.5774, "step": 254690 }, { "epoch": 2.82, "learning_rate": 2.9972022913160308e-06, "loss": 0.5778, "step": 254695 }, { "epoch": 2.82, "learning_rate": 2.996279564177518e-06, "loss": 0.6225, "step": 254700 }, { "epoch": 2.82, "learning_rate": 2.9953568370390056e-06, "loss": 0.6035, "step": 254705 }, { "epoch": 2.82, "learning_rate": 2.9944341099004935e-06, "loss": 0.5266, "step": 254710 }, { "epoch": 2.82, "learning_rate": 2.9935113827619805e-06, "loss": 0.5552, "step": 254715 }, { "epoch": 2.82, "learning_rate": 2.9925886556234683e-06, "loss": 0.576, "step": 254720 }, { "epoch": 2.82, "learning_rate": 2.991665928484956e-06, "loss": 0.5796, "step": 254725 }, { "epoch": 2.82, "learning_rate": 2.9907432013464436e-06, "loss": 0.5524, "step": 254730 }, { "epoch": 2.82, "learning_rate": 2.989820474207931e-06, "loss": 0.6467, "step": 254735 }, { "epoch": 2.82, "learning_rate": 2.988897747069419e-06, "loss": 0.5198, "step": 254740 }, { "epoch": 2.82, "learning_rate": 2.9879750199309063e-06, "loss": 0.5313, "step": 254745 }, { "epoch": 2.82, "learning_rate": 2.987052292792394e-06, "loss": 0.5707, "step": 254750 }, { "epoch": 2.82, "learning_rate": 2.9861295656538816e-06, "loss": 0.5708, "step": 254755 }, { "epoch": 2.82, "learning_rate": 2.985206838515369e-06, "loss": 0.5054, "step": 254760 }, { "epoch": 2.82, "learning_rate": 2.984284111376857e-06, "loss": 0.5948, "step": 254765 }, { "epoch": 2.82, "learning_rate": 2.9833613842383443e-06, "loss": 0.5521, "step": 254770 }, { "epoch": 2.82, "learning_rate": 2.9824386570998317e-06, "loss": 0.5974, "step": 254775 }, { "epoch": 2.82, "learning_rate": 2.9815159299613195e-06, "loss": 0.5366, "step": 254780 }, { "epoch": 2.82, "learning_rate": 2.980593202822807e-06, "loss": 0.5455, "step": 254785 }, { "epoch": 2.82, "learning_rate": 2.9796704756842944e-06, "loss": 0.622, "step": 254790 }, { "epoch": 2.82, "learning_rate": 2.9787477485457822e-06, "loss": 0.6097, "step": 254795 }, { "epoch": 2.82, "learning_rate": 2.9778250214072697e-06, "loss": 0.5684, "step": 254800 }, { "epoch": 2.82, "learning_rate": 2.9769022942687575e-06, "loss": 0.5833, "step": 254805 }, { "epoch": 2.82, "learning_rate": 2.975979567130245e-06, "loss": 0.613, "step": 254810 }, { "epoch": 2.82, "learning_rate": 2.9750568399917324e-06, "loss": 0.6287, "step": 254815 }, { "epoch": 2.82, "learning_rate": 2.97413411285322e-06, "loss": 0.603, "step": 254820 }, { "epoch": 2.82, "learning_rate": 2.9732113857147076e-06, "loss": 0.5635, "step": 254825 }, { "epoch": 2.82, "learning_rate": 2.972288658576195e-06, "loss": 0.6137, "step": 254830 }, { "epoch": 2.82, "learning_rate": 2.971365931437683e-06, "loss": 0.5535, "step": 254835 }, { "epoch": 2.82, "learning_rate": 2.9704432042991703e-06, "loss": 0.5996, "step": 254840 }, { "epoch": 2.82, "learning_rate": 2.969520477160658e-06, "loss": 0.5626, "step": 254845 }, { "epoch": 2.82, "learning_rate": 2.9685977500221456e-06, "loss": 0.5771, "step": 254850 }, { "epoch": 2.82, "learning_rate": 2.967675022883633e-06, "loss": 0.628, "step": 254855 }, { "epoch": 2.82, "learning_rate": 2.966752295745121e-06, "loss": 0.5114, "step": 254860 }, { "epoch": 2.82, "learning_rate": 2.9658295686066083e-06, "loss": 0.569, "step": 254865 }, { "epoch": 2.82, "learning_rate": 2.9649068414680957e-06, "loss": 0.5398, "step": 254870 }, { "epoch": 2.82, "learning_rate": 2.9639841143295836e-06, "loss": 0.5707, "step": 254875 }, { "epoch": 2.82, "learning_rate": 2.963061387191071e-06, "loss": 0.5547, "step": 254880 }, { "epoch": 2.82, "learning_rate": 2.9621386600525584e-06, "loss": 0.5262, "step": 254885 }, { "epoch": 2.82, "learning_rate": 2.9612159329140463e-06, "loss": 0.5425, "step": 254890 }, { "epoch": 2.82, "learning_rate": 2.9602932057755337e-06, "loss": 0.589, "step": 254895 }, { "epoch": 2.82, "learning_rate": 2.9593704786370216e-06, "loss": 0.6503, "step": 254900 }, { "epoch": 2.82, "learning_rate": 2.958447751498509e-06, "loss": 0.5709, "step": 254905 }, { "epoch": 2.82, "learning_rate": 2.9575250243599964e-06, "loss": 0.6216, "step": 254910 }, { "epoch": 2.82, "learning_rate": 2.9566022972214843e-06, "loss": 0.5561, "step": 254915 }, { "epoch": 2.82, "learning_rate": 2.9556795700829717e-06, "loss": 0.5568, "step": 254920 }, { "epoch": 2.82, "learning_rate": 2.954756842944459e-06, "loss": 0.6187, "step": 254925 }, { "epoch": 2.82, "learning_rate": 2.953834115805947e-06, "loss": 0.607, "step": 254930 }, { "epoch": 2.82, "learning_rate": 2.9529113886674344e-06, "loss": 0.556, "step": 254935 }, { "epoch": 2.82, "learning_rate": 2.9519886615289222e-06, "loss": 0.5526, "step": 254940 }, { "epoch": 2.82, "learning_rate": 2.9510659343904097e-06, "loss": 0.5807, "step": 254945 }, { "epoch": 2.82, "learning_rate": 2.950143207251897e-06, "loss": 0.5881, "step": 254950 }, { "epoch": 2.82, "learning_rate": 2.949220480113385e-06, "loss": 0.6155, "step": 254955 }, { "epoch": 2.82, "learning_rate": 2.9482977529748728e-06, "loss": 0.5735, "step": 254960 }, { "epoch": 2.82, "learning_rate": 2.9473750258363598e-06, "loss": 0.5741, "step": 254965 }, { "epoch": 2.82, "learning_rate": 2.9464522986978476e-06, "loss": 0.6533, "step": 254970 }, { "epoch": 2.82, "learning_rate": 2.9455295715593355e-06, "loss": 0.5618, "step": 254975 }, { "epoch": 2.82, "learning_rate": 2.9446068444208225e-06, "loss": 0.5856, "step": 254980 }, { "epoch": 2.82, "learning_rate": 2.9436841172823103e-06, "loss": 0.5799, "step": 254985 }, { "epoch": 2.82, "learning_rate": 2.9427613901437977e-06, "loss": 0.6042, "step": 254990 }, { "epoch": 2.82, "learning_rate": 2.9418386630052856e-06, "loss": 0.5585, "step": 254995 }, { "epoch": 2.82, "learning_rate": 2.940915935866773e-06, "loss": 0.543, "step": 255000 }, { "epoch": 2.82, "eval_loss": 0.5498818755149841, "eval_runtime": 69.7456, "eval_samples_per_second": 28.676, "eval_steps_per_second": 14.338, "step": 255000 }, { "epoch": 2.82, "learning_rate": 2.9399932087282604e-06, "loss": 0.5343, "step": 255005 }, { "epoch": 2.82, "learning_rate": 2.9390704815897483e-06, "loss": 0.5821, "step": 255010 }, { "epoch": 2.82, "learning_rate": 2.938147754451236e-06, "loss": 0.5964, "step": 255015 }, { "epoch": 2.82, "learning_rate": 2.937225027312723e-06, "loss": 0.5946, "step": 255020 }, { "epoch": 2.82, "learning_rate": 2.936302300174211e-06, "loss": 0.5435, "step": 255025 }, { "epoch": 2.82, "learning_rate": 2.935379573035699e-06, "loss": 0.5931, "step": 255030 }, { "epoch": 2.82, "learning_rate": 2.9344568458971863e-06, "loss": 0.6183, "step": 255035 }, { "epoch": 2.82, "learning_rate": 2.9335341187586737e-06, "loss": 0.6135, "step": 255040 }, { "epoch": 2.82, "learning_rate": 2.9326113916201615e-06, "loss": 0.559, "step": 255045 }, { "epoch": 2.82, "learning_rate": 2.931688664481649e-06, "loss": 0.5293, "step": 255050 }, { "epoch": 2.82, "learning_rate": 2.930765937343137e-06, "loss": 0.5649, "step": 255055 }, { "epoch": 2.82, "learning_rate": 2.929843210204624e-06, "loss": 0.5931, "step": 255060 }, { "epoch": 2.82, "learning_rate": 2.9289204830661117e-06, "loss": 0.5523, "step": 255065 }, { "epoch": 2.82, "learning_rate": 2.9279977559275995e-06, "loss": 0.6031, "step": 255070 }, { "epoch": 2.82, "learning_rate": 2.9270750287890865e-06, "loss": 0.5779, "step": 255075 }, { "epoch": 2.82, "learning_rate": 2.9261523016505744e-06, "loss": 0.6056, "step": 255080 }, { "epoch": 2.82, "learning_rate": 2.9252295745120622e-06, "loss": 0.6069, "step": 255085 }, { "epoch": 2.82, "learning_rate": 2.9243068473735496e-06, "loss": 0.5799, "step": 255090 }, { "epoch": 2.82, "learning_rate": 2.923384120235037e-06, "loss": 0.5977, "step": 255095 }, { "epoch": 2.82, "learning_rate": 2.922461393096525e-06, "loss": 0.5455, "step": 255100 }, { "epoch": 2.82, "learning_rate": 2.9215386659580123e-06, "loss": 0.5478, "step": 255105 }, { "epoch": 2.82, "learning_rate": 2.9206159388195e-06, "loss": 0.5171, "step": 255110 }, { "epoch": 2.82, "learning_rate": 2.9196932116809876e-06, "loss": 0.568, "step": 255115 }, { "epoch": 2.82, "learning_rate": 2.918770484542475e-06, "loss": 0.5569, "step": 255120 }, { "epoch": 2.82, "learning_rate": 2.917847757403963e-06, "loss": 0.5593, "step": 255125 }, { "epoch": 2.82, "learning_rate": 2.9169250302654503e-06, "loss": 0.5685, "step": 255130 }, { "epoch": 2.83, "learning_rate": 2.9160023031269377e-06, "loss": 0.5619, "step": 255135 }, { "epoch": 2.83, "learning_rate": 2.9150795759884256e-06, "loss": 0.5322, "step": 255140 }, { "epoch": 2.83, "learning_rate": 2.914156848849913e-06, "loss": 0.6167, "step": 255145 }, { "epoch": 2.83, "learning_rate": 2.913234121711401e-06, "loss": 0.6176, "step": 255150 }, { "epoch": 2.83, "learning_rate": 2.9123113945728883e-06, "loss": 0.6061, "step": 255155 }, { "epoch": 2.83, "learning_rate": 2.9113886674343757e-06, "loss": 0.5824, "step": 255160 }, { "epoch": 2.83, "learning_rate": 2.9104659402958636e-06, "loss": 0.628, "step": 255165 }, { "epoch": 2.83, "learning_rate": 2.909543213157351e-06, "loss": 0.5808, "step": 255170 }, { "epoch": 2.83, "learning_rate": 2.9086204860188384e-06, "loss": 0.5863, "step": 255175 }, { "epoch": 2.83, "learning_rate": 2.9076977588803263e-06, "loss": 0.5692, "step": 255180 }, { "epoch": 2.83, "learning_rate": 2.9067750317418137e-06, "loss": 0.5243, "step": 255185 }, { "epoch": 2.83, "learning_rate": 2.905852304603301e-06, "loss": 0.5834, "step": 255190 }, { "epoch": 2.83, "learning_rate": 2.904929577464789e-06, "loss": 0.5257, "step": 255195 }, { "epoch": 2.83, "learning_rate": 2.9040068503262764e-06, "loss": 0.5498, "step": 255200 }, { "epoch": 2.83, "learning_rate": 2.9030841231877642e-06, "loss": 0.523, "step": 255205 }, { "epoch": 2.83, "learning_rate": 2.9021613960492517e-06, "loss": 0.5507, "step": 255210 }, { "epoch": 2.83, "learning_rate": 2.901238668910739e-06, "loss": 0.5784, "step": 255215 }, { "epoch": 2.83, "learning_rate": 2.900315941772227e-06, "loss": 0.6331, "step": 255220 }, { "epoch": 2.83, "learning_rate": 2.8993932146337144e-06, "loss": 0.5948, "step": 255225 }, { "epoch": 2.83, "learning_rate": 2.8984704874952018e-06, "loss": 0.5489, "step": 255230 }, { "epoch": 2.83, "learning_rate": 2.8975477603566896e-06, "loss": 0.6068, "step": 255235 }, { "epoch": 2.83, "learning_rate": 2.896625033218177e-06, "loss": 0.5207, "step": 255240 }, { "epoch": 2.83, "learning_rate": 2.895702306079665e-06, "loss": 0.5517, "step": 255245 }, { "epoch": 2.83, "learning_rate": 2.8947795789411523e-06, "loss": 0.5473, "step": 255250 }, { "epoch": 2.83, "learning_rate": 2.8938568518026398e-06, "loss": 0.5745, "step": 255255 }, { "epoch": 2.83, "learning_rate": 2.8929341246641276e-06, "loss": 0.569, "step": 255260 }, { "epoch": 2.83, "learning_rate": 2.892011397525615e-06, "loss": 0.5806, "step": 255265 }, { "epoch": 2.83, "learning_rate": 2.8910886703871025e-06, "loss": 0.6257, "step": 255270 }, { "epoch": 2.83, "learning_rate": 2.8901659432485903e-06, "loss": 0.6271, "step": 255275 }, { "epoch": 2.83, "learning_rate": 2.8892432161100777e-06, "loss": 0.5981, "step": 255280 }, { "epoch": 2.83, "learning_rate": 2.888320488971565e-06, "loss": 0.5887, "step": 255285 }, { "epoch": 2.83, "learning_rate": 2.887397761833053e-06, "loss": 0.5776, "step": 255290 }, { "epoch": 2.83, "learning_rate": 2.8864750346945404e-06, "loss": 0.5909, "step": 255295 }, { "epoch": 2.83, "learning_rate": 2.8855523075560283e-06, "loss": 0.5568, "step": 255300 }, { "epoch": 2.83, "learning_rate": 2.8846295804175157e-06, "loss": 0.5962, "step": 255305 }, { "epoch": 2.83, "learning_rate": 2.883706853279003e-06, "loss": 0.5194, "step": 255310 }, { "epoch": 2.83, "learning_rate": 2.882784126140491e-06, "loss": 0.5626, "step": 255315 }, { "epoch": 2.83, "learning_rate": 2.881861399001979e-06, "loss": 0.6342, "step": 255320 }, { "epoch": 2.83, "learning_rate": 2.880938671863466e-06, "loss": 0.5247, "step": 255325 }, { "epoch": 2.83, "learning_rate": 2.8800159447249537e-06, "loss": 0.5435, "step": 255330 }, { "epoch": 2.83, "learning_rate": 2.879093217586441e-06, "loss": 0.5118, "step": 255335 }, { "epoch": 2.83, "learning_rate": 2.878170490447929e-06, "loss": 0.6191, "step": 255340 }, { "epoch": 2.83, "learning_rate": 2.8772477633094164e-06, "loss": 0.5615, "step": 255345 }, { "epoch": 2.83, "learning_rate": 2.876325036170904e-06, "loss": 0.5712, "step": 255350 }, { "epoch": 2.83, "learning_rate": 2.8754023090323916e-06, "loss": 0.5673, "step": 255355 }, { "epoch": 2.83, "learning_rate": 2.874479581893879e-06, "loss": 0.5926, "step": 255360 }, { "epoch": 2.83, "learning_rate": 2.8735568547553665e-06, "loss": 0.5723, "step": 255365 }, { "epoch": 2.83, "learning_rate": 2.8726341276168543e-06, "loss": 0.6254, "step": 255370 }, { "epoch": 2.83, "learning_rate": 2.871711400478342e-06, "loss": 0.5821, "step": 255375 }, { "epoch": 2.83, "learning_rate": 2.870788673339829e-06, "loss": 0.5866, "step": 255380 }, { "epoch": 2.83, "learning_rate": 2.869865946201317e-06, "loss": 0.5374, "step": 255385 }, { "epoch": 2.83, "learning_rate": 2.868943219062805e-06, "loss": 0.5831, "step": 255390 }, { "epoch": 2.83, "learning_rate": 2.8680204919242923e-06, "loss": 0.5448, "step": 255395 }, { "epoch": 2.83, "learning_rate": 2.8670977647857797e-06, "loss": 0.5254, "step": 255400 }, { "epoch": 2.83, "learning_rate": 2.866175037647267e-06, "loss": 0.6067, "step": 255405 }, { "epoch": 2.83, "learning_rate": 2.865252310508755e-06, "loss": 0.5874, "step": 255410 }, { "epoch": 2.83, "learning_rate": 2.864329583370243e-06, "loss": 0.5886, "step": 255415 }, { "epoch": 2.83, "learning_rate": 2.86340685623173e-06, "loss": 0.5491, "step": 255420 }, { "epoch": 2.83, "learning_rate": 2.8624841290932177e-06, "loss": 0.5719, "step": 255425 }, { "epoch": 2.83, "learning_rate": 2.8615614019547056e-06, "loss": 0.5777, "step": 255430 }, { "epoch": 2.83, "learning_rate": 2.860638674816193e-06, "loss": 0.5466, "step": 255435 }, { "epoch": 2.83, "learning_rate": 2.8597159476776804e-06, "loss": 0.6134, "step": 255440 }, { "epoch": 2.83, "learning_rate": 2.8587932205391683e-06, "loss": 0.55, "step": 255445 }, { "epoch": 2.83, "learning_rate": 2.8578704934006557e-06, "loss": 0.5785, "step": 255450 }, { "epoch": 2.83, "learning_rate": 2.856947766262143e-06, "loss": 0.5787, "step": 255455 }, { "epoch": 2.83, "learning_rate": 2.8560250391236305e-06, "loss": 0.5916, "step": 255460 }, { "epoch": 2.83, "learning_rate": 2.8551023119851184e-06, "loss": 0.5768, "step": 255465 }, { "epoch": 2.83, "learning_rate": 2.8541795848466062e-06, "loss": 0.5555, "step": 255470 }, { "epoch": 2.83, "learning_rate": 2.8532568577080932e-06, "loss": 0.5275, "step": 255475 }, { "epoch": 2.83, "learning_rate": 2.852334130569581e-06, "loss": 0.5821, "step": 255480 }, { "epoch": 2.83, "learning_rate": 2.851411403431069e-06, "loss": 0.6571, "step": 255485 }, { "epoch": 2.83, "learning_rate": 2.8504886762925564e-06, "loss": 0.5405, "step": 255490 }, { "epoch": 2.83, "learning_rate": 2.8495659491540438e-06, "loss": 0.6418, "step": 255495 }, { "epoch": 2.83, "learning_rate": 2.8486432220155316e-06, "loss": 0.5886, "step": 255500 }, { "epoch": 2.83, "learning_rate": 2.847720494877019e-06, "loss": 0.6107, "step": 255505 }, { "epoch": 2.83, "learning_rate": 2.846797767738507e-06, "loss": 0.6015, "step": 255510 }, { "epoch": 2.83, "learning_rate": 2.8458750405999943e-06, "loss": 0.614, "step": 255515 }, { "epoch": 2.83, "learning_rate": 2.8449523134614818e-06, "loss": 0.4754, "step": 255520 }, { "epoch": 2.83, "learning_rate": 2.8440295863229696e-06, "loss": 0.6136, "step": 255525 }, { "epoch": 2.83, "learning_rate": 2.843106859184457e-06, "loss": 0.5833, "step": 255530 }, { "epoch": 2.83, "learning_rate": 2.8421841320459445e-06, "loss": 0.5558, "step": 255535 }, { "epoch": 2.83, "learning_rate": 2.8412614049074323e-06, "loss": 0.6028, "step": 255540 }, { "epoch": 2.83, "learning_rate": 2.8403386777689197e-06, "loss": 0.5344, "step": 255545 }, { "epoch": 2.83, "learning_rate": 2.839415950630407e-06, "loss": 0.6229, "step": 255550 }, { "epoch": 2.83, "learning_rate": 2.838493223491895e-06, "loss": 0.5185, "step": 255555 }, { "epoch": 2.83, "learning_rate": 2.8375704963533824e-06, "loss": 0.5762, "step": 255560 }, { "epoch": 2.83, "learning_rate": 2.8366477692148703e-06, "loss": 0.5932, "step": 255565 }, { "epoch": 2.83, "learning_rate": 2.8357250420763577e-06, "loss": 0.5119, "step": 255570 }, { "epoch": 2.83, "learning_rate": 2.834802314937845e-06, "loss": 0.6067, "step": 255575 }, { "epoch": 2.83, "learning_rate": 2.833879587799333e-06, "loss": 0.6295, "step": 255580 }, { "epoch": 2.83, "learning_rate": 2.8329568606608204e-06, "loss": 0.5785, "step": 255585 }, { "epoch": 2.83, "learning_rate": 2.832034133522308e-06, "loss": 0.5672, "step": 255590 }, { "epoch": 2.83, "learning_rate": 2.8311114063837957e-06, "loss": 0.5581, "step": 255595 }, { "epoch": 2.83, "learning_rate": 2.830188679245283e-06, "loss": 0.5805, "step": 255600 }, { "epoch": 2.83, "learning_rate": 2.829265952106771e-06, "loss": 0.6564, "step": 255605 }, { "epoch": 2.83, "learning_rate": 2.8283432249682584e-06, "loss": 0.5909, "step": 255610 }, { "epoch": 2.83, "learning_rate": 2.827420497829746e-06, "loss": 0.5639, "step": 255615 }, { "epoch": 2.83, "learning_rate": 2.8264977706912336e-06, "loss": 0.6236, "step": 255620 }, { "epoch": 2.83, "learning_rate": 2.825575043552721e-06, "loss": 0.6179, "step": 255625 }, { "epoch": 2.83, "learning_rate": 2.8246523164142085e-06, "loss": 0.5899, "step": 255630 }, { "epoch": 2.83, "learning_rate": 2.8237295892756963e-06, "loss": 0.5659, "step": 255635 }, { "epoch": 2.83, "learning_rate": 2.8228068621371838e-06, "loss": 0.6271, "step": 255640 }, { "epoch": 2.83, "learning_rate": 2.821884134998671e-06, "loss": 0.5912, "step": 255645 }, { "epoch": 2.83, "learning_rate": 2.820961407860159e-06, "loss": 0.5741, "step": 255650 }, { "epoch": 2.83, "learning_rate": 2.8200386807216465e-06, "loss": 0.5845, "step": 255655 }, { "epoch": 2.83, "learning_rate": 2.8191159535831343e-06, "loss": 0.5656, "step": 255660 }, { "epoch": 2.83, "learning_rate": 2.8181932264446217e-06, "loss": 0.5887, "step": 255665 }, { "epoch": 2.83, "learning_rate": 2.817270499306109e-06, "loss": 0.5781, "step": 255670 }, { "epoch": 2.83, "learning_rate": 2.816347772167597e-06, "loss": 0.5814, "step": 255675 }, { "epoch": 2.83, "learning_rate": 2.8154250450290844e-06, "loss": 0.5655, "step": 255680 }, { "epoch": 2.83, "learning_rate": 2.814502317890572e-06, "loss": 0.5725, "step": 255685 }, { "epoch": 2.83, "learning_rate": 2.8135795907520597e-06, "loss": 0.5611, "step": 255690 }, { "epoch": 2.83, "learning_rate": 2.812656863613547e-06, "loss": 0.5895, "step": 255695 }, { "epoch": 2.83, "learning_rate": 2.811734136475035e-06, "loss": 0.6244, "step": 255700 }, { "epoch": 2.83, "learning_rate": 2.8108114093365224e-06, "loss": 0.5217, "step": 255705 }, { "epoch": 2.83, "learning_rate": 2.80988868219801e-06, "loss": 0.5812, "step": 255710 }, { "epoch": 2.83, "learning_rate": 2.8089659550594977e-06, "loss": 0.6062, "step": 255715 }, { "epoch": 2.83, "learning_rate": 2.808043227920985e-06, "loss": 0.558, "step": 255720 }, { "epoch": 2.83, "learning_rate": 2.8071205007824725e-06, "loss": 0.5799, "step": 255725 }, { "epoch": 2.83, "learning_rate": 2.8061977736439604e-06, "loss": 0.5681, "step": 255730 }, { "epoch": 2.83, "learning_rate": 2.805275046505448e-06, "loss": 0.6023, "step": 255735 }, { "epoch": 2.83, "learning_rate": 2.8043523193669352e-06, "loss": 0.6083, "step": 255740 }, { "epoch": 2.83, "learning_rate": 2.803429592228423e-06, "loss": 0.5774, "step": 255745 }, { "epoch": 2.83, "learning_rate": 2.8025068650899105e-06, "loss": 0.5539, "step": 255750 }, { "epoch": 2.83, "learning_rate": 2.8015841379513984e-06, "loss": 0.6299, "step": 255755 }, { "epoch": 2.83, "learning_rate": 2.8006614108128858e-06, "loss": 0.6212, "step": 255760 }, { "epoch": 2.83, "learning_rate": 2.799738683674373e-06, "loss": 0.5785, "step": 255765 }, { "epoch": 2.83, "learning_rate": 2.798815956535861e-06, "loss": 0.5554, "step": 255770 }, { "epoch": 2.83, "learning_rate": 2.797893229397349e-06, "loss": 0.563, "step": 255775 }, { "epoch": 2.83, "learning_rate": 2.796970502258836e-06, "loss": 0.54, "step": 255780 }, { "epoch": 2.83, "learning_rate": 2.7960477751203238e-06, "loss": 0.5736, "step": 255785 }, { "epoch": 2.83, "learning_rate": 2.7951250479818116e-06, "loss": 0.5888, "step": 255790 }, { "epoch": 2.83, "learning_rate": 2.794202320843299e-06, "loss": 0.5861, "step": 255795 }, { "epoch": 2.83, "learning_rate": 2.7932795937047865e-06, "loss": 0.6344, "step": 255800 }, { "epoch": 2.83, "learning_rate": 2.792356866566274e-06, "loss": 0.5812, "step": 255805 }, { "epoch": 2.83, "learning_rate": 2.7914341394277617e-06, "loss": 0.6594, "step": 255810 }, { "epoch": 2.83, "learning_rate": 2.790511412289249e-06, "loss": 0.5874, "step": 255815 }, { "epoch": 2.83, "learning_rate": 2.7895886851507366e-06, "loss": 0.6072, "step": 255820 }, { "epoch": 2.83, "learning_rate": 2.7886659580122244e-06, "loss": 0.5579, "step": 255825 }, { "epoch": 2.83, "learning_rate": 2.7877432308737123e-06, "loss": 0.5367, "step": 255830 }, { "epoch": 2.83, "learning_rate": 2.7868205037351993e-06, "loss": 0.5739, "step": 255835 }, { "epoch": 2.83, "learning_rate": 2.785897776596687e-06, "loss": 0.5392, "step": 255840 }, { "epoch": 2.83, "learning_rate": 2.784975049458175e-06, "loss": 0.5835, "step": 255845 }, { "epoch": 2.83, "learning_rate": 2.7840523223196624e-06, "loss": 0.5808, "step": 255850 }, { "epoch": 2.83, "learning_rate": 2.78312959518115e-06, "loss": 0.5902, "step": 255855 }, { "epoch": 2.83, "learning_rate": 2.7822068680426377e-06, "loss": 0.5862, "step": 255860 }, { "epoch": 2.83, "learning_rate": 2.781284140904125e-06, "loss": 0.5814, "step": 255865 }, { "epoch": 2.83, "learning_rate": 2.780361413765613e-06, "loss": 0.5701, "step": 255870 }, { "epoch": 2.83, "learning_rate": 2.7794386866271e-06, "loss": 0.6203, "step": 255875 }, { "epoch": 2.83, "learning_rate": 2.778515959488588e-06, "loss": 0.6139, "step": 255880 }, { "epoch": 2.83, "learning_rate": 2.7775932323500757e-06, "loss": 0.6063, "step": 255885 }, { "epoch": 2.83, "learning_rate": 2.776670505211563e-06, "loss": 0.5758, "step": 255890 }, { "epoch": 2.83, "learning_rate": 2.7757477780730505e-06, "loss": 0.6156, "step": 255895 }, { "epoch": 2.83, "learning_rate": 2.7748250509345383e-06, "loss": 0.5621, "step": 255900 }, { "epoch": 2.83, "learning_rate": 2.7739023237960258e-06, "loss": 0.5549, "step": 255905 }, { "epoch": 2.83, "learning_rate": 2.772979596657513e-06, "loss": 0.5426, "step": 255910 }, { "epoch": 2.83, "learning_rate": 2.772056869519001e-06, "loss": 0.5476, "step": 255915 }, { "epoch": 2.83, "learning_rate": 2.7711341423804885e-06, "loss": 0.6027, "step": 255920 }, { "epoch": 2.83, "learning_rate": 2.7702114152419763e-06, "loss": 0.5608, "step": 255925 }, { "epoch": 2.83, "learning_rate": 2.7692886881034637e-06, "loss": 0.5593, "step": 255930 }, { "epoch": 2.83, "learning_rate": 2.768365960964951e-06, "loss": 0.5562, "step": 255935 }, { "epoch": 2.83, "learning_rate": 2.767443233826439e-06, "loss": 0.5353, "step": 255940 }, { "epoch": 2.83, "learning_rate": 2.7665205066879264e-06, "loss": 0.6204, "step": 255945 }, { "epoch": 2.83, "learning_rate": 2.765597779549414e-06, "loss": 0.5553, "step": 255950 }, { "epoch": 2.83, "learning_rate": 2.7646750524109017e-06, "loss": 0.5196, "step": 255955 }, { "epoch": 2.83, "learning_rate": 2.763752325272389e-06, "loss": 0.5565, "step": 255960 }, { "epoch": 2.83, "learning_rate": 2.762829598133877e-06, "loss": 0.632, "step": 255965 }, { "epoch": 2.83, "learning_rate": 2.7619068709953644e-06, "loss": 0.5653, "step": 255970 }, { "epoch": 2.83, "learning_rate": 2.760984143856852e-06, "loss": 0.5489, "step": 255975 }, { "epoch": 2.83, "learning_rate": 2.7600614167183397e-06, "loss": 0.5522, "step": 255980 }, { "epoch": 2.83, "learning_rate": 2.759138689579827e-06, "loss": 0.5179, "step": 255985 }, { "epoch": 2.83, "learning_rate": 2.7582159624413145e-06, "loss": 0.6215, "step": 255990 }, { "epoch": 2.83, "learning_rate": 2.7572932353028024e-06, "loss": 0.6021, "step": 255995 }, { "epoch": 2.83, "learning_rate": 2.75637050816429e-06, "loss": 0.5763, "step": 256000 }, { "epoch": 2.83, "eval_loss": 0.5240838527679443, "eval_runtime": 69.6975, "eval_samples_per_second": 28.695, "eval_steps_per_second": 14.348, "step": 256000 } ], "max_steps": 270936, "num_train_epochs": 3, "total_flos": 1.5275323853887291e+19, "trial_name": null, "trial_params": null }