{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.48742098448884263, "eval_steps": 800, "global_step": 12000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 8.097165991902834e-09, "loss": 3.5744, "step": 1 }, { "epoch": 0.0, "learning_rate": 4.048582995951417e-08, "loss": 3.6858, "step": 5 }, { "epoch": 0.0, "learning_rate": 8.097165991902834e-08, "loss": 3.6628, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.214574898785425e-07, "loss": 3.3928, "step": 15 }, { "epoch": 0.0, "learning_rate": 1.6194331983805668e-07, "loss": 3.0895, "step": 20 }, { "epoch": 0.0, "learning_rate": 2.0242914979757083e-07, "loss": 2.6585, "step": 25 }, { "epoch": 0.0, "learning_rate": 2.42914979757085e-07, "loss": 2.1214, "step": 30 }, { "epoch": 0.0, "learning_rate": 2.8340080971659917e-07, "loss": 1.7237, "step": 35 }, { "epoch": 0.0, "learning_rate": 3.2388663967611335e-07, "loss": 1.2235, "step": 40 }, { "epoch": 0.0, "learning_rate": 3.6437246963562754e-07, "loss": 1.1867, "step": 45 }, { "epoch": 0.0, "learning_rate": 4.0485829959514166e-07, "loss": 1.1259, "step": 50 }, { "epoch": 0.0, "learning_rate": 4.4534412955465585e-07, "loss": 1.0245, "step": 55 }, { "epoch": 0.0, "learning_rate": 4.8582995951417e-07, "loss": 0.977, "step": 60 }, { "epoch": 0.0, "learning_rate": 5.263157894736842e-07, "loss": 0.9554, "step": 65 }, { "epoch": 0.0, "learning_rate": 5.668016194331983e-07, "loss": 0.9017, "step": 70 }, { "epoch": 0.0, "learning_rate": 6.072874493927125e-07, "loss": 0.8987, "step": 75 }, { "epoch": 0.0, "learning_rate": 6.477732793522267e-07, "loss": 0.8863, "step": 80 }, { "epoch": 0.0, "learning_rate": 6.882591093117408e-07, "loss": 0.9425, "step": 85 }, { "epoch": 0.0, "learning_rate": 7.287449392712551e-07, "loss": 0.91, "step": 90 }, { "epoch": 0.0, "learning_rate": 7.692307692307693e-07, "loss": 0.848, "step": 95 }, { "epoch": 0.0, "learning_rate": 8.097165991902833e-07, "loss": 0.8213, "step": 100 }, { "epoch": 0.0, "learning_rate": 8.502024291497975e-07, "loss": 0.8434, "step": 105 }, { "epoch": 0.0, "learning_rate": 8.906882591093117e-07, "loss": 0.8409, "step": 110 }, { "epoch": 0.0, "learning_rate": 9.311740890688259e-07, "loss": 0.8398, "step": 115 }, { "epoch": 0.0, "learning_rate": 9.7165991902834e-07, "loss": 0.7942, "step": 120 }, { "epoch": 0.01, "learning_rate": 1.0121457489878542e-06, "loss": 0.8221, "step": 125 }, { "epoch": 0.01, "learning_rate": 1.0526315789473683e-06, "loss": 0.8038, "step": 130 }, { "epoch": 0.01, "learning_rate": 1.0931174089068826e-06, "loss": 0.803, "step": 135 }, { "epoch": 0.01, "learning_rate": 1.1336032388663967e-06, "loss": 0.7935, "step": 140 }, { "epoch": 0.01, "learning_rate": 1.1740890688259108e-06, "loss": 0.8251, "step": 145 }, { "epoch": 0.01, "learning_rate": 1.214574898785425e-06, "loss": 0.8082, "step": 150 }, { "epoch": 0.01, "learning_rate": 1.2550607287449393e-06, "loss": 0.7897, "step": 155 }, { "epoch": 0.01, "learning_rate": 1.2955465587044534e-06, "loss": 0.8286, "step": 160 }, { "epoch": 0.01, "learning_rate": 1.3360323886639675e-06, "loss": 0.7742, "step": 165 }, { "epoch": 0.01, "learning_rate": 1.3765182186234816e-06, "loss": 0.787, "step": 170 }, { "epoch": 0.01, "learning_rate": 1.4170040485829959e-06, "loss": 0.7862, "step": 175 }, { "epoch": 0.01, "learning_rate": 1.4574898785425101e-06, "loss": 0.7721, "step": 180 }, { "epoch": 0.01, "learning_rate": 1.4979757085020242e-06, "loss": 0.7554, "step": 185 }, { "epoch": 0.01, "learning_rate": 1.5384615384615385e-06, "loss": 0.7941, "step": 190 }, { "epoch": 0.01, "learning_rate": 1.5789473684210526e-06, "loss": 0.7759, "step": 195 }, { "epoch": 0.01, "learning_rate": 1.6194331983805667e-06, "loss": 0.7249, "step": 200 }, { "epoch": 0.01, "learning_rate": 1.6599190283400807e-06, "loss": 0.7673, "step": 205 }, { "epoch": 0.01, "learning_rate": 1.700404858299595e-06, "loss": 0.7922, "step": 210 }, { "epoch": 0.01, "learning_rate": 1.7408906882591093e-06, "loss": 0.7546, "step": 215 }, { "epoch": 0.01, "learning_rate": 1.7813765182186234e-06, "loss": 0.7709, "step": 220 }, { "epoch": 0.01, "learning_rate": 1.8218623481781377e-06, "loss": 0.7383, "step": 225 }, { "epoch": 0.01, "learning_rate": 1.8623481781376518e-06, "loss": 0.7608, "step": 230 }, { "epoch": 0.01, "learning_rate": 1.9028340080971658e-06, "loss": 0.7663, "step": 235 }, { "epoch": 0.01, "learning_rate": 1.94331983805668e-06, "loss": 0.7567, "step": 240 }, { "epoch": 0.01, "learning_rate": 1.983805668016194e-06, "loss": 0.751, "step": 245 }, { "epoch": 0.01, "learning_rate": 1.9999999252295637e-06, "loss": 0.8148, "step": 250 }, { "epoch": 0.01, "learning_rate": 1.9999994682991603e-06, "loss": 0.7634, "step": 255 }, { "epoch": 0.01, "learning_rate": 1.999998595977674e-06, "loss": 0.7448, "step": 260 }, { "epoch": 0.01, "learning_rate": 1.999997308265467e-06, "loss": 0.7508, "step": 265 }, { "epoch": 0.01, "learning_rate": 1.999995605163075e-06, "loss": 0.7696, "step": 270 }, { "epoch": 0.01, "learning_rate": 1.9999934866712048e-06, "loss": 0.7676, "step": 275 }, { "epoch": 0.01, "learning_rate": 1.9999909527907367e-06, "loss": 0.7601, "step": 280 }, { "epoch": 0.01, "learning_rate": 1.9999880035227236e-06, "loss": 0.7779, "step": 285 }, { "epoch": 0.01, "learning_rate": 1.9999846388683895e-06, "loss": 0.7768, "step": 290 }, { "epoch": 0.01, "learning_rate": 1.9999808588291327e-06, "loss": 0.7713, "step": 295 }, { "epoch": 0.01, "learning_rate": 1.999976663406524e-06, "loss": 0.7666, "step": 300 }, { "epoch": 0.01, "learning_rate": 1.999972052602305e-06, "loss": 0.742, "step": 305 }, { "epoch": 0.01, "learning_rate": 1.999967026418392e-06, "loss": 0.783, "step": 310 }, { "epoch": 0.01, "learning_rate": 1.999961584856872e-06, "loss": 0.7269, "step": 315 }, { "epoch": 0.01, "learning_rate": 1.9999557279200056e-06, "loss": 0.7336, "step": 320 }, { "epoch": 0.01, "learning_rate": 1.9999494556102263e-06, "loss": 0.7072, "step": 325 }, { "epoch": 0.01, "learning_rate": 1.9999427679301387e-06, "loss": 0.7709, "step": 330 }, { "epoch": 0.01, "learning_rate": 1.999935664882522e-06, "loss": 0.7237, "step": 335 }, { "epoch": 0.01, "learning_rate": 1.9999281464703247e-06, "loss": 0.719, "step": 340 }, { "epoch": 0.01, "learning_rate": 1.999920212696672e-06, "loss": 0.748, "step": 345 }, { "epoch": 0.01, "learning_rate": 1.999911863564859e-06, "loss": 0.7167, "step": 350 }, { "epoch": 0.01, "learning_rate": 1.9999030990783527e-06, "loss": 0.7151, "step": 355 }, { "epoch": 0.01, "learning_rate": 1.999893919240795e-06, "loss": 0.7095, "step": 360 }, { "epoch": 0.01, "learning_rate": 1.9998843240559986e-06, "loss": 0.7703, "step": 365 }, { "epoch": 0.02, "learning_rate": 1.9998743135279497e-06, "loss": 0.7456, "step": 370 }, { "epoch": 0.02, "learning_rate": 1.999863887660806e-06, "loss": 0.7532, "step": 375 }, { "epoch": 0.02, "learning_rate": 1.999853046458899e-06, "loss": 0.7014, "step": 380 }, { "epoch": 0.02, "learning_rate": 1.9998417899267313e-06, "loss": 0.7629, "step": 385 }, { "epoch": 0.02, "learning_rate": 1.999830118068979e-06, "loss": 0.7329, "step": 390 }, { "epoch": 0.02, "learning_rate": 1.999818030890491e-06, "loss": 0.723, "step": 395 }, { "epoch": 0.02, "learning_rate": 1.999805528396288e-06, "loss": 0.7549, "step": 400 }, { "epoch": 0.02, "learning_rate": 1.9997926105915627e-06, "loss": 0.7121, "step": 405 }, { "epoch": 0.02, "learning_rate": 1.999779277481682e-06, "loss": 0.7506, "step": 410 }, { "epoch": 0.02, "learning_rate": 1.9997655290721834e-06, "loss": 0.7284, "step": 415 }, { "epoch": 0.02, "learning_rate": 1.9997513653687786e-06, "loss": 0.7344, "step": 420 }, { "epoch": 0.02, "learning_rate": 1.999736786377351e-06, "loss": 0.7684, "step": 425 }, { "epoch": 0.02, "learning_rate": 1.9997217921039567e-06, "loss": 0.7427, "step": 430 }, { "epoch": 0.02, "learning_rate": 1.9997063825548237e-06, "loss": 0.7139, "step": 435 }, { "epoch": 0.02, "learning_rate": 1.9996905577363533e-06, "loss": 0.761, "step": 440 }, { "epoch": 0.02, "learning_rate": 1.9996743176551186e-06, "loss": 0.7545, "step": 445 }, { "epoch": 0.02, "learning_rate": 1.999657662317866e-06, "loss": 0.7431, "step": 450 }, { "epoch": 0.02, "learning_rate": 1.999640591731515e-06, "loss": 0.7225, "step": 455 }, { "epoch": 0.02, "learning_rate": 1.999623105903154e-06, "loss": 0.7284, "step": 460 }, { "epoch": 0.02, "learning_rate": 1.999605204840049e-06, "loss": 0.76, "step": 465 }, { "epoch": 0.02, "learning_rate": 1.9995868885496343e-06, "loss": 0.7413, "step": 470 }, { "epoch": 0.02, "learning_rate": 1.9995681570395195e-06, "loss": 0.7837, "step": 475 }, { "epoch": 0.02, "learning_rate": 1.9995490103174847e-06, "loss": 0.7347, "step": 480 }, { "epoch": 0.02, "learning_rate": 1.999529448391483e-06, "loss": 0.7576, "step": 485 }, { "epoch": 0.02, "learning_rate": 1.9995094712696413e-06, "loss": 0.7665, "step": 490 }, { "epoch": 0.02, "learning_rate": 1.9994890789602576e-06, "loss": 0.7353, "step": 495 }, { "epoch": 0.02, "learning_rate": 1.999468271471802e-06, "loss": 0.7344, "step": 500 }, { "epoch": 0.02, "learning_rate": 1.9994470488129185e-06, "loss": 0.7476, "step": 505 }, { "epoch": 0.02, "learning_rate": 1.9994254109924223e-06, "loss": 0.7257, "step": 510 }, { "epoch": 0.02, "learning_rate": 1.9994033580193017e-06, "loss": 0.7306, "step": 515 }, { "epoch": 0.02, "learning_rate": 1.999380889902718e-06, "loss": 0.7115, "step": 520 }, { "epoch": 0.02, "learning_rate": 1.9993580066520034e-06, "loss": 0.7452, "step": 525 }, { "epoch": 0.02, "learning_rate": 1.9993347082766636e-06, "loss": 0.7523, "step": 530 }, { "epoch": 0.02, "learning_rate": 1.9993109947863764e-06, "loss": 0.7091, "step": 535 }, { "epoch": 0.02, "learning_rate": 1.999286866190993e-06, "loss": 0.7383, "step": 540 }, { "epoch": 0.02, "learning_rate": 1.999262322500535e-06, "loss": 0.7043, "step": 545 }, { "epoch": 0.02, "learning_rate": 1.9992373637251982e-06, "loss": 0.7098, "step": 550 }, { "epoch": 0.02, "learning_rate": 1.999211989875351e-06, "loss": 0.7142, "step": 555 }, { "epoch": 0.02, "learning_rate": 1.999186200961532e-06, "loss": 0.7424, "step": 560 }, { "epoch": 0.02, "learning_rate": 1.9991599969944552e-06, "loss": 0.7348, "step": 565 }, { "epoch": 0.02, "learning_rate": 1.9991333779850043e-06, "loss": 0.7126, "step": 570 }, { "epoch": 0.02, "learning_rate": 1.999106343944237e-06, "loss": 0.7341, "step": 575 }, { "epoch": 0.02, "learning_rate": 1.9990788948833833e-06, "loss": 0.7445, "step": 580 }, { "epoch": 0.02, "learning_rate": 1.999051030813845e-06, "loss": 0.7181, "step": 585 }, { "epoch": 0.02, "learning_rate": 1.999022751747197e-06, "loss": 0.7295, "step": 590 }, { "epoch": 0.02, "learning_rate": 1.998994057695185e-06, "loss": 0.7159, "step": 595 }, { "epoch": 0.02, "learning_rate": 1.99896494866973e-06, "loss": 0.6844, "step": 600 }, { "epoch": 0.02, "learning_rate": 1.9989354246829222e-06, "loss": 0.7511, "step": 605 }, { "epoch": 0.02, "learning_rate": 1.9989054857470267e-06, "loss": 0.7322, "step": 610 }, { "epoch": 0.02, "learning_rate": 1.9988751318744787e-06, "loss": 0.7829, "step": 615 }, { "epoch": 0.03, "learning_rate": 1.998844363077888e-06, "loss": 0.7229, "step": 620 }, { "epoch": 0.03, "learning_rate": 1.998813179370035e-06, "loss": 0.738, "step": 625 }, { "epoch": 0.03, "learning_rate": 1.9987815807638733e-06, "loss": 0.6934, "step": 630 }, { "epoch": 0.03, "learning_rate": 1.9987495672725294e-06, "loss": 0.7005, "step": 635 }, { "epoch": 0.03, "learning_rate": 1.9987171389093e-06, "loss": 0.7692, "step": 640 }, { "epoch": 0.03, "learning_rate": 1.998684295687657e-06, "loss": 0.7101, "step": 645 }, { "epoch": 0.03, "learning_rate": 1.998651037621242e-06, "loss": 0.7813, "step": 650 }, { "epoch": 0.03, "learning_rate": 1.9986173647238715e-06, "loss": 0.7526, "step": 655 }, { "epoch": 0.03, "learning_rate": 1.9985832770095313e-06, "loss": 0.7235, "step": 660 }, { "epoch": 0.03, "learning_rate": 1.998548774492382e-06, "loss": 0.7201, "step": 665 }, { "epoch": 0.03, "learning_rate": 1.9985138571867557e-06, "loss": 0.7303, "step": 670 }, { "epoch": 0.03, "learning_rate": 1.998478525107157e-06, "loss": 0.7375, "step": 675 }, { "epoch": 0.03, "learning_rate": 1.998442778268262e-06, "loss": 0.7123, "step": 680 }, { "epoch": 0.03, "learning_rate": 1.99840661668492e-06, "loss": 0.7541, "step": 685 }, { "epoch": 0.03, "learning_rate": 1.998370040372151e-06, "loss": 0.7685, "step": 690 }, { "epoch": 0.03, "learning_rate": 1.99833304934515e-06, "loss": 0.7029, "step": 695 }, { "epoch": 0.03, "learning_rate": 1.9982956436192827e-06, "loss": 0.7797, "step": 700 }, { "epoch": 0.03, "learning_rate": 1.9982578232100866e-06, "loss": 0.7326, "step": 705 }, { "epoch": 0.03, "learning_rate": 1.9982195881332714e-06, "loss": 0.773, "step": 710 }, { "epoch": 0.03, "learning_rate": 1.9981809384047207e-06, "loss": 0.741, "step": 715 }, { "epoch": 0.03, "learning_rate": 1.9981418740404886e-06, "loss": 0.7518, "step": 720 }, { "epoch": 0.03, "learning_rate": 1.998102395056802e-06, "loss": 0.7338, "step": 725 }, { "epoch": 0.03, "learning_rate": 1.998062501470061e-06, "loss": 0.7192, "step": 730 }, { "epoch": 0.03, "learning_rate": 1.998022193296836e-06, "loss": 0.7429, "step": 735 }, { "epoch": 0.03, "learning_rate": 1.9979814705538715e-06, "loss": 0.6953, "step": 740 }, { "epoch": 0.03, "learning_rate": 1.997940333258083e-06, "loss": 0.7265, "step": 745 }, { "epoch": 0.03, "learning_rate": 1.9978987814265583e-06, "loss": 0.7105, "step": 750 }, { "epoch": 0.03, "learning_rate": 1.997856815076558e-06, "loss": 0.6994, "step": 755 }, { "epoch": 0.03, "learning_rate": 1.9978144342255147e-06, "loss": 0.7008, "step": 760 }, { "epoch": 0.03, "learning_rate": 1.9977716388910325e-06, "loss": 0.7301, "step": 765 }, { "epoch": 0.03, "learning_rate": 1.997728429090889e-06, "loss": 0.7662, "step": 770 }, { "epoch": 0.03, "learning_rate": 1.9976848048430323e-06, "loss": 0.7428, "step": 775 }, { "epoch": 0.03, "learning_rate": 1.9976407661655844e-06, "loss": 0.706, "step": 780 }, { "epoch": 0.03, "learning_rate": 1.997596313076838e-06, "loss": 0.6853, "step": 785 }, { "epoch": 0.03, "learning_rate": 1.9975514455952584e-06, "loss": 0.7363, "step": 790 }, { "epoch": 0.03, "learning_rate": 1.9975061637394834e-06, "loss": 0.7217, "step": 795 }, { "epoch": 0.03, "learning_rate": 1.997460467528323e-06, "loss": 0.7161, "step": 800 }, { "epoch": 0.03, "eval_loss": 0.6896045207977295, "eval_runtime": 140.4315, "eval_samples_per_second": 16.848, "eval_steps_per_second": 2.813, "step": 800 }, { "epoch": 0.03, "learning_rate": 1.997414356980759e-06, "loss": 0.7911, "step": 805 }, { "epoch": 0.03, "learning_rate": 1.9973678321159443e-06, "loss": 0.7037, "step": 810 }, { "epoch": 0.03, "learning_rate": 1.9973208929532063e-06, "loss": 0.7083, "step": 815 }, { "epoch": 0.03, "learning_rate": 1.9972735395120418e-06, "loss": 0.7183, "step": 820 }, { "epoch": 0.03, "learning_rate": 1.997225771812122e-06, "loss": 0.7227, "step": 825 }, { "epoch": 0.03, "learning_rate": 1.9971775898732893e-06, "loss": 0.7271, "step": 830 }, { "epoch": 0.03, "learning_rate": 1.9971289937155577e-06, "loss": 0.7271, "step": 835 }, { "epoch": 0.03, "learning_rate": 1.997079983359113e-06, "loss": 0.7065, "step": 840 }, { "epoch": 0.03, "learning_rate": 1.9970305588243145e-06, "loss": 0.706, "step": 845 }, { "epoch": 0.03, "learning_rate": 1.9969807201316925e-06, "loss": 0.7, "step": 850 }, { "epoch": 0.03, "learning_rate": 1.9969304673019494e-06, "loss": 0.7165, "step": 855 }, { "epoch": 0.03, "learning_rate": 1.99687980035596e-06, "loss": 0.729, "step": 860 }, { "epoch": 0.04, "learning_rate": 1.996828719314771e-06, "loss": 0.7199, "step": 865 }, { "epoch": 0.04, "learning_rate": 1.996777224199601e-06, "loss": 0.7041, "step": 870 }, { "epoch": 0.04, "learning_rate": 1.99672531503184e-06, "loss": 0.735, "step": 875 }, { "epoch": 0.04, "learning_rate": 1.996672991833051e-06, "loss": 0.7153, "step": 880 }, { "epoch": 0.04, "learning_rate": 1.996620254624969e-06, "loss": 0.714, "step": 885 }, { "epoch": 0.04, "learning_rate": 1.9965671034295e-06, "loss": 0.7309, "step": 890 }, { "epoch": 0.04, "learning_rate": 1.996513538268723e-06, "loss": 0.7808, "step": 895 }, { "epoch": 0.04, "learning_rate": 1.9964595591648883e-06, "loss": 0.7407, "step": 900 }, { "epoch": 0.04, "learning_rate": 1.9964051661404185e-06, "loss": 0.6831, "step": 905 }, { "epoch": 0.04, "learning_rate": 1.9963503592179078e-06, "loss": 0.7178, "step": 910 }, { "epoch": 0.04, "learning_rate": 1.996295138420122e-06, "loss": 0.7607, "step": 915 }, { "epoch": 0.04, "learning_rate": 1.9962395037700007e-06, "loss": 0.747, "step": 920 }, { "epoch": 0.04, "learning_rate": 1.996183455290653e-06, "loss": 0.6911, "step": 925 }, { "epoch": 0.04, "learning_rate": 1.996126993005361e-06, "loss": 0.7038, "step": 930 }, { "epoch": 0.04, "learning_rate": 1.996070116937579e-06, "loss": 0.7195, "step": 935 }, { "epoch": 0.04, "learning_rate": 1.9960128271109326e-06, "loss": 0.6974, "step": 940 }, { "epoch": 0.04, "learning_rate": 1.9959551235492195e-06, "loss": 0.7399, "step": 945 }, { "epoch": 0.04, "learning_rate": 1.9958970062764095e-06, "loss": 0.7475, "step": 950 }, { "epoch": 0.04, "learning_rate": 1.9958384753166437e-06, "loss": 0.7091, "step": 955 }, { "epoch": 0.04, "learning_rate": 1.995779530694236e-06, "loss": 0.6908, "step": 960 }, { "epoch": 0.04, "learning_rate": 1.9957201724336704e-06, "loss": 0.7052, "step": 965 }, { "epoch": 0.04, "learning_rate": 1.9956604005596043e-06, "loss": 0.6963, "step": 970 }, { "epoch": 0.04, "learning_rate": 1.9956002150968667e-06, "loss": 0.7064, "step": 975 }, { "epoch": 0.04, "learning_rate": 1.9955396160704582e-06, "loss": 0.6804, "step": 980 }, { "epoch": 0.04, "learning_rate": 1.99547860350555e-06, "loss": 0.6759, "step": 985 }, { "epoch": 0.04, "learning_rate": 1.995417177427488e-06, "loss": 0.7175, "step": 990 }, { "epoch": 0.04, "learning_rate": 1.9953553378617866e-06, "loss": 0.6926, "step": 995 }, { "epoch": 0.04, "learning_rate": 1.995293084834134e-06, "loss": 0.7109, "step": 1000 }, { "epoch": 0.04, "learning_rate": 1.9952304183703893e-06, "loss": 0.7129, "step": 1005 }, { "epoch": 0.04, "learning_rate": 1.9951673384965835e-06, "loss": 0.7117, "step": 1010 }, { "epoch": 0.04, "learning_rate": 1.99510384523892e-06, "loss": 0.7694, "step": 1015 }, { "epoch": 0.04, "learning_rate": 1.995039938623773e-06, "loss": 0.7381, "step": 1020 }, { "epoch": 0.04, "learning_rate": 1.9949756186776893e-06, "loss": 0.722, "step": 1025 }, { "epoch": 0.04, "learning_rate": 1.9949108854273855e-06, "loss": 0.7288, "step": 1030 }, { "epoch": 0.04, "learning_rate": 1.9948457388997528e-06, "loss": 0.7045, "step": 1035 }, { "epoch": 0.04, "learning_rate": 1.994780179121851e-06, "loss": 0.7623, "step": 1040 }, { "epoch": 0.04, "learning_rate": 1.994714206120914e-06, "loss": 0.725, "step": 1045 }, { "epoch": 0.04, "learning_rate": 1.9946478199243466e-06, "loss": 0.7203, "step": 1050 }, { "epoch": 0.04, "learning_rate": 1.9945810205597246e-06, "loss": 0.7011, "step": 1055 }, { "epoch": 0.04, "learning_rate": 1.9945138080547957e-06, "loss": 0.6946, "step": 1060 }, { "epoch": 0.04, "learning_rate": 1.99444618243748e-06, "loss": 0.7151, "step": 1065 }, { "epoch": 0.04, "learning_rate": 1.994378143735868e-06, "loss": 0.7074, "step": 1070 }, { "epoch": 0.04, "learning_rate": 1.9943096919782225e-06, "loss": 0.7, "step": 1075 }, { "epoch": 0.04, "learning_rate": 1.994240827192978e-06, "loss": 0.6957, "step": 1080 }, { "epoch": 0.04, "learning_rate": 1.9941715494087408e-06, "loss": 0.7348, "step": 1085 }, { "epoch": 0.04, "learning_rate": 1.9941018586542866e-06, "loss": 0.6984, "step": 1090 }, { "epoch": 0.04, "learning_rate": 1.9940317549585665e-06, "loss": 0.7252, "step": 1095 }, { "epoch": 0.04, "learning_rate": 1.9939612383506993e-06, "loss": 0.7706, "step": 1100 }, { "epoch": 0.04, "learning_rate": 1.993890308859978e-06, "loss": 0.7261, "step": 1105 }, { "epoch": 0.05, "learning_rate": 1.9938189665158654e-06, "loss": 0.6879, "step": 1110 }, { "epoch": 0.05, "learning_rate": 1.9937472113479966e-06, "loss": 0.7088, "step": 1115 }, { "epoch": 0.05, "learning_rate": 1.9936750433861787e-06, "loss": 0.7428, "step": 1120 }, { "epoch": 0.05, "learning_rate": 1.993602462660389e-06, "loss": 0.7111, "step": 1125 }, { "epoch": 0.05, "learning_rate": 1.993529469200777e-06, "loss": 0.7027, "step": 1130 }, { "epoch": 0.05, "learning_rate": 1.993456063037664e-06, "loss": 0.6969, "step": 1135 }, { "epoch": 0.05, "learning_rate": 1.9933822442015416e-06, "loss": 0.7343, "step": 1140 }, { "epoch": 0.05, "learning_rate": 1.993308012723074e-06, "loss": 0.7174, "step": 1145 }, { "epoch": 0.05, "learning_rate": 1.993233368633096e-06, "loss": 0.6997, "step": 1150 }, { "epoch": 0.05, "learning_rate": 1.993158311962614e-06, "loss": 0.693, "step": 1155 }, { "epoch": 0.05, "learning_rate": 1.9930828427428066e-06, "loss": 0.7136, "step": 1160 }, { "epoch": 0.05, "learning_rate": 1.9930069610050224e-06, "loss": 0.7211, "step": 1165 }, { "epoch": 0.05, "learning_rate": 1.9929306667807823e-06, "loss": 0.7144, "step": 1170 }, { "epoch": 0.05, "learning_rate": 1.992853960101778e-06, "loss": 0.6787, "step": 1175 }, { "epoch": 0.05, "learning_rate": 1.9927768409998733e-06, "loss": 0.7348, "step": 1180 }, { "epoch": 0.05, "learning_rate": 1.992699309507102e-06, "loss": 0.6718, "step": 1185 }, { "epoch": 0.05, "learning_rate": 1.992621365655671e-06, "loss": 0.7146, "step": 1190 }, { "epoch": 0.05, "learning_rate": 1.9925430094779566e-06, "loss": 0.6982, "step": 1195 }, { "epoch": 0.05, "learning_rate": 1.9924642410065075e-06, "loss": 0.7379, "step": 1200 }, { "epoch": 0.05, "learning_rate": 1.992385060274044e-06, "loss": 0.6983, "step": 1205 }, { "epoch": 0.05, "learning_rate": 1.9923054673134564e-06, "loss": 0.7893, "step": 1210 }, { "epoch": 0.05, "learning_rate": 1.992225462157807e-06, "loss": 0.714, "step": 1215 }, { "epoch": 0.05, "learning_rate": 1.99214504484033e-06, "loss": 0.7394, "step": 1220 }, { "epoch": 0.05, "learning_rate": 1.9920642153944288e-06, "loss": 0.7238, "step": 1225 }, { "epoch": 0.05, "learning_rate": 1.9919829738536806e-06, "loss": 0.6847, "step": 1230 }, { "epoch": 0.05, "learning_rate": 1.991901320251831e-06, "loss": 0.6936, "step": 1235 }, { "epoch": 0.05, "learning_rate": 1.9918192546227995e-06, "loss": 0.7271, "step": 1240 }, { "epoch": 0.05, "learning_rate": 1.991736777000675e-06, "loss": 0.7416, "step": 1245 }, { "epoch": 0.05, "learning_rate": 1.9916538874197176e-06, "loss": 0.7637, "step": 1250 }, { "epoch": 0.05, "learning_rate": 1.9915705859143594e-06, "loss": 0.6722, "step": 1255 }, { "epoch": 0.05, "learning_rate": 1.9914868725192025e-06, "loss": 0.6943, "step": 1260 }, { "epoch": 0.05, "learning_rate": 1.991402747269022e-06, "loss": 0.7433, "step": 1265 }, { "epoch": 0.05, "learning_rate": 1.991318210198761e-06, "loss": 0.7015, "step": 1270 }, { "epoch": 0.05, "learning_rate": 1.991233261343537e-06, "loss": 0.6772, "step": 1275 }, { "epoch": 0.05, "learning_rate": 1.9911479007386364e-06, "loss": 0.7278, "step": 1280 }, { "epoch": 0.05, "learning_rate": 1.991062128419517e-06, "loss": 0.7471, "step": 1285 }, { "epoch": 0.05, "learning_rate": 1.9909759444218085e-06, "loss": 0.7234, "step": 1290 }, { "epoch": 0.05, "learning_rate": 1.9908893487813106e-06, "loss": 0.7118, "step": 1295 }, { "epoch": 0.05, "learning_rate": 1.990802341533994e-06, "loss": 0.7351, "step": 1300 }, { "epoch": 0.05, "learning_rate": 1.9907149227160016e-06, "loss": 0.7084, "step": 1305 }, { "epoch": 0.05, "learning_rate": 1.9906270923636457e-06, "loss": 0.7174, "step": 1310 }, { "epoch": 0.05, "learning_rate": 1.9905388505134107e-06, "loss": 0.6935, "step": 1315 }, { "epoch": 0.05, "learning_rate": 1.990450197201951e-06, "loss": 0.7004, "step": 1320 }, { "epoch": 0.05, "learning_rate": 1.990361132466093e-06, "loss": 0.7077, "step": 1325 }, { "epoch": 0.05, "learning_rate": 1.9902716563428335e-06, "loss": 0.7226, "step": 1330 }, { "epoch": 0.05, "learning_rate": 1.9901817688693395e-06, "loss": 0.7025, "step": 1335 }, { "epoch": 0.05, "learning_rate": 1.99009147008295e-06, "loss": 0.7139, "step": 1340 }, { "epoch": 0.05, "learning_rate": 1.9900007600211735e-06, "loss": 0.6609, "step": 1345 }, { "epoch": 0.05, "learning_rate": 1.9899096387216914e-06, "loss": 0.7452, "step": 1350 }, { "epoch": 0.06, "learning_rate": 1.9898181062223536e-06, "loss": 0.7111, "step": 1355 }, { "epoch": 0.06, "learning_rate": 1.9897261625611822e-06, "loss": 0.6925, "step": 1360 }, { "epoch": 0.06, "learning_rate": 1.9896338077763704e-06, "loss": 0.7097, "step": 1365 }, { "epoch": 0.06, "learning_rate": 1.989541041906281e-06, "loss": 0.7146, "step": 1370 }, { "epoch": 0.06, "learning_rate": 1.9894478649894484e-06, "loss": 0.704, "step": 1375 }, { "epoch": 0.06, "learning_rate": 1.989354277064577e-06, "loss": 0.7466, "step": 1380 }, { "epoch": 0.06, "learning_rate": 1.9892602781705427e-06, "loss": 0.6958, "step": 1385 }, { "epoch": 0.06, "learning_rate": 1.9891658683463922e-06, "loss": 0.7421, "step": 1390 }, { "epoch": 0.06, "learning_rate": 1.989071047631342e-06, "loss": 0.6658, "step": 1395 }, { "epoch": 0.06, "learning_rate": 1.98897581606478e-06, "loss": 0.6868, "step": 1400 }, { "epoch": 0.06, "learning_rate": 1.988880173686265e-06, "loss": 0.7437, "step": 1405 }, { "epoch": 0.06, "learning_rate": 1.988784120535525e-06, "loss": 0.7484, "step": 1410 }, { "epoch": 0.06, "learning_rate": 1.988687656652461e-06, "loss": 0.7063, "step": 1415 }, { "epoch": 0.06, "learning_rate": 1.9885907820771415e-06, "loss": 0.713, "step": 1420 }, { "epoch": 0.06, "learning_rate": 1.988493496849809e-06, "loss": 0.7313, "step": 1425 }, { "epoch": 0.06, "learning_rate": 1.9883958010108736e-06, "loss": 0.6987, "step": 1430 }, { "epoch": 0.06, "learning_rate": 1.9882976946009186e-06, "loss": 0.7089, "step": 1435 }, { "epoch": 0.06, "learning_rate": 1.9881991776606956e-06, "loss": 0.6492, "step": 1440 }, { "epoch": 0.06, "learning_rate": 1.9881002502311285e-06, "loss": 0.6538, "step": 1445 }, { "epoch": 0.06, "learning_rate": 1.9880009123533095e-06, "loss": 0.7096, "step": 1450 }, { "epoch": 0.06, "learning_rate": 1.9879011640685043e-06, "loss": 0.7329, "step": 1455 }, { "epoch": 0.06, "learning_rate": 1.9878010054181463e-06, "loss": 0.7414, "step": 1460 }, { "epoch": 0.06, "learning_rate": 1.9877004364438414e-06, "loss": 0.7089, "step": 1465 }, { "epoch": 0.06, "learning_rate": 1.987599457187365e-06, "loss": 0.738, "step": 1470 }, { "epoch": 0.06, "learning_rate": 1.9874980676906617e-06, "loss": 0.7078, "step": 1475 }, { "epoch": 0.06, "learning_rate": 1.9873962679958494e-06, "loss": 0.6987, "step": 1480 }, { "epoch": 0.06, "learning_rate": 1.987294058145214e-06, "loss": 0.7456, "step": 1485 }, { "epoch": 0.06, "learning_rate": 1.987191438181213e-06, "loss": 0.7402, "step": 1490 }, { "epoch": 0.06, "learning_rate": 1.987088408146473e-06, "loss": 0.7308, "step": 1495 }, { "epoch": 0.06, "learning_rate": 1.986984968083793e-06, "loss": 0.7197, "step": 1500 }, { "epoch": 0.06, "learning_rate": 1.9868811180361402e-06, "loss": 0.7386, "step": 1505 }, { "epoch": 0.06, "learning_rate": 1.9867768580466536e-06, "loss": 0.7024, "step": 1510 }, { "epoch": 0.06, "learning_rate": 1.986672188158641e-06, "loss": 0.7241, "step": 1515 }, { "epoch": 0.06, "learning_rate": 1.9865671084155826e-06, "loss": 0.7006, "step": 1520 }, { "epoch": 0.06, "learning_rate": 1.986461618861127e-06, "loss": 0.7035, "step": 1525 }, { "epoch": 0.06, "learning_rate": 1.986355719539093e-06, "loss": 0.723, "step": 1530 }, { "epoch": 0.06, "learning_rate": 1.9862494104934717e-06, "loss": 0.7184, "step": 1535 }, { "epoch": 0.06, "learning_rate": 1.9861426917684214e-06, "loss": 0.7018, "step": 1540 }, { "epoch": 0.06, "learning_rate": 1.986035563408273e-06, "loss": 0.6943, "step": 1545 }, { "epoch": 0.06, "learning_rate": 1.9859280254575268e-06, "loss": 0.7434, "step": 1550 }, { "epoch": 0.06, "learning_rate": 1.9858200779608526e-06, "loss": 0.7122, "step": 1555 }, { "epoch": 0.06, "learning_rate": 1.9857117209630913e-06, "loss": 0.7187, "step": 1560 }, { "epoch": 0.06, "learning_rate": 1.9856029545092536e-06, "loss": 0.6825, "step": 1565 }, { "epoch": 0.06, "learning_rate": 1.985493778644519e-06, "loss": 0.6964, "step": 1570 }, { "epoch": 0.06, "learning_rate": 1.9853841934142396e-06, "loss": 0.7437, "step": 1575 }, { "epoch": 0.06, "learning_rate": 1.9852741988639356e-06, "loss": 0.7125, "step": 1580 }, { "epoch": 0.06, "learning_rate": 1.9851637950392974e-06, "loss": 0.7241, "step": 1585 }, { "epoch": 0.06, "learning_rate": 1.9850529819861863e-06, "loss": 0.7113, "step": 1590 }, { "epoch": 0.06, "learning_rate": 1.984941759750633e-06, "loss": 0.6725, "step": 1595 }, { "epoch": 0.06, "learning_rate": 1.984830128378838e-06, "loss": 0.7166, "step": 1600 }, { "epoch": 0.06, "eval_loss": 0.6776626706123352, "eval_runtime": 140.3492, "eval_samples_per_second": 16.858, "eval_steps_per_second": 2.814, "step": 1600 }, { "epoch": 0.07, "learning_rate": 1.9847180879171727e-06, "loss": 0.7111, "step": 1605 }, { "epoch": 0.07, "learning_rate": 1.9846056384121768e-06, "loss": 0.7004, "step": 1610 }, { "epoch": 0.07, "learning_rate": 1.9844927799105612e-06, "loss": 0.7221, "step": 1615 }, { "epoch": 0.07, "learning_rate": 1.984379512459207e-06, "loss": 0.7363, "step": 1620 }, { "epoch": 0.07, "learning_rate": 1.984265836105163e-06, "loss": 0.7107, "step": 1625 }, { "epoch": 0.07, "learning_rate": 1.9841517508956506e-06, "loss": 0.7081, "step": 1630 }, { "epoch": 0.07, "learning_rate": 1.9840372568780594e-06, "loss": 0.6796, "step": 1635 }, { "epoch": 0.07, "learning_rate": 1.9839223540999496e-06, "loss": 0.7207, "step": 1640 }, { "epoch": 0.07, "learning_rate": 1.9838070426090505e-06, "loss": 0.716, "step": 1645 }, { "epoch": 0.07, "learning_rate": 1.983691322453261e-06, "loss": 0.7306, "step": 1650 }, { "epoch": 0.07, "learning_rate": 1.983575193680651e-06, "loss": 0.724, "step": 1655 }, { "epoch": 0.07, "learning_rate": 1.983458656339459e-06, "loss": 0.7447, "step": 1660 }, { "epoch": 0.07, "learning_rate": 1.9833417104780942e-06, "loss": 0.6929, "step": 1665 }, { "epoch": 0.07, "learning_rate": 1.9832243561451346e-06, "loss": 0.7228, "step": 1670 }, { "epoch": 0.07, "learning_rate": 1.9831065933893275e-06, "loss": 0.6824, "step": 1675 }, { "epoch": 0.07, "learning_rate": 1.982988422259591e-06, "loss": 0.7056, "step": 1680 }, { "epoch": 0.07, "learning_rate": 1.9828698428050123e-06, "loss": 0.6943, "step": 1685 }, { "epoch": 0.07, "learning_rate": 1.982750855074849e-06, "loss": 0.7101, "step": 1690 }, { "epoch": 0.07, "learning_rate": 1.9826314591185263e-06, "loss": 0.6786, "step": 1695 }, { "epoch": 0.07, "learning_rate": 1.9825116549856408e-06, "loss": 0.6954, "step": 1700 }, { "epoch": 0.07, "learning_rate": 1.9823914427259584e-06, "loss": 0.7165, "step": 1705 }, { "epoch": 0.07, "learning_rate": 1.982270822389414e-06, "loss": 0.7208, "step": 1710 }, { "epoch": 0.07, "learning_rate": 1.9821497940261124e-06, "loss": 0.6981, "step": 1715 }, { "epoch": 0.07, "learning_rate": 1.982028357686327e-06, "loss": 0.6914, "step": 1720 }, { "epoch": 0.07, "learning_rate": 1.9819065134205026e-06, "loss": 0.7291, "step": 1725 }, { "epoch": 0.07, "learning_rate": 1.9817842612792513e-06, "loss": 0.6882, "step": 1730 }, { "epoch": 0.07, "learning_rate": 1.981661601313356e-06, "loss": 0.685, "step": 1735 }, { "epoch": 0.07, "learning_rate": 1.981538533573768e-06, "loss": 0.6954, "step": 1740 }, { "epoch": 0.07, "learning_rate": 1.9814150581116093e-06, "loss": 0.7104, "step": 1745 }, { "epoch": 0.07, "learning_rate": 1.9812911749781705e-06, "loss": 0.7026, "step": 1750 }, { "epoch": 0.07, "learning_rate": 1.981166884224911e-06, "loss": 0.6907, "step": 1755 }, { "epoch": 0.07, "learning_rate": 1.981042185903461e-06, "loss": 0.6988, "step": 1760 }, { "epoch": 0.07, "learning_rate": 1.980917080065618e-06, "loss": 0.6894, "step": 1765 }, { "epoch": 0.07, "learning_rate": 1.98079156676335e-06, "loss": 0.7308, "step": 1770 }, { "epoch": 0.07, "learning_rate": 1.9806656460487955e-06, "loss": 0.6688, "step": 1775 }, { "epoch": 0.07, "learning_rate": 1.9805393179742596e-06, "loss": 0.7028, "step": 1780 }, { "epoch": 0.07, "learning_rate": 1.980412582592218e-06, "loss": 0.6982, "step": 1785 }, { "epoch": 0.07, "learning_rate": 1.980285439955316e-06, "loss": 0.7326, "step": 1790 }, { "epoch": 0.07, "learning_rate": 1.980157890116367e-06, "loss": 0.7204, "step": 1795 }, { "epoch": 0.07, "learning_rate": 1.980029933128354e-06, "loss": 0.7016, "step": 1800 }, { "epoch": 0.07, "learning_rate": 1.9799015690444302e-06, "loss": 0.7076, "step": 1805 }, { "epoch": 0.07, "learning_rate": 1.9797727979179156e-06, "loss": 0.7121, "step": 1810 }, { "epoch": 0.07, "learning_rate": 1.9796436198023016e-06, "loss": 0.7204, "step": 1815 }, { "epoch": 0.07, "learning_rate": 1.9795140347512472e-06, "loss": 0.7178, "step": 1820 }, { "epoch": 0.07, "learning_rate": 1.979384042818581e-06, "loss": 0.7223, "step": 1825 }, { "epoch": 0.07, "learning_rate": 1.979253644058301e-06, "loss": 0.7066, "step": 1830 }, { "epoch": 0.07, "learning_rate": 1.979122838524573e-06, "loss": 0.6873, "step": 1835 }, { "epoch": 0.07, "learning_rate": 1.9789916262717328e-06, "loss": 0.6822, "step": 1840 }, { "epoch": 0.07, "learning_rate": 1.9788600073542848e-06, "loss": 0.6947, "step": 1845 }, { "epoch": 0.08, "learning_rate": 1.978727981826902e-06, "loss": 0.7092, "step": 1850 }, { "epoch": 0.08, "learning_rate": 1.978595549744427e-06, "loss": 0.7166, "step": 1855 }, { "epoch": 0.08, "learning_rate": 1.9784627111618715e-06, "loss": 0.6842, "step": 1860 }, { "epoch": 0.08, "learning_rate": 1.9783294661344145e-06, "loss": 0.7161, "step": 1865 }, { "epoch": 0.08, "learning_rate": 1.978195814717405e-06, "loss": 0.6881, "step": 1870 }, { "epoch": 0.08, "learning_rate": 1.978061756966361e-06, "loss": 0.7342, "step": 1875 }, { "epoch": 0.08, "learning_rate": 1.977927292936969e-06, "loss": 0.6767, "step": 1880 }, { "epoch": 0.08, "learning_rate": 1.9777924226850842e-06, "loss": 0.7096, "step": 1885 }, { "epoch": 0.08, "learning_rate": 1.97765714626673e-06, "loss": 0.694, "step": 1890 }, { "epoch": 0.08, "learning_rate": 1.977521463738099e-06, "loss": 0.7152, "step": 1895 }, { "epoch": 0.08, "learning_rate": 1.9773853751555537e-06, "loss": 0.6618, "step": 1900 }, { "epoch": 0.08, "learning_rate": 1.977248880575623e-06, "loss": 0.689, "step": 1905 }, { "epoch": 0.08, "learning_rate": 1.9771119800550054e-06, "loss": 0.6892, "step": 1910 }, { "epoch": 0.08, "learning_rate": 1.9769746736505694e-06, "loss": 0.7179, "step": 1915 }, { "epoch": 0.08, "learning_rate": 1.97683696141935e-06, "loss": 0.6888, "step": 1920 }, { "epoch": 0.08, "learning_rate": 1.9766988434185514e-06, "loss": 0.7041, "step": 1925 }, { "epoch": 0.08, "learning_rate": 1.976560319705547e-06, "loss": 0.6969, "step": 1930 }, { "epoch": 0.08, "learning_rate": 1.9764213903378786e-06, "loss": 0.7162, "step": 1935 }, { "epoch": 0.08, "learning_rate": 1.9762820553732563e-06, "loss": 0.7178, "step": 1940 }, { "epoch": 0.08, "learning_rate": 1.976142314869558e-06, "loss": 0.7309, "step": 1945 }, { "epoch": 0.08, "learning_rate": 1.976002168884831e-06, "loss": 0.7198, "step": 1950 }, { "epoch": 0.08, "learning_rate": 1.975861617477291e-06, "loss": 0.7131, "step": 1955 }, { "epoch": 0.08, "learning_rate": 1.9757206607053218e-06, "loss": 0.7087, "step": 1960 }, { "epoch": 0.08, "learning_rate": 1.9755792986274755e-06, "loss": 0.6708, "step": 1965 }, { "epoch": 0.08, "learning_rate": 1.975437531302472e-06, "loss": 0.7141, "step": 1970 }, { "epoch": 0.08, "learning_rate": 1.975295358789201e-06, "loss": 0.7152, "step": 1975 }, { "epoch": 0.08, "learning_rate": 1.9751527811467195e-06, "loss": 0.7172, "step": 1980 }, { "epoch": 0.08, "learning_rate": 1.9750097984342534e-06, "loss": 0.7472, "step": 1985 }, { "epoch": 0.08, "learning_rate": 1.9748664107111962e-06, "loss": 0.7129, "step": 1990 }, { "epoch": 0.08, "learning_rate": 1.9747226180371094e-06, "loss": 0.7066, "step": 1995 }, { "epoch": 0.08, "learning_rate": 1.974578420471724e-06, "loss": 0.7049, "step": 2000 }, { "epoch": 0.08, "learning_rate": 1.9744338180749376e-06, "loss": 0.7214, "step": 2005 }, { "epoch": 0.08, "learning_rate": 1.9742888109068175e-06, "loss": 0.7469, "step": 2010 }, { "epoch": 0.08, "learning_rate": 1.9741433990275987e-06, "loss": 0.7119, "step": 2015 }, { "epoch": 0.08, "learning_rate": 1.973997582497683e-06, "loss": 0.7487, "step": 2020 }, { "epoch": 0.08, "learning_rate": 1.973851361377642e-06, "loss": 0.7259, "step": 2025 }, { "epoch": 0.08, "learning_rate": 1.9737047357282143e-06, "loss": 0.7164, "step": 2030 }, { "epoch": 0.08, "learning_rate": 1.9735577056103074e-06, "loss": 0.7639, "step": 2035 }, { "epoch": 0.08, "learning_rate": 1.9734102710849956e-06, "loss": 0.7181, "step": 2040 }, { "epoch": 0.08, "learning_rate": 1.973262432213523e-06, "loss": 0.6833, "step": 2045 }, { "epoch": 0.08, "learning_rate": 1.973114189057299e-06, "loss": 0.7263, "step": 2050 }, { "epoch": 0.08, "learning_rate": 1.9729655416779044e-06, "loss": 0.6957, "step": 2055 }, { "epoch": 0.08, "learning_rate": 1.972816490137085e-06, "loss": 0.716, "step": 2060 }, { "epoch": 0.08, "learning_rate": 1.9726670344967554e-06, "loss": 0.699, "step": 2065 }, { "epoch": 0.08, "learning_rate": 1.9725171748189987e-06, "loss": 0.6858, "step": 2070 }, { "epoch": 0.08, "learning_rate": 1.9723669111660645e-06, "loss": 0.7036, "step": 2075 }, { "epoch": 0.08, "learning_rate": 1.9722162436003715e-06, "loss": 0.6958, "step": 2080 }, { "epoch": 0.08, "learning_rate": 1.9720651721845062e-06, "loss": 0.7303, "step": 2085 }, { "epoch": 0.08, "learning_rate": 1.971913696981222e-06, "loss": 0.6836, "step": 2090 }, { "epoch": 0.09, "learning_rate": 1.97176181805344e-06, "loss": 0.7349, "step": 2095 }, { "epoch": 0.09, "learning_rate": 1.9716095354642493e-06, "loss": 0.7105, "step": 2100 }, { "epoch": 0.09, "learning_rate": 1.971456849276907e-06, "loss": 0.7126, "step": 2105 }, { "epoch": 0.09, "learning_rate": 1.971303759554838e-06, "loss": 0.6959, "step": 2110 }, { "epoch": 0.09, "learning_rate": 1.9711502663616344e-06, "loss": 0.6699, "step": 2115 }, { "epoch": 0.09, "learning_rate": 1.9709963697610554e-06, "loss": 0.6671, "step": 2120 }, { "epoch": 0.09, "learning_rate": 1.970842069817029e-06, "loss": 0.7101, "step": 2125 }, { "epoch": 0.09, "learning_rate": 1.970687366593649e-06, "loss": 0.6985, "step": 2130 }, { "epoch": 0.09, "learning_rate": 1.970532260155179e-06, "loss": 0.7175, "step": 2135 }, { "epoch": 0.09, "learning_rate": 1.970376750566048e-06, "loss": 0.7251, "step": 2140 }, { "epoch": 0.09, "learning_rate": 1.9702208378908537e-06, "loss": 0.6949, "step": 2145 }, { "epoch": 0.09, "learning_rate": 1.970064522194361e-06, "loss": 0.7573, "step": 2150 }, { "epoch": 0.09, "learning_rate": 1.9699078035415014e-06, "loss": 0.6922, "step": 2155 }, { "epoch": 0.09, "learning_rate": 1.9697506819973753e-06, "loss": 0.7215, "step": 2160 }, { "epoch": 0.09, "learning_rate": 1.9695931576272493e-06, "loss": 0.7252, "step": 2165 }, { "epoch": 0.09, "learning_rate": 1.9694352304965572e-06, "loss": 0.7202, "step": 2170 }, { "epoch": 0.09, "learning_rate": 1.9692769006709013e-06, "loss": 0.6932, "step": 2175 }, { "epoch": 0.09, "learning_rate": 1.96911816821605e-06, "loss": 0.6522, "step": 2180 }, { "epoch": 0.09, "learning_rate": 1.9689590331979394e-06, "loss": 0.6933, "step": 2185 }, { "epoch": 0.09, "learning_rate": 1.968799495682673e-06, "loss": 0.6825, "step": 2190 }, { "epoch": 0.09, "learning_rate": 1.968639555736521e-06, "loss": 0.7037, "step": 2195 }, { "epoch": 0.09, "learning_rate": 1.968479213425922e-06, "loss": 0.6911, "step": 2200 }, { "epoch": 0.09, "learning_rate": 1.9683184688174795e-06, "loss": 0.7048, "step": 2205 }, { "epoch": 0.09, "learning_rate": 1.9681573219779658e-06, "loss": 0.7273, "step": 2210 }, { "epoch": 0.09, "learning_rate": 1.9679957729743204e-06, "loss": 0.7105, "step": 2215 }, { "epoch": 0.09, "learning_rate": 1.9678338218736486e-06, "loss": 0.6832, "step": 2220 }, { "epoch": 0.09, "learning_rate": 1.967671468743224e-06, "loss": 0.6829, "step": 2225 }, { "epoch": 0.09, "learning_rate": 1.9675087136504865e-06, "loss": 0.6971, "step": 2230 }, { "epoch": 0.09, "learning_rate": 1.9673455566630437e-06, "loss": 0.7298, "step": 2235 }, { "epoch": 0.09, "learning_rate": 1.9671819978486688e-06, "loss": 0.6841, "step": 2240 }, { "epoch": 0.09, "learning_rate": 1.967018037275303e-06, "loss": 0.6838, "step": 2245 }, { "epoch": 0.09, "learning_rate": 1.966853675011054e-06, "loss": 0.7053, "step": 2250 }, { "epoch": 0.09, "learning_rate": 1.966688911124197e-06, "loss": 0.6886, "step": 2255 }, { "epoch": 0.09, "learning_rate": 1.9665237456831725e-06, "loss": 0.7032, "step": 2260 }, { "epoch": 0.09, "learning_rate": 1.9663581787565898e-06, "loss": 0.7159, "step": 2265 }, { "epoch": 0.09, "learning_rate": 1.966192210413223e-06, "loss": 0.6896, "step": 2270 }, { "epoch": 0.09, "learning_rate": 1.966025840722015e-06, "loss": 0.7004, "step": 2275 }, { "epoch": 0.09, "learning_rate": 1.9658590697520735e-06, "loss": 0.6726, "step": 2280 }, { "epoch": 0.09, "learning_rate": 1.965691897572674e-06, "loss": 0.6911, "step": 2285 }, { "epoch": 0.09, "learning_rate": 1.9655243242532584e-06, "loss": 0.7235, "step": 2290 }, { "epoch": 0.09, "learning_rate": 1.9653563498634347e-06, "loss": 0.7014, "step": 2295 }, { "epoch": 0.09, "learning_rate": 1.9651879744729786e-06, "loss": 0.7192, "step": 2300 }, { "epoch": 0.09, "learning_rate": 1.9650191981518317e-06, "loss": 0.65, "step": 2305 }, { "epoch": 0.09, "learning_rate": 1.964850020970102e-06, "loss": 0.7128, "step": 2310 }, { "epoch": 0.09, "learning_rate": 1.9646804429980644e-06, "loss": 0.7265, "step": 2315 }, { "epoch": 0.09, "learning_rate": 1.96451046430616e-06, "loss": 0.7038, "step": 2320 }, { "epoch": 0.09, "learning_rate": 1.964340084964997e-06, "loss": 0.7132, "step": 2325 }, { "epoch": 0.09, "learning_rate": 1.964169305045348e-06, "loss": 0.6968, "step": 2330 }, { "epoch": 0.09, "learning_rate": 1.9639981246181555e-06, "loss": 0.7456, "step": 2335 }, { "epoch": 0.1, "learning_rate": 1.9638265437545247e-06, "loss": 0.7403, "step": 2340 }, { "epoch": 0.1, "learning_rate": 1.9636545625257297e-06, "loss": 0.6893, "step": 2345 }, { "epoch": 0.1, "learning_rate": 1.96348218100321e-06, "loss": 0.7341, "step": 2350 }, { "epoch": 0.1, "learning_rate": 1.9633093992585706e-06, "loss": 0.7459, "step": 2355 }, { "epoch": 0.1, "learning_rate": 1.963136217363585e-06, "loss": 0.716, "step": 2360 }, { "epoch": 0.1, "learning_rate": 1.9629626353901897e-06, "loss": 0.7086, "step": 2365 }, { "epoch": 0.1, "learning_rate": 1.9627886534104903e-06, "loss": 0.7041, "step": 2370 }, { "epoch": 0.1, "learning_rate": 1.962614271496757e-06, "loss": 0.7094, "step": 2375 }, { "epoch": 0.1, "learning_rate": 1.962439489721427e-06, "loss": 0.6794, "step": 2380 }, { "epoch": 0.1, "learning_rate": 1.962264308157102e-06, "loss": 0.6751, "step": 2385 }, { "epoch": 0.1, "learning_rate": 1.9620887268765523e-06, "loss": 0.7398, "step": 2390 }, { "epoch": 0.1, "learning_rate": 1.961912745952712e-06, "loss": 0.6622, "step": 2395 }, { "epoch": 0.1, "learning_rate": 1.961736365458682e-06, "loss": 0.7404, "step": 2400 }, { "epoch": 0.1, "eval_loss": 0.6729753613471985, "eval_runtime": 140.5864, "eval_samples_per_second": 16.83, "eval_steps_per_second": 2.81, "step": 2400 }, { "epoch": 0.1, "learning_rate": 1.96155958546773e-06, "loss": 0.6928, "step": 2405 }, { "epoch": 0.1, "learning_rate": 1.961382406053288e-06, "loss": 0.6832, "step": 2410 }, { "epoch": 0.1, "learning_rate": 1.961204827288955e-06, "loss": 0.6775, "step": 2415 }, { "epoch": 0.1, "learning_rate": 1.961026849248496e-06, "loss": 0.6995, "step": 2420 }, { "epoch": 0.1, "learning_rate": 1.9608484720058416e-06, "loss": 0.6508, "step": 2425 }, { "epoch": 0.1, "learning_rate": 1.960669695635087e-06, "loss": 0.6865, "step": 2430 }, { "epoch": 0.1, "learning_rate": 1.960490520210496e-06, "loss": 0.7273, "step": 2435 }, { "epoch": 0.1, "learning_rate": 1.9603109458064955e-06, "loss": 0.6513, "step": 2440 }, { "epoch": 0.1, "learning_rate": 1.9601309724976795e-06, "loss": 0.7051, "step": 2445 }, { "epoch": 0.1, "learning_rate": 1.9599506003588068e-06, "loss": 0.7164, "step": 2450 }, { "epoch": 0.1, "learning_rate": 1.9597698294648034e-06, "loss": 0.7269, "step": 2455 }, { "epoch": 0.1, "learning_rate": 1.959588659890759e-06, "loss": 0.7082, "step": 2460 }, { "epoch": 0.1, "learning_rate": 1.9594070917119306e-06, "loss": 0.7164, "step": 2465 }, { "epoch": 0.1, "learning_rate": 1.9592251250037394e-06, "loss": 0.7156, "step": 2470 }, { "epoch": 0.1, "learning_rate": 1.9590427598417733e-06, "loss": 0.6603, "step": 2475 }, { "epoch": 0.1, "learning_rate": 1.958859996301785e-06, "loss": 0.6811, "step": 2480 }, { "epoch": 0.1, "learning_rate": 1.958676834459693e-06, "loss": 0.701, "step": 2485 }, { "epoch": 0.1, "learning_rate": 1.9584932743915807e-06, "loss": 0.6546, "step": 2490 }, { "epoch": 0.1, "learning_rate": 1.9583093161736975e-06, "loss": 0.7022, "step": 2495 }, { "epoch": 0.1, "learning_rate": 1.9581249598824588e-06, "loss": 0.7066, "step": 2500 }, { "epoch": 0.1, "learning_rate": 1.957940205594444e-06, "loss": 0.6942, "step": 2505 }, { "epoch": 0.1, "learning_rate": 1.957755053386398e-06, "loss": 0.6983, "step": 2510 }, { "epoch": 0.1, "learning_rate": 1.957569503335232e-06, "loss": 0.7491, "step": 2515 }, { "epoch": 0.1, "learning_rate": 1.957383555518022e-06, "loss": 0.6916, "step": 2520 }, { "epoch": 0.1, "learning_rate": 1.9571972100120087e-06, "loss": 0.7035, "step": 2525 }, { "epoch": 0.1, "learning_rate": 1.9570104668945986e-06, "loss": 0.6987, "step": 2530 }, { "epoch": 0.1, "learning_rate": 1.956823326243363e-06, "loss": 0.7035, "step": 2535 }, { "epoch": 0.1, "learning_rate": 1.956635788136039e-06, "loss": 0.6854, "step": 2540 }, { "epoch": 0.1, "learning_rate": 1.9564478526505276e-06, "loss": 0.7194, "step": 2545 }, { "epoch": 0.1, "learning_rate": 1.9562595198648956e-06, "loss": 0.7026, "step": 2550 }, { "epoch": 0.1, "learning_rate": 1.9560707898573757e-06, "loss": 0.6887, "step": 2555 }, { "epoch": 0.1, "learning_rate": 1.9558816627063636e-06, "loss": 0.7127, "step": 2560 }, { "epoch": 0.1, "learning_rate": 1.9556921384904217e-06, "loss": 0.7126, "step": 2565 }, { "epoch": 0.1, "learning_rate": 1.9555022172882767e-06, "loss": 0.671, "step": 2570 }, { "epoch": 0.1, "learning_rate": 1.9553118991788203e-06, "loss": 0.7004, "step": 2575 }, { "epoch": 0.1, "learning_rate": 1.9551211842411083e-06, "loss": 0.7303, "step": 2580 }, { "epoch": 0.1, "learning_rate": 1.9549300725543627e-06, "loss": 0.7571, "step": 2585 }, { "epoch": 0.11, "learning_rate": 1.9547385641979696e-06, "loss": 0.6688, "step": 2590 }, { "epoch": 0.11, "learning_rate": 1.9545466592514795e-06, "loss": 0.7083, "step": 2595 }, { "epoch": 0.11, "learning_rate": 1.9543543577946086e-06, "loss": 0.7426, "step": 2600 }, { "epoch": 0.11, "learning_rate": 1.9541616599072365e-06, "loss": 0.6722, "step": 2605 }, { "epoch": 0.11, "learning_rate": 1.953968565669409e-06, "loss": 0.7215, "step": 2610 }, { "epoch": 0.11, "learning_rate": 1.9537750751613354e-06, "loss": 0.7354, "step": 2615 }, { "epoch": 0.11, "learning_rate": 1.95358118846339e-06, "loss": 0.7253, "step": 2620 }, { "epoch": 0.11, "learning_rate": 1.9533869056561113e-06, "loss": 0.6877, "step": 2625 }, { "epoch": 0.11, "learning_rate": 1.9531922268202033e-06, "loss": 0.72, "step": 2630 }, { "epoch": 0.11, "learning_rate": 1.952997152036534e-06, "loss": 0.6894, "step": 2635 }, { "epoch": 0.11, "learning_rate": 1.952801681386135e-06, "loss": 0.69, "step": 2640 }, { "epoch": 0.11, "learning_rate": 1.9526058149502035e-06, "loss": 0.689, "step": 2645 }, { "epoch": 0.11, "learning_rate": 1.952409552810101e-06, "loss": 0.7495, "step": 2650 }, { "epoch": 0.11, "learning_rate": 1.9522128950473525e-06, "loss": 0.7378, "step": 2655 }, { "epoch": 0.11, "learning_rate": 1.9520158417436486e-06, "loss": 0.7041, "step": 2660 }, { "epoch": 0.11, "learning_rate": 1.9518183929808433e-06, "loss": 0.711, "step": 2665 }, { "epoch": 0.11, "learning_rate": 1.9516205488409543e-06, "loss": 0.6908, "step": 2670 }, { "epoch": 0.11, "learning_rate": 1.9514223094061654e-06, "loss": 0.7107, "step": 2675 }, { "epoch": 0.11, "learning_rate": 1.951223674758823e-06, "loss": 0.7225, "step": 2680 }, { "epoch": 0.11, "learning_rate": 1.9510246449814385e-06, "loss": 0.7217, "step": 2685 }, { "epoch": 0.11, "learning_rate": 1.950825220156687e-06, "loss": 0.7274, "step": 2690 }, { "epoch": 0.11, "learning_rate": 1.950625400367408e-06, "loss": 0.6867, "step": 2695 }, { "epoch": 0.11, "learning_rate": 1.9504251856966043e-06, "loss": 0.7186, "step": 2700 }, { "epoch": 0.11, "learning_rate": 1.950224576227444e-06, "loss": 0.6889, "step": 2705 }, { "epoch": 0.11, "learning_rate": 1.9500235720432583e-06, "loss": 0.7005, "step": 2710 }, { "epoch": 0.11, "learning_rate": 1.9498221732275425e-06, "loss": 0.6874, "step": 2715 }, { "epoch": 0.11, "learning_rate": 1.9496203798639565e-06, "loss": 0.7033, "step": 2720 }, { "epoch": 0.11, "learning_rate": 1.9494181920363223e-06, "loss": 0.707, "step": 2725 }, { "epoch": 0.11, "learning_rate": 1.9492156098286283e-06, "loss": 0.7348, "step": 2730 }, { "epoch": 0.11, "learning_rate": 1.949012633325024e-06, "loss": 0.6636, "step": 2735 }, { "epoch": 0.11, "learning_rate": 1.9488092626098254e-06, "loss": 0.6975, "step": 2740 }, { "epoch": 0.11, "learning_rate": 1.9486054977675103e-06, "loss": 0.7124, "step": 2745 }, { "epoch": 0.11, "learning_rate": 1.9484013388827204e-06, "loss": 0.663, "step": 2750 }, { "epoch": 0.11, "learning_rate": 1.9481967860402627e-06, "loss": 0.6916, "step": 2755 }, { "epoch": 0.11, "learning_rate": 1.9479918393251056e-06, "loss": 0.7344, "step": 2760 }, { "epoch": 0.11, "learning_rate": 1.9477864988223828e-06, "loss": 0.6752, "step": 2765 }, { "epoch": 0.11, "learning_rate": 1.9475807646173908e-06, "loss": 0.6885, "step": 2770 }, { "epoch": 0.11, "learning_rate": 1.9473746367955893e-06, "loss": 0.6602, "step": 2775 }, { "epoch": 0.11, "learning_rate": 1.947168115442603e-06, "loss": 0.6764, "step": 2780 }, { "epoch": 0.11, "learning_rate": 1.946961200644218e-06, "loss": 0.6784, "step": 2785 }, { "epoch": 0.11, "learning_rate": 1.946753892486386e-06, "loss": 0.7325, "step": 2790 }, { "epoch": 0.11, "learning_rate": 1.94654619105522e-06, "loss": 0.6533, "step": 2795 }, { "epoch": 0.11, "learning_rate": 1.946338096436998e-06, "loss": 0.6741, "step": 2800 }, { "epoch": 0.11, "learning_rate": 1.9461296087181604e-06, "loss": 0.7046, "step": 2805 }, { "epoch": 0.11, "learning_rate": 1.9459207279853113e-06, "loss": 0.6797, "step": 2810 }, { "epoch": 0.11, "learning_rate": 1.945711454325218e-06, "loss": 0.7096, "step": 2815 }, { "epoch": 0.11, "learning_rate": 1.9455017878248107e-06, "loss": 0.698, "step": 2820 }, { "epoch": 0.11, "learning_rate": 1.9452917285711834e-06, "loss": 0.7036, "step": 2825 }, { "epoch": 0.11, "learning_rate": 1.945081276651593e-06, "loss": 0.7205, "step": 2830 }, { "epoch": 0.12, "learning_rate": 1.9448704321534588e-06, "loss": 0.6583, "step": 2835 }, { "epoch": 0.12, "learning_rate": 1.944659195164364e-06, "loss": 0.6757, "step": 2840 }, { "epoch": 0.12, "learning_rate": 1.9444475657720545e-06, "loss": 0.7033, "step": 2845 }, { "epoch": 0.12, "learning_rate": 1.9442355440644394e-06, "loss": 0.7029, "step": 2850 }, { "epoch": 0.12, "learning_rate": 1.944023130129591e-06, "loss": 0.6993, "step": 2855 }, { "epoch": 0.12, "learning_rate": 1.9438103240557446e-06, "loss": 0.704, "step": 2860 }, { "epoch": 0.12, "learning_rate": 1.9435971259312966e-06, "loss": 0.7104, "step": 2865 }, { "epoch": 0.12, "learning_rate": 1.9433835358448086e-06, "loss": 0.7017, "step": 2870 }, { "epoch": 0.12, "learning_rate": 1.943169553885004e-06, "loss": 0.7107, "step": 2875 }, { "epoch": 0.12, "learning_rate": 1.9429551801407687e-06, "loss": 0.6939, "step": 2880 }, { "epoch": 0.12, "learning_rate": 1.942740414701152e-06, "loss": 0.6737, "step": 2885 }, { "epoch": 0.12, "learning_rate": 1.9425252576553656e-06, "loss": 0.7174, "step": 2890 }, { "epoch": 0.12, "learning_rate": 1.942309709092784e-06, "loss": 0.7147, "step": 2895 }, { "epoch": 0.12, "learning_rate": 1.9420937691029435e-06, "loss": 0.7351, "step": 2900 }, { "epoch": 0.12, "learning_rate": 1.9418774377755444e-06, "loss": 0.6845, "step": 2905 }, { "epoch": 0.12, "learning_rate": 1.9416607152004485e-06, "loss": 0.7156, "step": 2910 }, { "epoch": 0.12, "learning_rate": 1.9414436014676806e-06, "loss": 0.7532, "step": 2915 }, { "epoch": 0.12, "learning_rate": 1.941226096667428e-06, "loss": 0.7421, "step": 2920 }, { "epoch": 0.12, "learning_rate": 1.94100820089004e-06, "loss": 0.7049, "step": 2925 }, { "epoch": 0.12, "learning_rate": 1.940789914226029e-06, "loss": 0.7652, "step": 2930 }, { "epoch": 0.12, "learning_rate": 1.940571236766069e-06, "loss": 0.6884, "step": 2935 }, { "epoch": 0.12, "learning_rate": 1.9403521686009964e-06, "loss": 0.6974, "step": 2940 }, { "epoch": 0.12, "learning_rate": 1.940132709821811e-06, "loss": 0.7362, "step": 2945 }, { "epoch": 0.12, "learning_rate": 1.9399128605196737e-06, "loss": 0.7167, "step": 2950 }, { "epoch": 0.12, "learning_rate": 1.9396926207859082e-06, "loss": 0.6806, "step": 2955 }, { "epoch": 0.12, "learning_rate": 1.939471990712e-06, "loss": 0.7069, "step": 2960 }, { "epoch": 0.12, "learning_rate": 1.939250970389597e-06, "loss": 0.7185, "step": 2965 }, { "epoch": 0.12, "learning_rate": 1.9390295599105085e-06, "loss": 0.6996, "step": 2970 }, { "epoch": 0.12, "learning_rate": 1.9388077593667075e-06, "loss": 0.7304, "step": 2975 }, { "epoch": 0.12, "learning_rate": 1.9385855688503276e-06, "loss": 0.7209, "step": 2980 }, { "epoch": 0.12, "learning_rate": 1.9383629884536644e-06, "loss": 0.7077, "step": 2985 }, { "epoch": 0.12, "learning_rate": 1.938140018269176e-06, "loss": 0.6838, "step": 2990 }, { "epoch": 0.12, "learning_rate": 1.937916658389483e-06, "loss": 0.6784, "step": 2995 }, { "epoch": 0.12, "learning_rate": 1.9376929089073665e-06, "loss": 0.7185, "step": 3000 }, { "epoch": 0.12, "learning_rate": 1.9374687699157703e-06, "loss": 0.7146, "step": 3005 }, { "epoch": 0.12, "learning_rate": 1.9372442415077994e-06, "loss": 0.7131, "step": 3010 }, { "epoch": 0.12, "learning_rate": 1.9370193237767213e-06, "loss": 0.7014, "step": 3015 }, { "epoch": 0.12, "learning_rate": 1.9367940168159648e-06, "loss": 0.697, "step": 3020 }, { "epoch": 0.12, "learning_rate": 1.9365683207191205e-06, "loss": 0.7009, "step": 3025 }, { "epoch": 0.12, "learning_rate": 1.9363422355799406e-06, "loss": 0.7125, "step": 3030 }, { "epoch": 0.12, "learning_rate": 1.936115761492339e-06, "loss": 0.6954, "step": 3035 }, { "epoch": 0.12, "learning_rate": 1.935888898550391e-06, "loss": 0.7465, "step": 3040 }, { "epoch": 0.12, "learning_rate": 1.935661646848333e-06, "loss": 0.7253, "step": 3045 }, { "epoch": 0.12, "learning_rate": 1.935434006480564e-06, "loss": 0.7223, "step": 3050 }, { "epoch": 0.12, "learning_rate": 1.935205977541644e-06, "loss": 0.7141, "step": 3055 }, { "epoch": 0.12, "learning_rate": 1.9349775601262935e-06, "loss": 0.732, "step": 3060 }, { "epoch": 0.12, "learning_rate": 1.9347487543293958e-06, "loss": 0.709, "step": 3065 }, { "epoch": 0.12, "learning_rate": 1.934519560245994e-06, "loss": 0.724, "step": 3070 }, { "epoch": 0.12, "learning_rate": 1.9342899779712946e-06, "loss": 0.6998, "step": 3075 }, { "epoch": 0.13, "learning_rate": 1.934060007600663e-06, "loss": 0.6945, "step": 3080 }, { "epoch": 0.13, "learning_rate": 1.9338296492296267e-06, "loss": 0.7068, "step": 3085 }, { "epoch": 0.13, "learning_rate": 1.9335989029538756e-06, "loss": 0.691, "step": 3090 }, { "epoch": 0.13, "learning_rate": 1.9333677688692595e-06, "loss": 0.7063, "step": 3095 }, { "epoch": 0.13, "learning_rate": 1.9331362470717886e-06, "loss": 0.7518, "step": 3100 }, { "epoch": 0.13, "learning_rate": 1.9329043376576357e-06, "loss": 0.6806, "step": 3105 }, { "epoch": 0.13, "learning_rate": 1.932672040723134e-06, "loss": 0.6878, "step": 3110 }, { "epoch": 0.13, "learning_rate": 1.9324393563647772e-06, "loss": 0.6961, "step": 3115 }, { "epoch": 0.13, "learning_rate": 1.932206284679221e-06, "loss": 0.7125, "step": 3120 }, { "epoch": 0.13, "learning_rate": 1.931972825763281e-06, "loss": 0.6893, "step": 3125 }, { "epoch": 0.13, "learning_rate": 1.931738979713934e-06, "loss": 0.6821, "step": 3130 }, { "epoch": 0.13, "learning_rate": 1.9315047466283177e-06, "loss": 0.735, "step": 3135 }, { "epoch": 0.13, "learning_rate": 1.9312701266037302e-06, "loss": 0.6873, "step": 3140 }, { "epoch": 0.13, "learning_rate": 1.931035119737631e-06, "loss": 0.6853, "step": 3145 }, { "epoch": 0.13, "learning_rate": 1.9307997261276393e-06, "loss": 0.7256, "step": 3150 }, { "epoch": 0.13, "learning_rate": 1.9305639458715365e-06, "loss": 0.7297, "step": 3155 }, { "epoch": 0.13, "learning_rate": 1.930327779067263e-06, "loss": 0.7162, "step": 3160 }, { "epoch": 0.13, "learning_rate": 1.9300912258129206e-06, "loss": 0.7133, "step": 3165 }, { "epoch": 0.13, "learning_rate": 1.9298542862067712e-06, "loss": 0.7255, "step": 3170 }, { "epoch": 0.13, "learning_rate": 1.9296169603472384e-06, "loss": 0.6895, "step": 3175 }, { "epoch": 0.13, "learning_rate": 1.929379248332904e-06, "loss": 0.729, "step": 3180 }, { "epoch": 0.13, "learning_rate": 1.9291411502625123e-06, "loss": 0.6971, "step": 3185 }, { "epoch": 0.13, "learning_rate": 1.928902666234967e-06, "loss": 0.7232, "step": 3190 }, { "epoch": 0.13, "learning_rate": 1.9286637963493323e-06, "loss": 0.6653, "step": 3195 }, { "epoch": 0.13, "learning_rate": 1.9284245407048323e-06, "loss": 0.7309, "step": 3200 }, { "epoch": 0.13, "eval_loss": 0.6701433062553406, "eval_runtime": 140.1425, "eval_samples_per_second": 16.883, "eval_steps_per_second": 2.819, "step": 3200 }, { "epoch": 0.13, "learning_rate": 1.928184899400853e-06, "loss": 0.7025, "step": 3205 }, { "epoch": 0.13, "learning_rate": 1.9279448725369375e-06, "loss": 0.711, "step": 3210 }, { "epoch": 0.13, "learning_rate": 1.927704460212792e-06, "loss": 0.6813, "step": 3215 }, { "epoch": 0.13, "learning_rate": 1.9274636625282816e-06, "loss": 0.7209, "step": 3220 }, { "epoch": 0.13, "learning_rate": 1.927222479583431e-06, "loss": 0.7279, "step": 3225 }, { "epoch": 0.13, "learning_rate": 1.9269809114784265e-06, "loss": 0.6779, "step": 3230 }, { "epoch": 0.13, "learning_rate": 1.926738958313612e-06, "loss": 0.7237, "step": 3235 }, { "epoch": 0.13, "learning_rate": 1.9264966201894945e-06, "loss": 0.7123, "step": 3240 }, { "epoch": 0.13, "learning_rate": 1.9262538972067375e-06, "loss": 0.7147, "step": 3245 }, { "epoch": 0.13, "learning_rate": 1.9260107894661666e-06, "loss": 0.7104, "step": 3250 }, { "epoch": 0.13, "learning_rate": 1.9257672970687673e-06, "loss": 0.7059, "step": 3255 }, { "epoch": 0.13, "learning_rate": 1.9255234201156834e-06, "loss": 0.7259, "step": 3260 }, { "epoch": 0.13, "learning_rate": 1.9252791587082195e-06, "loss": 0.6778, "step": 3265 }, { "epoch": 0.13, "learning_rate": 1.9250345129478396e-06, "loss": 0.7456, "step": 3270 }, { "epoch": 0.13, "learning_rate": 1.924789482936168e-06, "loss": 0.7254, "step": 3275 }, { "epoch": 0.13, "learning_rate": 1.924544068774987e-06, "loss": 0.732, "step": 3280 }, { "epoch": 0.13, "learning_rate": 1.92429827056624e-06, "loss": 0.6824, "step": 3285 }, { "epoch": 0.13, "learning_rate": 1.9240520884120296e-06, "loss": 0.6807, "step": 3290 }, { "epoch": 0.13, "learning_rate": 1.923805522414618e-06, "loss": 0.6679, "step": 3295 }, { "epoch": 0.13, "learning_rate": 1.923558572676426e-06, "loss": 0.6813, "step": 3300 }, { "epoch": 0.13, "learning_rate": 1.9233112393000344e-06, "loss": 0.6791, "step": 3305 }, { "epoch": 0.13, "learning_rate": 1.9230635223881836e-06, "loss": 0.6877, "step": 3310 }, { "epoch": 0.13, "learning_rate": 1.9228154220437733e-06, "loss": 0.7023, "step": 3315 }, { "epoch": 0.13, "learning_rate": 1.922566938369861e-06, "loss": 0.7001, "step": 3320 }, { "epoch": 0.14, "learning_rate": 1.9223180714696664e-06, "loss": 0.691, "step": 3325 }, { "epoch": 0.14, "learning_rate": 1.922068821446565e-06, "loss": 0.7149, "step": 3330 }, { "epoch": 0.14, "learning_rate": 1.9218191884040945e-06, "loss": 0.6826, "step": 3335 }, { "epoch": 0.14, "learning_rate": 1.9215691724459496e-06, "loss": 0.7069, "step": 3340 }, { "epoch": 0.14, "learning_rate": 1.9213187736759848e-06, "loss": 0.7044, "step": 3345 }, { "epoch": 0.14, "learning_rate": 1.9210679921982134e-06, "loss": 0.7026, "step": 3350 }, { "epoch": 0.14, "learning_rate": 1.9208168281168083e-06, "loss": 0.7151, "step": 3355 }, { "epoch": 0.14, "learning_rate": 1.9205652815361003e-06, "loss": 0.7147, "step": 3360 }, { "epoch": 0.14, "learning_rate": 1.92031335256058e-06, "loss": 0.6891, "step": 3365 }, { "epoch": 0.14, "learning_rate": 1.9200610412948967e-06, "loss": 0.7246, "step": 3370 }, { "epoch": 0.14, "learning_rate": 1.9198083478438584e-06, "loss": 0.6832, "step": 3375 }, { "epoch": 0.14, "learning_rate": 1.919555272312431e-06, "loss": 0.6708, "step": 3380 }, { "epoch": 0.14, "learning_rate": 1.91930181480574e-06, "loss": 0.7217, "step": 3385 }, { "epoch": 0.14, "learning_rate": 1.9190479754290703e-06, "loss": 0.7272, "step": 3390 }, { "epoch": 0.14, "learning_rate": 1.918793754287864e-06, "loss": 0.6629, "step": 3395 }, { "epoch": 0.14, "learning_rate": 1.918539151487722e-06, "loss": 0.6633, "step": 3400 }, { "epoch": 0.14, "learning_rate": 1.9182841671344053e-06, "loss": 0.7085, "step": 3405 }, { "epoch": 0.14, "learning_rate": 1.918028801333831e-06, "loss": 0.7321, "step": 3410 }, { "epoch": 0.14, "learning_rate": 1.9177730541920757e-06, "loss": 0.7141, "step": 3415 }, { "epoch": 0.14, "learning_rate": 1.9175169258153752e-06, "loss": 0.6976, "step": 3420 }, { "epoch": 0.14, "learning_rate": 1.9172604163101227e-06, "loss": 0.7034, "step": 3425 }, { "epoch": 0.14, "learning_rate": 1.9170035257828706e-06, "loss": 0.6478, "step": 3430 }, { "epoch": 0.14, "learning_rate": 1.9167462543403286e-06, "loss": 0.6949, "step": 3435 }, { "epoch": 0.14, "learning_rate": 1.9164886020893647e-06, "loss": 0.7224, "step": 3440 }, { "epoch": 0.14, "learning_rate": 1.9162305691370057e-06, "loss": 0.7016, "step": 3445 }, { "epoch": 0.14, "learning_rate": 1.9159721555904364e-06, "loss": 0.7065, "step": 3450 }, { "epoch": 0.14, "learning_rate": 1.9157133615569993e-06, "loss": 0.6808, "step": 3455 }, { "epoch": 0.14, "learning_rate": 1.9154541871441947e-06, "loss": 0.7026, "step": 3460 }, { "epoch": 0.14, "learning_rate": 1.9151946324596826e-06, "loss": 0.7035, "step": 3465 }, { "epoch": 0.14, "learning_rate": 1.9149346976112787e-06, "loss": 0.6941, "step": 3470 }, { "epoch": 0.14, "learning_rate": 1.9146743827069584e-06, "loss": 0.6988, "step": 3475 }, { "epoch": 0.14, "learning_rate": 1.9144136878548536e-06, "loss": 0.6812, "step": 3480 }, { "epoch": 0.14, "learning_rate": 1.914152613163255e-06, "loss": 0.6976, "step": 3485 }, { "epoch": 0.14, "learning_rate": 1.913891158740611e-06, "loss": 0.7263, "step": 3490 }, { "epoch": 0.14, "learning_rate": 1.913629324695527e-06, "loss": 0.6872, "step": 3495 }, { "epoch": 0.14, "learning_rate": 1.913367111136767e-06, "loss": 0.6588, "step": 3500 }, { "epoch": 0.14, "learning_rate": 1.913104518173252e-06, "loss": 0.6679, "step": 3505 }, { "epoch": 0.14, "learning_rate": 1.912841545914061e-06, "loss": 0.7026, "step": 3510 }, { "epoch": 0.14, "learning_rate": 1.9125781944684304e-06, "loss": 0.6763, "step": 3515 }, { "epoch": 0.14, "learning_rate": 1.9123144639457537e-06, "loss": 0.7106, "step": 3520 }, { "epoch": 0.14, "learning_rate": 1.9120503544555826e-06, "loss": 0.7148, "step": 3525 }, { "epoch": 0.14, "learning_rate": 1.911785866107626e-06, "loss": 0.7029, "step": 3530 }, { "epoch": 0.14, "learning_rate": 1.9115209990117495e-06, "loss": 0.7071, "step": 3535 }, { "epoch": 0.14, "learning_rate": 1.9112557532779773e-06, "loss": 0.6967, "step": 3540 }, { "epoch": 0.14, "learning_rate": 1.9109901290164897e-06, "loss": 0.6872, "step": 3545 }, { "epoch": 0.14, "learning_rate": 1.9107241263376255e-06, "loss": 0.6732, "step": 3550 }, { "epoch": 0.14, "learning_rate": 1.9104577453518783e-06, "loss": 0.7015, "step": 3555 }, { "epoch": 0.14, "learning_rate": 1.910190986169902e-06, "loss": 0.6894, "step": 3560 }, { "epoch": 0.14, "learning_rate": 1.9099238489025054e-06, "loss": 0.6748, "step": 3565 }, { "epoch": 0.15, "learning_rate": 1.909656333660655e-06, "loss": 0.6691, "step": 3570 }, { "epoch": 0.15, "learning_rate": 1.909388440555474e-06, "loss": 0.6763, "step": 3575 }, { "epoch": 0.15, "learning_rate": 1.909120169698244e-06, "loss": 0.7212, "step": 3580 }, { "epoch": 0.15, "learning_rate": 1.9088515212004006e-06, "loss": 0.6999, "step": 3585 }, { "epoch": 0.15, "learning_rate": 1.90858249517354e-06, "loss": 0.7089, "step": 3590 }, { "epoch": 0.15, "learning_rate": 1.9083130917294116e-06, "loss": 0.6819, "step": 3595 }, { "epoch": 0.15, "learning_rate": 1.9080433109799243e-06, "loss": 0.6902, "step": 3600 }, { "epoch": 0.15, "learning_rate": 1.9077731530371425e-06, "loss": 0.6825, "step": 3605 }, { "epoch": 0.15, "learning_rate": 1.9075026180132873e-06, "loss": 0.7008, "step": 3610 }, { "epoch": 0.15, "learning_rate": 1.9072317060207364e-06, "loss": 0.6789, "step": 3615 }, { "epoch": 0.15, "learning_rate": 1.9069604171720243e-06, "loss": 0.7221, "step": 3620 }, { "epoch": 0.15, "learning_rate": 1.9066887515798426e-06, "loss": 0.6761, "step": 3625 }, { "epoch": 0.15, "learning_rate": 1.9064167093570382e-06, "loss": 0.7134, "step": 3630 }, { "epoch": 0.15, "learning_rate": 1.9061442906166154e-06, "loss": 0.6633, "step": 3635 }, { "epoch": 0.15, "learning_rate": 1.9058714954717345e-06, "loss": 0.6869, "step": 3640 }, { "epoch": 0.15, "learning_rate": 1.9055983240357123e-06, "loss": 0.6967, "step": 3645 }, { "epoch": 0.15, "learning_rate": 1.9053247764220218e-06, "loss": 0.7272, "step": 3650 }, { "epoch": 0.15, "learning_rate": 1.905050852744292e-06, "loss": 0.6696, "step": 3655 }, { "epoch": 0.15, "learning_rate": 1.904776553116309e-06, "loss": 0.6606, "step": 3660 }, { "epoch": 0.15, "learning_rate": 1.9045018776520138e-06, "loss": 0.7055, "step": 3665 }, { "epoch": 0.15, "learning_rate": 1.9042268264655048e-06, "loss": 0.6879, "step": 3670 }, { "epoch": 0.15, "learning_rate": 1.9039513996710357e-06, "loss": 0.7322, "step": 3675 }, { "epoch": 0.15, "learning_rate": 1.903675597383016e-06, "loss": 0.7115, "step": 3680 }, { "epoch": 0.15, "learning_rate": 1.9033994197160124e-06, "loss": 0.6892, "step": 3685 }, { "epoch": 0.15, "learning_rate": 1.903122866784746e-06, "loss": 0.7278, "step": 3690 }, { "epoch": 0.15, "learning_rate": 1.9028459387040944e-06, "loss": 0.6844, "step": 3695 }, { "epoch": 0.15, "learning_rate": 1.9025686355890916e-06, "loss": 0.7143, "step": 3700 }, { "epoch": 0.15, "learning_rate": 1.9022909575549265e-06, "loss": 0.7161, "step": 3705 }, { "epoch": 0.15, "learning_rate": 1.9020129047169443e-06, "loss": 0.7013, "step": 3710 }, { "epoch": 0.15, "learning_rate": 1.9017344771906463e-06, "loss": 0.6588, "step": 3715 }, { "epoch": 0.15, "learning_rate": 1.9014556750916879e-06, "loss": 0.6451, "step": 3720 }, { "epoch": 0.15, "learning_rate": 1.9011764985358817e-06, "loss": 0.6991, "step": 3725 }, { "epoch": 0.15, "learning_rate": 1.900896947639195e-06, "loss": 0.6901, "step": 3730 }, { "epoch": 0.15, "learning_rate": 1.9006170225177508e-06, "loss": 0.7236, "step": 3735 }, { "epoch": 0.15, "learning_rate": 1.9003367232878273e-06, "loss": 0.6827, "step": 3740 }, { "epoch": 0.15, "learning_rate": 1.9000560500658592e-06, "loss": 0.6791, "step": 3745 }, { "epoch": 0.15, "learning_rate": 1.8997750029684347e-06, "loss": 0.6822, "step": 3750 }, { "epoch": 0.15, "learning_rate": 1.899493582112299e-06, "loss": 0.7139, "step": 3755 }, { "epoch": 0.15, "learning_rate": 1.8992117876143516e-06, "loss": 0.6901, "step": 3760 }, { "epoch": 0.15, "learning_rate": 1.8989296195916476e-06, "loss": 0.7012, "step": 3765 }, { "epoch": 0.15, "learning_rate": 1.8986470781613973e-06, "loss": 0.7305, "step": 3770 }, { "epoch": 0.15, "learning_rate": 1.8983641634409656e-06, "loss": 0.6812, "step": 3775 }, { "epoch": 0.15, "learning_rate": 1.8980808755478726e-06, "loss": 0.6781, "step": 3780 }, { "epoch": 0.15, "learning_rate": 1.8977972145997945e-06, "loss": 0.7271, "step": 3785 }, { "epoch": 0.15, "learning_rate": 1.897513180714561e-06, "loss": 0.7162, "step": 3790 }, { "epoch": 0.15, "learning_rate": 1.8972287740101572e-06, "loss": 0.6669, "step": 3795 }, { "epoch": 0.15, "learning_rate": 1.8969439946047232e-06, "loss": 0.7358, "step": 3800 }, { "epoch": 0.15, "learning_rate": 1.8966588426165544e-06, "loss": 0.7315, "step": 3805 }, { "epoch": 0.15, "learning_rate": 1.8963733181640999e-06, "loss": 0.7245, "step": 3810 }, { "epoch": 0.15, "learning_rate": 1.8960874213659643e-06, "loss": 0.7233, "step": 3815 }, { "epoch": 0.16, "learning_rate": 1.8958011523409067e-06, "loss": 0.7128, "step": 3820 }, { "epoch": 0.16, "learning_rate": 1.8955145112078408e-06, "loss": 0.6579, "step": 3825 }, { "epoch": 0.16, "learning_rate": 1.8952274980858344e-06, "loss": 0.7155, "step": 3830 }, { "epoch": 0.16, "learning_rate": 1.8949401130941109e-06, "loss": 0.6825, "step": 3835 }, { "epoch": 0.16, "learning_rate": 1.894652356352047e-06, "loss": 0.6838, "step": 3840 }, { "epoch": 0.16, "learning_rate": 1.8943642279791747e-06, "loss": 0.7339, "step": 3845 }, { "epoch": 0.16, "learning_rate": 1.8940757280951799e-06, "loss": 0.7334, "step": 3850 }, { "epoch": 0.16, "learning_rate": 1.8937868568199026e-06, "loss": 0.7113, "step": 3855 }, { "epoch": 0.16, "learning_rate": 1.893497614273338e-06, "loss": 0.7085, "step": 3860 }, { "epoch": 0.16, "learning_rate": 1.8932080005756346e-06, "loss": 0.7179, "step": 3865 }, { "epoch": 0.16, "learning_rate": 1.8929180158470953e-06, "loss": 0.6847, "step": 3870 }, { "epoch": 0.16, "learning_rate": 1.8926276602081777e-06, "loss": 0.7149, "step": 3875 }, { "epoch": 0.16, "learning_rate": 1.8923369337794926e-06, "loss": 0.6732, "step": 3880 }, { "epoch": 0.16, "learning_rate": 1.8920458366818055e-06, "loss": 0.6769, "step": 3885 }, { "epoch": 0.16, "learning_rate": 1.8917543690360351e-06, "loss": 0.6755, "step": 3890 }, { "epoch": 0.16, "learning_rate": 1.8914625309632552e-06, "loss": 0.7257, "step": 3895 }, { "epoch": 0.16, "learning_rate": 1.8911703225846921e-06, "loss": 0.6711, "step": 3900 }, { "epoch": 0.16, "learning_rate": 1.8908777440217274e-06, "loss": 0.6978, "step": 3905 }, { "epoch": 0.16, "learning_rate": 1.8905847953958951e-06, "loss": 0.706, "step": 3910 }, { "epoch": 0.16, "learning_rate": 1.8902914768288837e-06, "loss": 0.7315, "step": 3915 }, { "epoch": 0.16, "learning_rate": 1.8899977884425353e-06, "loss": 0.7385, "step": 3920 }, { "epoch": 0.16, "learning_rate": 1.8897037303588452e-06, "loss": 0.6868, "step": 3925 }, { "epoch": 0.16, "learning_rate": 1.889409302699963e-06, "loss": 0.6924, "step": 3930 }, { "epoch": 0.16, "learning_rate": 1.8891145055881907e-06, "loss": 0.6575, "step": 3935 }, { "epoch": 0.16, "learning_rate": 1.8888193391459853e-06, "loss": 0.7152, "step": 3940 }, { "epoch": 0.16, "learning_rate": 1.8885238034959556e-06, "loss": 0.6853, "step": 3945 }, { "epoch": 0.16, "learning_rate": 1.8882278987608653e-06, "loss": 0.6626, "step": 3950 }, { "epoch": 0.16, "learning_rate": 1.8879316250636302e-06, "loss": 0.6714, "step": 3955 }, { "epoch": 0.16, "learning_rate": 1.8876349825273197e-06, "loss": 0.6994, "step": 3960 }, { "epoch": 0.16, "learning_rate": 1.8873379712751567e-06, "loss": 0.7311, "step": 3965 }, { "epoch": 0.16, "learning_rate": 1.8870405914305173e-06, "loss": 0.7214, "step": 3970 }, { "epoch": 0.16, "learning_rate": 1.88674284311693e-06, "loss": 0.7004, "step": 3975 }, { "epoch": 0.16, "learning_rate": 1.8864447264580776e-06, "loss": 0.6956, "step": 3980 }, { "epoch": 0.16, "learning_rate": 1.8861462415777942e-06, "loss": 0.7378, "step": 3985 }, { "epoch": 0.16, "learning_rate": 1.885847388600069e-06, "loss": 0.7195, "step": 3990 }, { "epoch": 0.16, "learning_rate": 1.8855481676490417e-06, "loss": 0.6948, "step": 3995 }, { "epoch": 0.16, "learning_rate": 1.885248578849007e-06, "loss": 0.7367, "step": 4000 }, { "epoch": 0.16, "eval_loss": 0.666739821434021, "eval_runtime": 138.8501, "eval_samples_per_second": 17.04, "eval_steps_per_second": 2.845, "step": 4000 }, { "epoch": 0.16, "learning_rate": 1.884948622324411e-06, "loss": 0.6942, "step": 4005 }, { "epoch": 0.16, "learning_rate": 1.884648298199853e-06, "loss": 0.6911, "step": 4010 }, { "epoch": 0.16, "learning_rate": 1.8843476066000856e-06, "loss": 0.7338, "step": 4015 }, { "epoch": 0.16, "learning_rate": 1.884046547650013e-06, "loss": 0.6914, "step": 4020 }, { "epoch": 0.16, "learning_rate": 1.8837451214746922e-06, "loss": 0.6998, "step": 4025 }, { "epoch": 0.16, "learning_rate": 1.8834433281993336e-06, "loss": 0.7024, "step": 4030 }, { "epoch": 0.16, "learning_rate": 1.8831411679492992e-06, "loss": 0.6748, "step": 4035 }, { "epoch": 0.16, "learning_rate": 1.882838640850104e-06, "loss": 0.687, "step": 4040 }, { "epoch": 0.16, "learning_rate": 1.8825357470274148e-06, "loss": 0.661, "step": 4045 }, { "epoch": 0.16, "learning_rate": 1.8822324866070512e-06, "loss": 0.7209, "step": 4050 }, { "epoch": 0.16, "learning_rate": 1.8819288597149846e-06, "loss": 0.6597, "step": 4055 }, { "epoch": 0.16, "learning_rate": 1.88162486647734e-06, "loss": 0.6923, "step": 4060 }, { "epoch": 0.17, "learning_rate": 1.8813205070203924e-06, "loss": 0.6875, "step": 4065 }, { "epoch": 0.17, "learning_rate": 1.8810157814705705e-06, "loss": 0.7085, "step": 4070 }, { "epoch": 0.17, "learning_rate": 1.8807106899544547e-06, "loss": 0.6786, "step": 4075 }, { "epoch": 0.17, "learning_rate": 1.8804052325987775e-06, "loss": 0.7078, "step": 4080 }, { "epoch": 0.17, "learning_rate": 1.8800994095304227e-06, "loss": 0.6945, "step": 4085 }, { "epoch": 0.17, "learning_rate": 1.8797932208764273e-06, "loss": 0.7232, "step": 4090 }, { "epoch": 0.17, "learning_rate": 1.8794866667639791e-06, "loss": 0.6876, "step": 4095 }, { "epoch": 0.17, "learning_rate": 1.8791797473204176e-06, "loss": 0.7299, "step": 4100 }, { "epoch": 0.17, "learning_rate": 1.8788724626732347e-06, "loss": 0.6689, "step": 4105 }, { "epoch": 0.17, "learning_rate": 1.878564812950074e-06, "loss": 0.7345, "step": 4110 }, { "epoch": 0.17, "learning_rate": 1.8782567982787302e-06, "loss": 0.7271, "step": 4115 }, { "epoch": 0.17, "learning_rate": 1.8779484187871504e-06, "loss": 0.7328, "step": 4120 }, { "epoch": 0.17, "learning_rate": 1.8776396746034324e-06, "loss": 0.7043, "step": 4125 }, { "epoch": 0.17, "learning_rate": 1.8773305658558258e-06, "loss": 0.6841, "step": 4130 }, { "epoch": 0.17, "learning_rate": 1.8770210926727316e-06, "loss": 0.7154, "step": 4135 }, { "epoch": 0.17, "learning_rate": 1.8767112551827027e-06, "loss": 0.7329, "step": 4140 }, { "epoch": 0.17, "learning_rate": 1.8764010535144426e-06, "loss": 0.7226, "step": 4145 }, { "epoch": 0.17, "learning_rate": 1.8760904877968065e-06, "loss": 0.6958, "step": 4150 }, { "epoch": 0.17, "learning_rate": 1.8757795581588005e-06, "loss": 0.7373, "step": 4155 }, { "epoch": 0.17, "learning_rate": 1.8754682647295822e-06, "loss": 0.7313, "step": 4160 }, { "epoch": 0.17, "learning_rate": 1.87515660763846e-06, "loss": 0.7333, "step": 4165 }, { "epoch": 0.17, "learning_rate": 1.8748445870148941e-06, "loss": 0.6882, "step": 4170 }, { "epoch": 0.17, "learning_rate": 1.8745322029884946e-06, "loss": 0.681, "step": 4175 }, { "epoch": 0.17, "learning_rate": 1.8742194556890233e-06, "loss": 0.7227, "step": 4180 }, { "epoch": 0.17, "learning_rate": 1.8739063452463926e-06, "loss": 0.7047, "step": 4185 }, { "epoch": 0.17, "learning_rate": 1.873592871790666e-06, "loss": 0.6831, "step": 4190 }, { "epoch": 0.17, "learning_rate": 1.8732790354520579e-06, "loss": 0.6887, "step": 4195 }, { "epoch": 0.17, "learning_rate": 1.8729648363609324e-06, "loss": 0.6833, "step": 4200 }, { "epoch": 0.17, "learning_rate": 1.8726502746478058e-06, "loss": 0.6928, "step": 4205 }, { "epoch": 0.17, "learning_rate": 1.872335350443344e-06, "loss": 0.7192, "step": 4210 }, { "epoch": 0.17, "learning_rate": 1.872020063878364e-06, "loss": 0.6792, "step": 4215 }, { "epoch": 0.17, "learning_rate": 1.8717044150838326e-06, "loss": 0.6942, "step": 4220 }, { "epoch": 0.17, "learning_rate": 1.871388404190868e-06, "loss": 0.733, "step": 4225 }, { "epoch": 0.17, "learning_rate": 1.8710720313307382e-06, "loss": 0.6587, "step": 4230 }, { "epoch": 0.17, "learning_rate": 1.8707552966348618e-06, "loss": 0.7068, "step": 4235 }, { "epoch": 0.17, "learning_rate": 1.870438200234808e-06, "loss": 0.7121, "step": 4240 }, { "epoch": 0.17, "learning_rate": 1.8701207422622951e-06, "loss": 0.6572, "step": 4245 }, { "epoch": 0.17, "learning_rate": 1.869802922849193e-06, "loss": 0.6988, "step": 4250 }, { "epoch": 0.17, "learning_rate": 1.8694847421275206e-06, "loss": 0.7009, "step": 4255 }, { "epoch": 0.17, "learning_rate": 1.8691662002294481e-06, "loss": 0.7128, "step": 4260 }, { "epoch": 0.17, "learning_rate": 1.8688472972872947e-06, "loss": 0.6869, "step": 4265 }, { "epoch": 0.17, "learning_rate": 1.8685280334335296e-06, "loss": 0.6775, "step": 4270 }, { "epoch": 0.17, "learning_rate": 1.8682084088007728e-06, "loss": 0.6787, "step": 4275 }, { "epoch": 0.17, "learning_rate": 1.8678884235217928e-06, "loss": 0.7285, "step": 4280 }, { "epoch": 0.17, "learning_rate": 1.8675680777295097e-06, "loss": 0.6764, "step": 4285 }, { "epoch": 0.17, "learning_rate": 1.8672473715569916e-06, "loss": 0.7242, "step": 4290 }, { "epoch": 0.17, "learning_rate": 1.8669263051374572e-06, "loss": 0.6771, "step": 4295 }, { "epoch": 0.17, "learning_rate": 1.8666048786042752e-06, "loss": 0.7394, "step": 4300 }, { "epoch": 0.17, "learning_rate": 1.866283092090963e-06, "loss": 0.6957, "step": 4305 }, { "epoch": 0.18, "learning_rate": 1.8659609457311875e-06, "loss": 0.6816, "step": 4310 }, { "epoch": 0.18, "learning_rate": 1.865638439658766e-06, "loss": 0.6952, "step": 4315 }, { "epoch": 0.18, "learning_rate": 1.8653155740076647e-06, "loss": 0.6754, "step": 4320 }, { "epoch": 0.18, "learning_rate": 1.8649923489119992e-06, "loss": 0.7266, "step": 4325 }, { "epoch": 0.18, "learning_rate": 1.8646687645060343e-06, "loss": 0.7218, "step": 4330 }, { "epoch": 0.18, "learning_rate": 1.8643448209241841e-06, "loss": 0.6779, "step": 4335 }, { "epoch": 0.18, "learning_rate": 1.864020518301012e-06, "loss": 0.7003, "step": 4340 }, { "epoch": 0.18, "learning_rate": 1.8636958567712303e-06, "loss": 0.7015, "step": 4345 }, { "epoch": 0.18, "learning_rate": 1.8633708364697013e-06, "loss": 0.6808, "step": 4350 }, { "epoch": 0.18, "learning_rate": 1.8630454575314344e-06, "loss": 0.6816, "step": 4355 }, { "epoch": 0.18, "learning_rate": 1.8627197200915902e-06, "loss": 0.7126, "step": 4360 }, { "epoch": 0.18, "learning_rate": 1.862393624285477e-06, "loss": 0.6838, "step": 4365 }, { "epoch": 0.18, "learning_rate": 1.8620671702485517e-06, "loss": 0.6578, "step": 4370 }, { "epoch": 0.18, "learning_rate": 1.861740358116421e-06, "loss": 0.7344, "step": 4375 }, { "epoch": 0.18, "learning_rate": 1.8614131880248393e-06, "loss": 0.7172, "step": 4380 }, { "epoch": 0.18, "learning_rate": 1.8610856601097108e-06, "loss": 0.7621, "step": 4385 }, { "epoch": 0.18, "learning_rate": 1.8607577745070873e-06, "loss": 0.7071, "step": 4390 }, { "epoch": 0.18, "learning_rate": 1.8604295313531698e-06, "loss": 0.6889, "step": 4395 }, { "epoch": 0.18, "learning_rate": 1.8601009307843078e-06, "loss": 0.6992, "step": 4400 }, { "epoch": 0.18, "learning_rate": 1.8597719729369988e-06, "loss": 0.6843, "step": 4405 }, { "epoch": 0.18, "learning_rate": 1.8594426579478891e-06, "loss": 0.6791, "step": 4410 }, { "epoch": 0.18, "learning_rate": 1.8591129859537738e-06, "loss": 0.7045, "step": 4415 }, { "epoch": 0.18, "learning_rate": 1.858782957091595e-06, "loss": 0.6975, "step": 4420 }, { "epoch": 0.18, "learning_rate": 1.858452571498444e-06, "loss": 0.7386, "step": 4425 }, { "epoch": 0.18, "learning_rate": 1.8581218293115607e-06, "loss": 0.6974, "step": 4430 }, { "epoch": 0.18, "learning_rate": 1.8577907306683317e-06, "loss": 0.7313, "step": 4435 }, { "epoch": 0.18, "learning_rate": 1.857459275706293e-06, "loss": 0.6941, "step": 4440 }, { "epoch": 0.18, "learning_rate": 1.8571274645631281e-06, "loss": 0.7037, "step": 4445 }, { "epoch": 0.18, "learning_rate": 1.8567952973766685e-06, "loss": 0.655, "step": 4450 }, { "epoch": 0.18, "learning_rate": 1.856462774284893e-06, "loss": 0.712, "step": 4455 }, { "epoch": 0.18, "learning_rate": 1.8561298954259297e-06, "loss": 0.68, "step": 4460 }, { "epoch": 0.18, "learning_rate": 1.8557966609380528e-06, "loss": 0.6917, "step": 4465 }, { "epoch": 0.18, "learning_rate": 1.8554630709596855e-06, "loss": 0.6719, "step": 4470 }, { "epoch": 0.18, "learning_rate": 1.8551291256293977e-06, "loss": 0.7117, "step": 4475 }, { "epoch": 0.18, "learning_rate": 1.854794825085908e-06, "loss": 0.7157, "step": 4480 }, { "epoch": 0.18, "learning_rate": 1.8544601694680814e-06, "loss": 0.7147, "step": 4485 }, { "epoch": 0.18, "learning_rate": 1.8541251589149313e-06, "loss": 0.6964, "step": 4490 }, { "epoch": 0.18, "learning_rate": 1.853789793565618e-06, "loss": 0.7083, "step": 4495 }, { "epoch": 0.18, "learning_rate": 1.8534540735594492e-06, "loss": 0.6735, "step": 4500 }, { "epoch": 0.18, "learning_rate": 1.8531179990358806e-06, "loss": 0.6719, "step": 4505 }, { "epoch": 0.18, "learning_rate": 1.852781570134514e-06, "loss": 0.6729, "step": 4510 }, { "epoch": 0.18, "learning_rate": 1.8524447869950995e-06, "loss": 0.6988, "step": 4515 }, { "epoch": 0.18, "learning_rate": 1.8521076497575335e-06, "loss": 0.6907, "step": 4520 }, { "epoch": 0.18, "learning_rate": 1.8517701585618602e-06, "loss": 0.7268, "step": 4525 }, { "epoch": 0.18, "learning_rate": 1.8514323135482704e-06, "loss": 0.7261, "step": 4530 }, { "epoch": 0.18, "learning_rate": 1.8510941148571018e-06, "loss": 0.6763, "step": 4535 }, { "epoch": 0.18, "learning_rate": 1.8507555626288397e-06, "loss": 0.6801, "step": 4540 }, { "epoch": 0.18, "learning_rate": 1.8504166570041152e-06, "loss": 0.7502, "step": 4545 }, { "epoch": 0.18, "learning_rate": 1.8500773981237069e-06, "loss": 0.6682, "step": 4550 }, { "epoch": 0.19, "learning_rate": 1.8497377861285401e-06, "loss": 0.709, "step": 4555 }, { "epoch": 0.19, "learning_rate": 1.8493978211596865e-06, "loss": 0.7001, "step": 4560 }, { "epoch": 0.19, "learning_rate": 1.849057503358365e-06, "loss": 0.7111, "step": 4565 }, { "epoch": 0.19, "learning_rate": 1.8487168328659403e-06, "loss": 0.701, "step": 4570 }, { "epoch": 0.19, "learning_rate": 1.8483758098239237e-06, "loss": 0.691, "step": 4575 }, { "epoch": 0.19, "learning_rate": 1.8480344343739738e-06, "loss": 0.6979, "step": 4580 }, { "epoch": 0.19, "learning_rate": 1.8476927066578946e-06, "loss": 0.7074, "step": 4585 }, { "epoch": 0.19, "learning_rate": 1.8473506268176372e-06, "loss": 0.7229, "step": 4590 }, { "epoch": 0.19, "learning_rate": 1.8470081949952982e-06, "loss": 0.7015, "step": 4595 }, { "epoch": 0.19, "learning_rate": 1.846665411333121e-06, "loss": 0.7329, "step": 4600 }, { "epoch": 0.19, "learning_rate": 1.8463222759734948e-06, "loss": 0.6908, "step": 4605 }, { "epoch": 0.19, "learning_rate": 1.8459787890589554e-06, "loss": 0.7381, "step": 4610 }, { "epoch": 0.19, "learning_rate": 1.8456349507321836e-06, "loss": 0.6663, "step": 4615 }, { "epoch": 0.19, "learning_rate": 1.8452907611360076e-06, "loss": 0.7021, "step": 4620 }, { "epoch": 0.19, "learning_rate": 1.8449462204134002e-06, "loss": 0.6826, "step": 4625 }, { "epoch": 0.19, "learning_rate": 1.8446013287074811e-06, "loss": 0.6959, "step": 4630 }, { "epoch": 0.19, "learning_rate": 1.8442560861615148e-06, "loss": 0.7348, "step": 4635 }, { "epoch": 0.19, "learning_rate": 1.8439104929189124e-06, "loss": 0.6699, "step": 4640 }, { "epoch": 0.19, "learning_rate": 1.8435645491232307e-06, "loss": 0.7354, "step": 4645 }, { "epoch": 0.19, "learning_rate": 1.8432182549181707e-06, "loss": 0.6721, "step": 4650 }, { "epoch": 0.19, "learning_rate": 1.8428716104475806e-06, "loss": 0.7104, "step": 4655 }, { "epoch": 0.19, "learning_rate": 1.8425246158554537e-06, "loss": 0.6808, "step": 4660 }, { "epoch": 0.19, "learning_rate": 1.8421772712859282e-06, "loss": 0.6941, "step": 4665 }, { "epoch": 0.19, "learning_rate": 1.8418295768832883e-06, "loss": 0.687, "step": 4670 }, { "epoch": 0.19, "learning_rate": 1.841481532791963e-06, "loss": 0.6894, "step": 4675 }, { "epoch": 0.19, "learning_rate": 1.841133139156527e-06, "loss": 0.6663, "step": 4680 }, { "epoch": 0.19, "learning_rate": 1.8407843961216995e-06, "loss": 0.6423, "step": 4685 }, { "epoch": 0.19, "learning_rate": 1.8404353038323459e-06, "loss": 0.7073, "step": 4690 }, { "epoch": 0.19, "learning_rate": 1.8400858624334758e-06, "loss": 0.7149, "step": 4695 }, { "epoch": 0.19, "learning_rate": 1.8397360720702442e-06, "loss": 0.7024, "step": 4700 }, { "epoch": 0.19, "learning_rate": 1.8393859328879511e-06, "loss": 0.7039, "step": 4705 }, { "epoch": 0.19, "learning_rate": 1.839035445032041e-06, "loss": 0.6758, "step": 4710 }, { "epoch": 0.19, "learning_rate": 1.8386846086481036e-06, "loss": 0.6833, "step": 4715 }, { "epoch": 0.19, "learning_rate": 1.8383334238818736e-06, "loss": 0.6996, "step": 4720 }, { "epoch": 0.19, "learning_rate": 1.8379818908792295e-06, "loss": 0.692, "step": 4725 }, { "epoch": 0.19, "learning_rate": 1.8376300097861953e-06, "loss": 0.7099, "step": 4730 }, { "epoch": 0.19, "learning_rate": 1.8372777807489396e-06, "loss": 0.6859, "step": 4735 }, { "epoch": 0.19, "learning_rate": 1.8369252039137753e-06, "loss": 0.696, "step": 4740 }, { "epoch": 0.19, "learning_rate": 1.8365722794271594e-06, "loss": 0.6713, "step": 4745 }, { "epoch": 0.19, "learning_rate": 1.8362190074356935e-06, "loss": 0.7215, "step": 4750 }, { "epoch": 0.19, "learning_rate": 1.8358653880861245e-06, "loss": 0.7001, "step": 4755 }, { "epoch": 0.19, "learning_rate": 1.835511421525342e-06, "loss": 0.727, "step": 4760 }, { "epoch": 0.19, "learning_rate": 1.8351571079003812e-06, "loss": 0.6764, "step": 4765 }, { "epoch": 0.19, "learning_rate": 1.8348024473584208e-06, "loss": 0.6667, "step": 4770 }, { "epoch": 0.19, "learning_rate": 1.834447440046783e-06, "loss": 0.7291, "step": 4775 }, { "epoch": 0.19, "learning_rate": 1.8340920861129358e-06, "loss": 0.6699, "step": 4780 }, { "epoch": 0.19, "learning_rate": 1.8337363857044894e-06, "loss": 0.6825, "step": 4785 }, { "epoch": 0.19, "learning_rate": 1.833380338969199e-06, "loss": 0.6954, "step": 4790 }, { "epoch": 0.19, "learning_rate": 1.833023946054963e-06, "loss": 0.7002, "step": 4795 }, { "epoch": 0.19, "learning_rate": 1.8326672071098246e-06, "loss": 0.699, "step": 4800 }, { "epoch": 0.19, "eval_loss": 0.6643534302711487, "eval_runtime": 138.4591, "eval_samples_per_second": 17.088, "eval_steps_per_second": 2.853, "step": 4800 }, { "epoch": 0.2, "learning_rate": 1.8323101222819693e-06, "loss": 0.677, "step": 4805 }, { "epoch": 0.2, "learning_rate": 1.8319526917197275e-06, "loss": 0.7381, "step": 4810 }, { "epoch": 0.2, "learning_rate": 1.8315949155715722e-06, "loss": 0.6681, "step": 4815 }, { "epoch": 0.2, "learning_rate": 1.8312367939861214e-06, "loss": 0.6696, "step": 4820 }, { "epoch": 0.2, "learning_rate": 1.8308783271121346e-06, "loss": 0.713, "step": 4825 }, { "epoch": 0.2, "learning_rate": 1.830519515098517e-06, "loss": 0.7155, "step": 4830 }, { "epoch": 0.2, "learning_rate": 1.8301603580943148e-06, "loss": 0.694, "step": 4835 }, { "epoch": 0.2, "learning_rate": 1.8298008562487195e-06, "loss": 0.7343, "step": 4840 }, { "epoch": 0.2, "learning_rate": 1.829441009711065e-06, "loss": 0.7173, "step": 4845 }, { "epoch": 0.2, "learning_rate": 1.8290808186308276e-06, "loss": 0.7167, "step": 4850 }, { "epoch": 0.2, "learning_rate": 1.828720283157629e-06, "loss": 0.7265, "step": 4855 }, { "epoch": 0.2, "learning_rate": 1.8283594034412313e-06, "loss": 0.6793, "step": 4860 }, { "epoch": 0.2, "learning_rate": 1.8279981796315412e-06, "loss": 0.6807, "step": 4865 }, { "epoch": 0.2, "learning_rate": 1.8276366118786078e-06, "loss": 0.6737, "step": 4870 }, { "epoch": 0.2, "learning_rate": 1.8272747003326235e-06, "loss": 0.7202, "step": 4875 }, { "epoch": 0.2, "learning_rate": 1.8269124451439231e-06, "loss": 0.7226, "step": 4880 }, { "epoch": 0.2, "learning_rate": 1.8265498464629837e-06, "loss": 0.6772, "step": 4885 }, { "epoch": 0.2, "learning_rate": 1.8261869044404265e-06, "loss": 0.6868, "step": 4890 }, { "epoch": 0.2, "learning_rate": 1.825823619227014e-06, "loss": 0.703, "step": 4895 }, { "epoch": 0.2, "learning_rate": 1.825459990973652e-06, "loss": 0.7193, "step": 4900 }, { "epoch": 0.2, "learning_rate": 1.8250960198313878e-06, "loss": 0.7194, "step": 4905 }, { "epoch": 0.2, "learning_rate": 1.8247317059514126e-06, "loss": 0.6859, "step": 4910 }, { "epoch": 0.2, "learning_rate": 1.8243670494850592e-06, "loss": 0.6749, "step": 4915 }, { "epoch": 0.2, "learning_rate": 1.824002050583802e-06, "loss": 0.6542, "step": 4920 }, { "epoch": 0.2, "learning_rate": 1.8236367093992592e-06, "loss": 0.6592, "step": 4925 }, { "epoch": 0.2, "learning_rate": 1.82327102608319e-06, "loss": 0.6788, "step": 4930 }, { "epoch": 0.2, "learning_rate": 1.8229050007874961e-06, "loss": 0.6909, "step": 4935 }, { "epoch": 0.2, "learning_rate": 1.822538633664221e-06, "loss": 0.7105, "step": 4940 }, { "epoch": 0.2, "learning_rate": 1.8221719248655507e-06, "loss": 0.6731, "step": 4945 }, { "epoch": 0.2, "learning_rate": 1.821804874543813e-06, "loss": 0.685, "step": 4950 }, { "epoch": 0.2, "learning_rate": 1.821437482851477e-06, "loss": 0.7251, "step": 4955 }, { "epoch": 0.2, "learning_rate": 1.821069749941154e-06, "loss": 0.6517, "step": 4960 }, { "epoch": 0.2, "learning_rate": 1.8207016759655973e-06, "loss": 0.704, "step": 4965 }, { "epoch": 0.2, "learning_rate": 1.8203332610777017e-06, "loss": 0.7063, "step": 4970 }, { "epoch": 0.2, "learning_rate": 1.8199645054305037e-06, "loss": 0.7208, "step": 4975 }, { "epoch": 0.2, "learning_rate": 1.8195954091771805e-06, "loss": 0.7339, "step": 4980 }, { "epoch": 0.2, "learning_rate": 1.8192259724710518e-06, "loss": 0.6627, "step": 4985 }, { "epoch": 0.2, "learning_rate": 1.818856195465579e-06, "loss": 0.7021, "step": 4990 }, { "epoch": 0.2, "learning_rate": 1.8184860783143635e-06, "loss": 0.7108, "step": 4995 }, { "epoch": 0.2, "learning_rate": 1.8181156211711488e-06, "loss": 0.639, "step": 5000 }, { "epoch": 0.2, "learning_rate": 1.8177448241898196e-06, "loss": 0.7256, "step": 5005 }, { "epoch": 0.2, "learning_rate": 1.817373687524402e-06, "loss": 0.6977, "step": 5010 }, { "epoch": 0.2, "learning_rate": 1.817002211329063e-06, "loss": 0.6984, "step": 5015 }, { "epoch": 0.2, "learning_rate": 1.8166303957581103e-06, "loss": 0.6875, "step": 5020 }, { "epoch": 0.2, "learning_rate": 1.8162582409659932e-06, "loss": 0.6602, "step": 5025 }, { "epoch": 0.2, "learning_rate": 1.815885747107301e-06, "loss": 0.6819, "step": 5030 }, { "epoch": 0.2, "learning_rate": 1.815512914336765e-06, "loss": 0.6843, "step": 5035 }, { "epoch": 0.2, "learning_rate": 1.8151397428092563e-06, "loss": 0.7079, "step": 5040 }, { "epoch": 0.2, "learning_rate": 1.8147662326797872e-06, "loss": 0.7022, "step": 5045 }, { "epoch": 0.21, "learning_rate": 1.8143923841035107e-06, "loss": 0.6897, "step": 5050 }, { "epoch": 0.21, "learning_rate": 1.8140181972357201e-06, "loss": 0.6774, "step": 5055 }, { "epoch": 0.21, "learning_rate": 1.8136436722318496e-06, "loss": 0.6877, "step": 5060 }, { "epoch": 0.21, "learning_rate": 1.813268809247473e-06, "loss": 0.6957, "step": 5065 }, { "epoch": 0.21, "learning_rate": 1.8128936084383058e-06, "loss": 0.7274, "step": 5070 }, { "epoch": 0.21, "learning_rate": 1.812518069960203e-06, "loss": 0.6767, "step": 5075 }, { "epoch": 0.21, "learning_rate": 1.8121421939691599e-06, "loss": 0.6857, "step": 5080 }, { "epoch": 0.21, "learning_rate": 1.8117659806213122e-06, "loss": 0.6637, "step": 5085 }, { "epoch": 0.21, "learning_rate": 1.8113894300729356e-06, "loss": 0.6777, "step": 5090 }, { "epoch": 0.21, "learning_rate": 1.8110125424804458e-06, "loss": 0.6666, "step": 5095 }, { "epoch": 0.21, "learning_rate": 1.8106353180003988e-06, "loss": 0.6811, "step": 5100 }, { "epoch": 0.21, "learning_rate": 1.8102577567894905e-06, "loss": 0.6735, "step": 5105 }, { "epoch": 0.21, "learning_rate": 1.8098798590045563e-06, "loss": 0.7543, "step": 5110 }, { "epoch": 0.21, "learning_rate": 1.8095016248025717e-06, "loss": 0.6957, "step": 5115 }, { "epoch": 0.21, "learning_rate": 1.809123054340652e-06, "loss": 0.6969, "step": 5120 }, { "epoch": 0.21, "learning_rate": 1.8087441477760517e-06, "loss": 0.6775, "step": 5125 }, { "epoch": 0.21, "learning_rate": 1.8083649052661661e-06, "loss": 0.6794, "step": 5130 }, { "epoch": 0.21, "learning_rate": 1.8079853269685282e-06, "loss": 0.6253, "step": 5135 }, { "epoch": 0.21, "learning_rate": 1.8076054130408123e-06, "loss": 0.7048, "step": 5140 }, { "epoch": 0.21, "learning_rate": 1.8072251636408314e-06, "loss": 0.681, "step": 5145 }, { "epoch": 0.21, "learning_rate": 1.806844578926537e-06, "loss": 0.6644, "step": 5150 }, { "epoch": 0.21, "learning_rate": 1.8064636590560215e-06, "loss": 0.6972, "step": 5155 }, { "epoch": 0.21, "learning_rate": 1.8060824041875154e-06, "loss": 0.6831, "step": 5160 }, { "epoch": 0.21, "learning_rate": 1.8057008144793882e-06, "loss": 0.6738, "step": 5165 }, { "epoch": 0.21, "learning_rate": 1.8053188900901497e-06, "loss": 0.7293, "step": 5170 }, { "epoch": 0.21, "learning_rate": 1.8049366311784477e-06, "loss": 0.6975, "step": 5175 }, { "epoch": 0.21, "learning_rate": 1.8045540379030691e-06, "loss": 0.7275, "step": 5180 }, { "epoch": 0.21, "learning_rate": 1.8041711104229401e-06, "loss": 0.6867, "step": 5185 }, { "epoch": 0.21, "learning_rate": 1.803787848897125e-06, "loss": 0.6857, "step": 5190 }, { "epoch": 0.21, "learning_rate": 1.8034042534848277e-06, "loss": 0.6861, "step": 5195 }, { "epoch": 0.21, "learning_rate": 1.8030203243453901e-06, "loss": 0.6732, "step": 5200 }, { "epoch": 0.21, "learning_rate": 1.8026360616382933e-06, "loss": 0.7133, "step": 5205 }, { "epoch": 0.21, "learning_rate": 1.8022514655231565e-06, "loss": 0.7196, "step": 5210 }, { "epoch": 0.21, "learning_rate": 1.8018665361597373e-06, "loss": 0.6929, "step": 5215 }, { "epoch": 0.21, "learning_rate": 1.801481273707933e-06, "loss": 0.7227, "step": 5220 }, { "epoch": 0.21, "learning_rate": 1.8010956783277772e-06, "loss": 0.6655, "step": 5225 }, { "epoch": 0.21, "learning_rate": 1.8007097501794435e-06, "loss": 0.7454, "step": 5230 }, { "epoch": 0.21, "learning_rate": 1.8003234894232426e-06, "loss": 0.6815, "step": 5235 }, { "epoch": 0.21, "learning_rate": 1.7999368962196243e-06, "loss": 0.7456, "step": 5240 }, { "epoch": 0.21, "learning_rate": 1.7995499707291762e-06, "loss": 0.6822, "step": 5245 }, { "epoch": 0.21, "learning_rate": 1.799162713112623e-06, "loss": 0.7083, "step": 5250 }, { "epoch": 0.21, "learning_rate": 1.798775123530829e-06, "loss": 0.6874, "step": 5255 }, { "epoch": 0.21, "learning_rate": 1.798387202144795e-06, "loss": 0.6864, "step": 5260 }, { "epoch": 0.21, "learning_rate": 1.7979989491156603e-06, "loss": 0.6991, "step": 5265 }, { "epoch": 0.21, "learning_rate": 1.797610364604702e-06, "loss": 0.7077, "step": 5270 }, { "epoch": 0.21, "learning_rate": 1.7972214487733345e-06, "loss": 0.6461, "step": 5275 }, { "epoch": 0.21, "learning_rate": 1.7968322017831102e-06, "loss": 0.6968, "step": 5280 }, { "epoch": 0.21, "learning_rate": 1.7964426237957188e-06, "loss": 0.6635, "step": 5285 }, { "epoch": 0.21, "learning_rate": 1.7960527149729878e-06, "loss": 0.684, "step": 5290 }, { "epoch": 0.22, "learning_rate": 1.7956624754768818e-06, "loss": 0.6784, "step": 5295 }, { "epoch": 0.22, "learning_rate": 1.795271905469503e-06, "loss": 0.7481, "step": 5300 }, { "epoch": 0.22, "learning_rate": 1.7948810051130905e-06, "loss": 0.683, "step": 5305 }, { "epoch": 0.22, "learning_rate": 1.794489774570021e-06, "loss": 0.6661, "step": 5310 }, { "epoch": 0.22, "learning_rate": 1.7940982140028087e-06, "loss": 0.7184, "step": 5315 }, { "epoch": 0.22, "learning_rate": 1.793706323574104e-06, "loss": 0.6612, "step": 5320 }, { "epoch": 0.22, "learning_rate": 1.7933141034466948e-06, "loss": 0.703, "step": 5325 }, { "epoch": 0.22, "learning_rate": 1.792921553783506e-06, "loss": 0.7108, "step": 5330 }, { "epoch": 0.22, "learning_rate": 1.7925286747475994e-06, "loss": 0.7301, "step": 5335 }, { "epoch": 0.22, "learning_rate": 1.7921354665021735e-06, "loss": 0.6704, "step": 5340 }, { "epoch": 0.22, "learning_rate": 1.7917419292105636e-06, "loss": 0.7065, "step": 5345 }, { "epoch": 0.22, "learning_rate": 1.7913480630362417e-06, "loss": 0.7033, "step": 5350 }, { "epoch": 0.22, "learning_rate": 1.790953868142816e-06, "loss": 0.7258, "step": 5355 }, { "epoch": 0.22, "learning_rate": 1.790559344694032e-06, "loss": 0.6887, "step": 5360 }, { "epoch": 0.22, "learning_rate": 1.7901644928537715e-06, "loss": 0.7195, "step": 5365 }, { "epoch": 0.22, "learning_rate": 1.7897693127860524e-06, "loss": 0.7058, "step": 5370 }, { "epoch": 0.22, "learning_rate": 1.7893738046550286e-06, "loss": 0.7177, "step": 5375 }, { "epoch": 0.22, "learning_rate": 1.7889779686249912e-06, "loss": 0.6891, "step": 5380 }, { "epoch": 0.22, "learning_rate": 1.788581804860367e-06, "loss": 0.6551, "step": 5385 }, { "epoch": 0.22, "learning_rate": 1.7881853135257185e-06, "loss": 0.7061, "step": 5390 }, { "epoch": 0.22, "learning_rate": 1.7877884947857455e-06, "loss": 0.6706, "step": 5395 }, { "epoch": 0.22, "learning_rate": 1.7873913488052827e-06, "loss": 0.6719, "step": 5400 }, { "epoch": 0.22, "learning_rate": 1.7869938757493011e-06, "loss": 0.7022, "step": 5405 }, { "epoch": 0.22, "learning_rate": 1.7865960757829075e-06, "loss": 0.676, "step": 5410 }, { "epoch": 0.22, "learning_rate": 1.7861979490713445e-06, "loss": 0.7427, "step": 5415 }, { "epoch": 0.22, "learning_rate": 1.7857994957799906e-06, "loss": 0.698, "step": 5420 }, { "epoch": 0.22, "learning_rate": 1.78540071607436e-06, "loss": 0.7023, "step": 5425 }, { "epoch": 0.22, "learning_rate": 1.785001610120102e-06, "loss": 0.6985, "step": 5430 }, { "epoch": 0.22, "learning_rate": 1.784602178083002e-06, "loss": 0.7192, "step": 5435 }, { "epoch": 0.22, "learning_rate": 1.7842024201289801e-06, "loss": 0.6682, "step": 5440 }, { "epoch": 0.22, "learning_rate": 1.7838023364240929e-06, "loss": 0.694, "step": 5445 }, { "epoch": 0.22, "learning_rate": 1.7834019271345313e-06, "loss": 0.6484, "step": 5450 }, { "epoch": 0.22, "learning_rate": 1.7830011924266221e-06, "loss": 0.6921, "step": 5455 }, { "epoch": 0.22, "learning_rate": 1.7826001324668267e-06, "loss": 0.7135, "step": 5460 }, { "epoch": 0.22, "learning_rate": 1.7821987474217424e-06, "loss": 0.6631, "step": 5465 }, { "epoch": 0.22, "learning_rate": 1.7817970374581006e-06, "loss": 0.6572, "step": 5470 }, { "epoch": 0.22, "learning_rate": 1.781395002742768e-06, "loss": 0.7068, "step": 5475 }, { "epoch": 0.22, "learning_rate": 1.780992643442747e-06, "loss": 0.6417, "step": 5480 }, { "epoch": 0.22, "learning_rate": 1.7805899597251735e-06, "loss": 0.6847, "step": 5485 }, { "epoch": 0.22, "learning_rate": 1.7801869517573191e-06, "loss": 0.6862, "step": 5490 }, { "epoch": 0.22, "learning_rate": 1.77978361970659e-06, "loss": 0.6696, "step": 5495 }, { "epoch": 0.22, "learning_rate": 1.7793799637405265e-06, "loss": 0.737, "step": 5500 }, { "epoch": 0.22, "learning_rate": 1.7789759840268037e-06, "loss": 0.6826, "step": 5505 }, { "epoch": 0.22, "learning_rate": 1.7785716807332315e-06, "loss": 0.7288, "step": 5510 }, { "epoch": 0.22, "learning_rate": 1.778167054027754e-06, "loss": 0.6856, "step": 5515 }, { "epoch": 0.22, "learning_rate": 1.7777621040784496e-06, "loss": 0.6801, "step": 5520 }, { "epoch": 0.22, "learning_rate": 1.777356831053531e-06, "loss": 0.6737, "step": 5525 }, { "epoch": 0.22, "learning_rate": 1.7769512351213448e-06, "loss": 0.6547, "step": 5530 }, { "epoch": 0.22, "learning_rate": 1.7765453164503722e-06, "loss": 0.7255, "step": 5535 }, { "epoch": 0.23, "learning_rate": 1.7761390752092284e-06, "loss": 0.6757, "step": 5540 }, { "epoch": 0.23, "learning_rate": 1.7757325115666624e-06, "loss": 0.6752, "step": 5545 }, { "epoch": 0.23, "learning_rate": 1.7753256256915572e-06, "loss": 0.6901, "step": 5550 }, { "epoch": 0.23, "learning_rate": 1.7749184177529294e-06, "loss": 0.6747, "step": 5555 }, { "epoch": 0.23, "learning_rate": 1.7745108879199302e-06, "loss": 0.6852, "step": 5560 }, { "epoch": 0.23, "learning_rate": 1.7741030363618437e-06, "loss": 0.6804, "step": 5565 }, { "epoch": 0.23, "learning_rate": 1.7736948632480874e-06, "loss": 0.6783, "step": 5570 }, { "epoch": 0.23, "learning_rate": 1.7732863687482138e-06, "loss": 0.6799, "step": 5575 }, { "epoch": 0.23, "learning_rate": 1.772877553031907e-06, "loss": 0.7125, "step": 5580 }, { "epoch": 0.23, "learning_rate": 1.7724684162689863e-06, "loss": 0.6851, "step": 5585 }, { "epoch": 0.23, "learning_rate": 1.772058958629403e-06, "loss": 0.6906, "step": 5590 }, { "epoch": 0.23, "learning_rate": 1.7716491802832425e-06, "loss": 0.7292, "step": 5595 }, { "epoch": 0.23, "learning_rate": 1.771239081400723e-06, "loss": 0.7028, "step": 5600 }, { "epoch": 0.23, "eval_loss": 0.6604963541030884, "eval_runtime": 138.4769, "eval_samples_per_second": 17.086, "eval_steps_per_second": 2.852, "step": 5600 }, { "epoch": 0.23, "learning_rate": 1.7708286621521964e-06, "loss": 0.6984, "step": 5605 }, { "epoch": 0.23, "learning_rate": 1.7704179227081467e-06, "loss": 0.7075, "step": 5610 }, { "epoch": 0.23, "learning_rate": 1.7700068632391917e-06, "loss": 0.7014, "step": 5615 }, { "epoch": 0.23, "learning_rate": 1.7695954839160824e-06, "loss": 0.6986, "step": 5620 }, { "epoch": 0.23, "learning_rate": 1.7691837849097015e-06, "loss": 0.7069, "step": 5625 }, { "epoch": 0.23, "learning_rate": 1.7687717663910658e-06, "loss": 0.6893, "step": 5630 }, { "epoch": 0.23, "learning_rate": 1.7683594285313235e-06, "loss": 0.6479, "step": 5635 }, { "epoch": 0.23, "learning_rate": 1.7679467715017567e-06, "loss": 0.7326, "step": 5640 }, { "epoch": 0.23, "learning_rate": 1.7675337954737795e-06, "loss": 0.7032, "step": 5645 }, { "epoch": 0.23, "learning_rate": 1.7671205006189384e-06, "loss": 0.6587, "step": 5650 }, { "epoch": 0.23, "learning_rate": 1.7667068871089126e-06, "loss": 0.6494, "step": 5655 }, { "epoch": 0.23, "learning_rate": 1.7662929551155136e-06, "loss": 0.7001, "step": 5660 }, { "epoch": 0.23, "learning_rate": 1.7658787048106846e-06, "loss": 0.7043, "step": 5665 }, { "epoch": 0.23, "learning_rate": 1.7654641363665027e-06, "loss": 0.6735, "step": 5670 }, { "epoch": 0.23, "learning_rate": 1.765049249955175e-06, "loss": 0.706, "step": 5675 }, { "epoch": 0.23, "learning_rate": 1.764634045749042e-06, "loss": 0.7178, "step": 5680 }, { "epoch": 0.23, "learning_rate": 1.7642185239205763e-06, "loss": 0.6817, "step": 5685 }, { "epoch": 0.23, "learning_rate": 1.7638026846423813e-06, "loss": 0.739, "step": 5690 }, { "epoch": 0.23, "learning_rate": 1.7633865280871935e-06, "loss": 0.704, "step": 5695 }, { "epoch": 0.23, "learning_rate": 1.762970054427881e-06, "loss": 0.6859, "step": 5700 }, { "epoch": 0.23, "learning_rate": 1.7625532638374429e-06, "loss": 0.6645, "step": 5705 }, { "epoch": 0.23, "learning_rate": 1.7621361564890104e-06, "loss": 0.6885, "step": 5710 }, { "epoch": 0.23, "learning_rate": 1.7617187325558463e-06, "loss": 0.7211, "step": 5715 }, { "epoch": 0.23, "learning_rate": 1.7613009922113449e-06, "loss": 0.6563, "step": 5720 }, { "epoch": 0.23, "learning_rate": 1.760882935629032e-06, "loss": 0.6872, "step": 5725 }, { "epoch": 0.23, "learning_rate": 1.7604645629825647e-06, "loss": 0.7238, "step": 5730 }, { "epoch": 0.23, "learning_rate": 1.7600458744457312e-06, "loss": 0.6829, "step": 5735 }, { "epoch": 0.23, "learning_rate": 1.7596268701924513e-06, "loss": 0.6752, "step": 5740 }, { "epoch": 0.23, "learning_rate": 1.7592075503967751e-06, "loss": 0.7009, "step": 5745 }, { "epoch": 0.23, "learning_rate": 1.7587879152328852e-06, "loss": 0.6987, "step": 5750 }, { "epoch": 0.23, "learning_rate": 1.758367964875094e-06, "loss": 0.7154, "step": 5755 }, { "epoch": 0.23, "learning_rate": 1.7579476994978454e-06, "loss": 0.6551, "step": 5760 }, { "epoch": 0.23, "learning_rate": 1.7575271192757138e-06, "loss": 0.6711, "step": 5765 }, { "epoch": 0.23, "learning_rate": 1.7571062243834046e-06, "loss": 0.7064, "step": 5770 }, { "epoch": 0.23, "learning_rate": 1.7566850149957536e-06, "loss": 0.6889, "step": 5775 }, { "epoch": 0.23, "learning_rate": 1.7562634912877282e-06, "loss": 0.6998, "step": 5780 }, { "epoch": 0.23, "learning_rate": 1.7558416534344252e-06, "loss": 0.6513, "step": 5785 }, { "epoch": 0.24, "learning_rate": 1.7554195016110725e-06, "loss": 0.7019, "step": 5790 }, { "epoch": 0.24, "learning_rate": 1.7549970359930285e-06, "loss": 0.7119, "step": 5795 }, { "epoch": 0.24, "learning_rate": 1.7545742567557811e-06, "loss": 0.6858, "step": 5800 }, { "epoch": 0.24, "learning_rate": 1.7541511640749499e-06, "loss": 0.6887, "step": 5805 }, { "epoch": 0.24, "learning_rate": 1.753727758126283e-06, "loss": 0.6976, "step": 5810 }, { "epoch": 0.24, "learning_rate": 1.7533040390856608e-06, "loss": 0.6699, "step": 5815 }, { "epoch": 0.24, "learning_rate": 1.7528800071290914e-06, "loss": 0.6802, "step": 5820 }, { "epoch": 0.24, "learning_rate": 1.7524556624327147e-06, "loss": 0.6804, "step": 5825 }, { "epoch": 0.24, "learning_rate": 1.7520310051727992e-06, "loss": 0.6678, "step": 5830 }, { "epoch": 0.24, "learning_rate": 1.7516060355257443e-06, "loss": 0.6856, "step": 5835 }, { "epoch": 0.24, "learning_rate": 1.7511807536680782e-06, "loss": 0.6983, "step": 5840 }, { "epoch": 0.24, "learning_rate": 1.75075515977646e-06, "loss": 0.7212, "step": 5845 }, { "epoch": 0.24, "learning_rate": 1.7503292540276772e-06, "loss": 0.6783, "step": 5850 }, { "epoch": 0.24, "learning_rate": 1.7499030365986475e-06, "loss": 0.6932, "step": 5855 }, { "epoch": 0.24, "learning_rate": 1.749476507666418e-06, "loss": 0.7191, "step": 5860 }, { "epoch": 0.24, "learning_rate": 1.7490496674081648e-06, "loss": 0.6817, "step": 5865 }, { "epoch": 0.24, "learning_rate": 1.748622516001194e-06, "loss": 0.6568, "step": 5870 }, { "epoch": 0.24, "learning_rate": 1.7481950536229402e-06, "loss": 0.6861, "step": 5875 }, { "epoch": 0.24, "learning_rate": 1.7477672804509679e-06, "loss": 0.6995, "step": 5880 }, { "epoch": 0.24, "learning_rate": 1.7473391966629698e-06, "loss": 0.72, "step": 5885 }, { "epoch": 0.24, "learning_rate": 1.7469108024367687e-06, "loss": 0.6983, "step": 5890 }, { "epoch": 0.24, "learning_rate": 1.746482097950316e-06, "loss": 0.6961, "step": 5895 }, { "epoch": 0.24, "learning_rate": 1.7460530833816915e-06, "loss": 0.7183, "step": 5900 }, { "epoch": 0.24, "learning_rate": 1.745623758909104e-06, "loss": 0.7194, "step": 5905 }, { "epoch": 0.24, "learning_rate": 1.7451941247108914e-06, "loss": 0.6699, "step": 5910 }, { "epoch": 0.24, "learning_rate": 1.74476418096552e-06, "loss": 0.6807, "step": 5915 }, { "epoch": 0.24, "learning_rate": 1.7443339278515846e-06, "loss": 0.6835, "step": 5920 }, { "epoch": 0.24, "learning_rate": 1.7439033655478084e-06, "loss": 0.6874, "step": 5925 }, { "epoch": 0.24, "learning_rate": 1.7434724942330436e-06, "loss": 0.6832, "step": 5930 }, { "epoch": 0.24, "learning_rate": 1.7430413140862703e-06, "loss": 0.6661, "step": 5935 }, { "epoch": 0.24, "learning_rate": 1.7426098252865968e-06, "loss": 0.677, "step": 5940 }, { "epoch": 0.24, "learning_rate": 1.74217802801326e-06, "loss": 0.6587, "step": 5945 }, { "epoch": 0.24, "learning_rate": 1.7417459224456246e-06, "loss": 0.6613, "step": 5950 }, { "epoch": 0.24, "learning_rate": 1.741313508763184e-06, "loss": 0.7241, "step": 5955 }, { "epoch": 0.24, "learning_rate": 1.740880787145558e-06, "loss": 0.7194, "step": 5960 }, { "epoch": 0.24, "learning_rate": 1.7404477577724964e-06, "loss": 0.6832, "step": 5965 }, { "epoch": 0.24, "learning_rate": 1.7400144208238751e-06, "loss": 0.6761, "step": 5970 }, { "epoch": 0.24, "learning_rate": 1.7395807764796993e-06, "loss": 0.7259, "step": 5975 }, { "epoch": 0.24, "learning_rate": 1.7391468249201004e-06, "loss": 0.6704, "step": 5980 }, { "epoch": 0.24, "learning_rate": 1.7387125663253386e-06, "loss": 0.6836, "step": 5985 }, { "epoch": 0.24, "learning_rate": 1.7382780008758005e-06, "loss": 0.6555, "step": 5990 }, { "epoch": 0.24, "learning_rate": 1.7378431287520016e-06, "loss": 0.7281, "step": 5995 }, { "epoch": 0.24, "learning_rate": 1.7374079501345835e-06, "loss": 0.6644, "step": 6000 }, { "epoch": 0.24, "learning_rate": 1.7369724652043156e-06, "loss": 0.6314, "step": 6005 }, { "epoch": 0.24, "learning_rate": 1.7365366741420947e-06, "loss": 0.6719, "step": 6010 }, { "epoch": 0.24, "learning_rate": 1.736100577128945e-06, "loss": 0.6913, "step": 6015 }, { "epoch": 0.24, "learning_rate": 1.7356641743460166e-06, "loss": 0.6858, "step": 6020 }, { "epoch": 0.24, "learning_rate": 1.7352274659745878e-06, "loss": 0.6853, "step": 6025 }, { "epoch": 0.24, "learning_rate": 1.7347904521960635e-06, "loss": 0.6921, "step": 6030 }, { "epoch": 0.25, "learning_rate": 1.7343531331919756e-06, "loss": 0.6898, "step": 6035 }, { "epoch": 0.25, "learning_rate": 1.7339155091439823e-06, "loss": 0.6996, "step": 6040 }, { "epoch": 0.25, "learning_rate": 1.733477580233869e-06, "loss": 0.7071, "step": 6045 }, { "epoch": 0.25, "learning_rate": 1.7330393466435474e-06, "loss": 0.6903, "step": 6050 }, { "epoch": 0.25, "learning_rate": 1.7326008085550564e-06, "loss": 0.6929, "step": 6055 }, { "epoch": 0.25, "learning_rate": 1.73216196615056e-06, "loss": 0.7195, "step": 6060 }, { "epoch": 0.25, "learning_rate": 1.7317228196123504e-06, "loss": 0.6737, "step": 6065 }, { "epoch": 0.25, "learning_rate": 1.7312833691228445e-06, "loss": 0.7118, "step": 6070 }, { "epoch": 0.25, "learning_rate": 1.7308436148645871e-06, "loss": 0.7004, "step": 6075 }, { "epoch": 0.25, "learning_rate": 1.7304035570202476e-06, "loss": 0.666, "step": 6080 }, { "epoch": 0.25, "learning_rate": 1.7299631957726223e-06, "loss": 0.6574, "step": 6085 }, { "epoch": 0.25, "learning_rate": 1.7295225313046337e-06, "loss": 0.7057, "step": 6090 }, { "epoch": 0.25, "learning_rate": 1.72908156379933e-06, "loss": 0.6804, "step": 6095 }, { "epoch": 0.25, "learning_rate": 1.7286402934398848e-06, "loss": 0.7117, "step": 6100 }, { "epoch": 0.25, "learning_rate": 1.7281987204095985e-06, "loss": 0.7004, "step": 6105 }, { "epoch": 0.25, "learning_rate": 1.7277568448918962e-06, "loss": 0.6945, "step": 6110 }, { "epoch": 0.25, "learning_rate": 1.7273146670703295e-06, "loss": 0.6268, "step": 6115 }, { "epoch": 0.25, "learning_rate": 1.7268721871285753e-06, "loss": 0.6772, "step": 6120 }, { "epoch": 0.25, "learning_rate": 1.7264294052504358e-06, "loss": 0.7141, "step": 6125 }, { "epoch": 0.25, "learning_rate": 1.7259863216198385e-06, "loss": 0.6466, "step": 6130 }, { "epoch": 0.25, "learning_rate": 1.7255429364208366e-06, "loss": 0.6835, "step": 6135 }, { "epoch": 0.25, "learning_rate": 1.7250992498376086e-06, "loss": 0.6938, "step": 6140 }, { "epoch": 0.25, "learning_rate": 1.7246552620544584e-06, "loss": 0.7017, "step": 6145 }, { "epoch": 0.25, "learning_rate": 1.7242109732558137e-06, "loss": 0.7161, "step": 6150 }, { "epoch": 0.25, "learning_rate": 1.7237663836262289e-06, "loss": 0.6646, "step": 6155 }, { "epoch": 0.25, "learning_rate": 1.7233214933503826e-06, "loss": 0.6712, "step": 6160 }, { "epoch": 0.25, "learning_rate": 1.7228763026130783e-06, "loss": 0.6749, "step": 6165 }, { "epoch": 0.25, "learning_rate": 1.7224308115992443e-06, "loss": 0.647, "step": 6170 }, { "epoch": 0.25, "learning_rate": 1.7219850204939338e-06, "loss": 0.7009, "step": 6175 }, { "epoch": 0.25, "learning_rate": 1.7215389294823243e-06, "loss": 0.6568, "step": 6180 }, { "epoch": 0.25, "learning_rate": 1.7210925387497186e-06, "loss": 0.6501, "step": 6185 }, { "epoch": 0.25, "learning_rate": 1.7206458484815429e-06, "loss": 0.6926, "step": 6190 }, { "epoch": 0.25, "learning_rate": 1.7201988588633489e-06, "loss": 0.6973, "step": 6195 }, { "epoch": 0.25, "learning_rate": 1.7197515700808124e-06, "loss": 0.6979, "step": 6200 }, { "epoch": 0.25, "learning_rate": 1.7193039823197323e-06, "loss": 0.6634, "step": 6205 }, { "epoch": 0.25, "learning_rate": 1.7188560957660338e-06, "loss": 0.6958, "step": 6210 }, { "epoch": 0.25, "learning_rate": 1.7184079106057645e-06, "loss": 0.7073, "step": 6215 }, { "epoch": 0.25, "learning_rate": 1.7179594270250965e-06, "loss": 0.7147, "step": 6220 }, { "epoch": 0.25, "learning_rate": 1.7175106452103268e-06, "loss": 0.7319, "step": 6225 }, { "epoch": 0.25, "learning_rate": 1.7170615653478742e-06, "loss": 0.6786, "step": 6230 }, { "epoch": 0.25, "learning_rate": 1.7166121876242837e-06, "loss": 0.6643, "step": 6235 }, { "epoch": 0.25, "learning_rate": 1.716162512226222e-06, "loss": 0.7524, "step": 6240 }, { "epoch": 0.25, "learning_rate": 1.715712539340481e-06, "loss": 0.6517, "step": 6245 }, { "epoch": 0.25, "learning_rate": 1.715262269153975e-06, "loss": 0.6999, "step": 6250 }, { "epoch": 0.25, "learning_rate": 1.7148117018537432e-06, "loss": 0.6756, "step": 6255 }, { "epoch": 0.25, "learning_rate": 1.7143608376269462e-06, "loss": 0.7208, "step": 6260 }, { "epoch": 0.25, "learning_rate": 1.71390967666087e-06, "loss": 0.6793, "step": 6265 }, { "epoch": 0.25, "learning_rate": 1.7134582191429223e-06, "loss": 0.68, "step": 6270 }, { "epoch": 0.25, "learning_rate": 1.7130064652606352e-06, "loss": 0.6666, "step": 6275 }, { "epoch": 0.26, "learning_rate": 1.7125544152016627e-06, "loss": 0.6714, "step": 6280 }, { "epoch": 0.26, "learning_rate": 1.7121020691537831e-06, "loss": 0.7034, "step": 6285 }, { "epoch": 0.26, "learning_rate": 1.7116494273048966e-06, "loss": 0.6819, "step": 6290 }, { "epoch": 0.26, "learning_rate": 1.7111964898430266e-06, "loss": 0.66, "step": 6295 }, { "epoch": 0.26, "learning_rate": 1.71074325695632e-06, "loss": 0.7156, "step": 6300 }, { "epoch": 0.26, "learning_rate": 1.7102897288330454e-06, "loss": 0.6804, "step": 6305 }, { "epoch": 0.26, "learning_rate": 1.7098359056615942e-06, "loss": 0.6807, "step": 6310 }, { "epoch": 0.26, "learning_rate": 1.7093817876304807e-06, "loss": 0.6701, "step": 6315 }, { "epoch": 0.26, "learning_rate": 1.7089273749283418e-06, "loss": 0.6785, "step": 6320 }, { "epoch": 0.26, "learning_rate": 1.7084726677439364e-06, "loss": 0.6941, "step": 6325 }, { "epoch": 0.26, "learning_rate": 1.7080176662661463e-06, "loss": 0.6811, "step": 6330 }, { "epoch": 0.26, "learning_rate": 1.7075623706839745e-06, "loss": 0.6797, "step": 6335 }, { "epoch": 0.26, "learning_rate": 1.7071067811865474e-06, "loss": 0.6913, "step": 6340 }, { "epoch": 0.26, "learning_rate": 1.7066508979631129e-06, "loss": 0.6917, "step": 6345 }, { "epoch": 0.26, "learning_rate": 1.7061947212030402e-06, "loss": 0.6709, "step": 6350 }, { "epoch": 0.26, "learning_rate": 1.705738251095822e-06, "loss": 0.7023, "step": 6355 }, { "epoch": 0.26, "learning_rate": 1.7052814878310718e-06, "loss": 0.7109, "step": 6360 }, { "epoch": 0.26, "learning_rate": 1.7048244315985247e-06, "loss": 0.6795, "step": 6365 }, { "epoch": 0.26, "learning_rate": 1.7043670825880384e-06, "loss": 0.7013, "step": 6370 }, { "epoch": 0.26, "learning_rate": 1.7039094409895914e-06, "loss": 0.703, "step": 6375 }, { "epoch": 0.26, "learning_rate": 1.703451506993284e-06, "loss": 0.732, "step": 6380 }, { "epoch": 0.26, "learning_rate": 1.7029932807893382e-06, "loss": 0.6905, "step": 6385 }, { "epoch": 0.26, "learning_rate": 1.702534762568097e-06, "loss": 0.7172, "step": 6390 }, { "epoch": 0.26, "learning_rate": 1.7020759525200253e-06, "loss": 0.6674, "step": 6395 }, { "epoch": 0.26, "learning_rate": 1.701616850835708e-06, "loss": 0.6694, "step": 6400 }, { "epoch": 0.26, "eval_loss": 0.6586260795593262, "eval_runtime": 139.2072, "eval_samples_per_second": 16.996, "eval_steps_per_second": 2.837, "step": 6400 }, { "epoch": 0.26, "learning_rate": 1.7011574577058525e-06, "loss": 0.7282, "step": 6405 }, { "epoch": 0.26, "learning_rate": 1.7006977733212867e-06, "loss": 0.6736, "step": 6410 }, { "epoch": 0.26, "learning_rate": 1.7002377978729596e-06, "loss": 0.6672, "step": 6415 }, { "epoch": 0.26, "learning_rate": 1.6997775315519408e-06, "loss": 0.712, "step": 6420 }, { "epoch": 0.26, "learning_rate": 1.6993169745494209e-06, "loss": 0.6949, "step": 6425 }, { "epoch": 0.26, "learning_rate": 1.6988561270567115e-06, "loss": 0.6951, "step": 6430 }, { "epoch": 0.26, "learning_rate": 1.698394989265244e-06, "loss": 0.7019, "step": 6435 }, { "epoch": 0.26, "learning_rate": 1.6979335613665717e-06, "loss": 0.7143, "step": 6440 }, { "epoch": 0.26, "learning_rate": 1.6974718435523678e-06, "loss": 0.7253, "step": 6445 }, { "epoch": 0.26, "learning_rate": 1.6970098360144253e-06, "loss": 0.7103, "step": 6450 }, { "epoch": 0.26, "learning_rate": 1.6965475389446586e-06, "loss": 0.6953, "step": 6455 }, { "epoch": 0.26, "learning_rate": 1.6960849525351018e-06, "loss": 0.7247, "step": 6460 }, { "epoch": 0.26, "learning_rate": 1.6956220769779088e-06, "loss": 0.7015, "step": 6465 }, { "epoch": 0.26, "learning_rate": 1.6951589124653547e-06, "loss": 0.704, "step": 6470 }, { "epoch": 0.26, "learning_rate": 1.6946954591898336e-06, "loss": 0.7054, "step": 6475 }, { "epoch": 0.26, "learning_rate": 1.6942317173438604e-06, "loss": 0.6704, "step": 6480 }, { "epoch": 0.26, "learning_rate": 1.693767687120069e-06, "loss": 0.6564, "step": 6485 }, { "epoch": 0.26, "learning_rate": 1.6933033687112134e-06, "loss": 0.7054, "step": 6490 }, { "epoch": 0.26, "learning_rate": 1.6928387623101681e-06, "loss": 0.676, "step": 6495 }, { "epoch": 0.26, "learning_rate": 1.692373868109926e-06, "loss": 0.7034, "step": 6500 }, { "epoch": 0.26, "learning_rate": 1.6919086863036003e-06, "loss": 0.6781, "step": 6505 }, { "epoch": 0.26, "learning_rate": 1.6914432170844233e-06, "loss": 0.7057, "step": 6510 }, { "epoch": 0.26, "learning_rate": 1.690977460645747e-06, "loss": 0.6929, "step": 6515 }, { "epoch": 0.26, "learning_rate": 1.6905114171810429e-06, "loss": 0.6816, "step": 6520 }, { "epoch": 0.27, "learning_rate": 1.6900450868839009e-06, "loss": 0.6845, "step": 6525 }, { "epoch": 0.27, "learning_rate": 1.6895784699480306e-06, "loss": 0.6665, "step": 6530 }, { "epoch": 0.27, "learning_rate": 1.6891115665672608e-06, "loss": 0.6597, "step": 6535 }, { "epoch": 0.27, "learning_rate": 1.6886443769355393e-06, "loss": 0.6623, "step": 6540 }, { "epoch": 0.27, "learning_rate": 1.688176901246932e-06, "loss": 0.6535, "step": 6545 }, { "epoch": 0.27, "learning_rate": 1.6877091396956247e-06, "loss": 0.6902, "step": 6550 }, { "epoch": 0.27, "learning_rate": 1.6872410924759215e-06, "loss": 0.6447, "step": 6555 }, { "epoch": 0.27, "learning_rate": 1.686772759782245e-06, "loss": 0.7013, "step": 6560 }, { "epoch": 0.27, "learning_rate": 1.6863041418091366e-06, "loss": 0.7158, "step": 6565 }, { "epoch": 0.27, "learning_rate": 1.685835238751256e-06, "loss": 0.7109, "step": 6570 }, { "epoch": 0.27, "learning_rate": 1.6853660508033816e-06, "loss": 0.7231, "step": 6575 }, { "epoch": 0.27, "learning_rate": 1.6848965781604099e-06, "loss": 0.7305, "step": 6580 }, { "epoch": 0.27, "learning_rate": 1.6844268210173556e-06, "loss": 0.7013, "step": 6585 }, { "epoch": 0.27, "learning_rate": 1.6839567795693524e-06, "loss": 0.6849, "step": 6590 }, { "epoch": 0.27, "learning_rate": 1.6834864540116506e-06, "loss": 0.7316, "step": 6595 }, { "epoch": 0.27, "learning_rate": 1.6830158445396196e-06, "loss": 0.7193, "step": 6600 }, { "epoch": 0.27, "learning_rate": 1.6825449513487466e-06, "loss": 0.7277, "step": 6605 }, { "epoch": 0.27, "learning_rate": 1.6820737746346368e-06, "loss": 0.7037, "step": 6610 }, { "epoch": 0.27, "learning_rate": 1.6816023145930123e-06, "loss": 0.7078, "step": 6615 }, { "epoch": 0.27, "learning_rate": 1.681130571419714e-06, "loss": 0.6651, "step": 6620 }, { "epoch": 0.27, "learning_rate": 1.6806585453106997e-06, "loss": 0.6851, "step": 6625 }, { "epoch": 0.27, "learning_rate": 1.680186236462045e-06, "loss": 0.6877, "step": 6630 }, { "epoch": 0.27, "learning_rate": 1.6797136450699427e-06, "loss": 0.6976, "step": 6635 }, { "epoch": 0.27, "learning_rate": 1.6792407713307036e-06, "loss": 0.7244, "step": 6640 }, { "epoch": 0.27, "learning_rate": 1.678767615440755e-06, "loss": 0.6734, "step": 6645 }, { "epoch": 0.27, "learning_rate": 1.6782941775966416e-06, "loss": 0.6733, "step": 6650 }, { "epoch": 0.27, "learning_rate": 1.6778204579950255e-06, "loss": 0.6836, "step": 6655 }, { "epoch": 0.27, "learning_rate": 1.6773464568326859e-06, "loss": 0.7101, "step": 6660 }, { "epoch": 0.27, "learning_rate": 1.6768721743065186e-06, "loss": 0.695, "step": 6665 }, { "epoch": 0.27, "learning_rate": 1.6763976106135366e-06, "loss": 0.6774, "step": 6670 }, { "epoch": 0.27, "learning_rate": 1.6759227659508692e-06, "loss": 0.674, "step": 6675 }, { "epoch": 0.27, "learning_rate": 1.6754476405157631e-06, "loss": 0.6887, "step": 6680 }, { "epoch": 0.27, "learning_rate": 1.674972234505581e-06, "loss": 0.689, "step": 6685 }, { "epoch": 0.27, "learning_rate": 1.6744965481178026e-06, "loss": 0.6877, "step": 6690 }, { "epoch": 0.27, "learning_rate": 1.6740205815500236e-06, "loss": 0.6872, "step": 6695 }, { "epoch": 0.27, "learning_rate": 1.673544334999957e-06, "loss": 0.6999, "step": 6700 }, { "epoch": 0.27, "learning_rate": 1.6730678086654306e-06, "loss": 0.6983, "step": 6705 }, { "epoch": 0.27, "learning_rate": 1.6725910027443902e-06, "loss": 0.6488, "step": 6710 }, { "epoch": 0.27, "learning_rate": 1.6721139174348964e-06, "loss": 0.6741, "step": 6715 }, { "epoch": 0.27, "learning_rate": 1.671636552935126e-06, "loss": 0.6809, "step": 6720 }, { "epoch": 0.27, "learning_rate": 1.6711589094433725e-06, "loss": 0.7326, "step": 6725 }, { "epoch": 0.27, "learning_rate": 1.6706809871580446e-06, "loss": 0.6757, "step": 6730 }, { "epoch": 0.27, "learning_rate": 1.670202786277667e-06, "loss": 0.6515, "step": 6735 }, { "epoch": 0.27, "learning_rate": 1.6697243070008805e-06, "loss": 0.6747, "step": 6740 }, { "epoch": 0.27, "learning_rate": 1.6692455495264413e-06, "loss": 0.6816, "step": 6745 }, { "epoch": 0.27, "learning_rate": 1.6687665140532209e-06, "loss": 0.6918, "step": 6750 }, { "epoch": 0.27, "learning_rate": 1.6682872007802062e-06, "loss": 0.7191, "step": 6755 }, { "epoch": 0.27, "learning_rate": 1.6678076099064999e-06, "loss": 0.6649, "step": 6760 }, { "epoch": 0.27, "learning_rate": 1.66732774163132e-06, "loss": 0.6889, "step": 6765 }, { "epoch": 0.27, "learning_rate": 1.666847596154e-06, "loss": 0.6917, "step": 6770 }, { "epoch": 0.28, "learning_rate": 1.6663671736739874e-06, "loss": 0.6717, "step": 6775 }, { "epoch": 0.28, "learning_rate": 1.665886474390846e-06, "loss": 0.7177, "step": 6780 }, { "epoch": 0.28, "learning_rate": 1.6654054985042538e-06, "loss": 0.6676, "step": 6785 }, { "epoch": 0.28, "learning_rate": 1.6649242462140044e-06, "loss": 0.708, "step": 6790 }, { "epoch": 0.28, "learning_rate": 1.6644427177200053e-06, "loss": 0.6958, "step": 6795 }, { "epoch": 0.28, "learning_rate": 1.66396091322228e-06, "loss": 0.6862, "step": 6800 }, { "epoch": 0.28, "learning_rate": 1.663478832920965e-06, "loss": 0.6462, "step": 6805 }, { "epoch": 0.28, "learning_rate": 1.6629964770163128e-06, "loss": 0.6668, "step": 6810 }, { "epoch": 0.28, "learning_rate": 1.6625138457086897e-06, "loss": 0.6926, "step": 6815 }, { "epoch": 0.28, "learning_rate": 1.6620309391985767e-06, "loss": 0.6942, "step": 6820 }, { "epoch": 0.28, "learning_rate": 1.661547757686569e-06, "loss": 0.6988, "step": 6825 }, { "epoch": 0.28, "learning_rate": 1.6610643013733756e-06, "loss": 0.6548, "step": 6830 }, { "epoch": 0.28, "learning_rate": 1.6605805704598206e-06, "loss": 0.6827, "step": 6835 }, { "epoch": 0.28, "learning_rate": 1.660096565146841e-06, "loss": 0.6939, "step": 6840 }, { "epoch": 0.28, "learning_rate": 1.6596122856354885e-06, "loss": 0.728, "step": 6845 }, { "epoch": 0.28, "learning_rate": 1.6591277321269295e-06, "loss": 0.6762, "step": 6850 }, { "epoch": 0.28, "learning_rate": 1.6586429048224422e-06, "loss": 0.6697, "step": 6855 }, { "epoch": 0.28, "learning_rate": 1.6581578039234203e-06, "loss": 0.7086, "step": 6860 }, { "epoch": 0.28, "learning_rate": 1.6576724296313697e-06, "loss": 0.7378, "step": 6865 }, { "epoch": 0.28, "learning_rate": 1.657186782147912e-06, "loss": 0.6861, "step": 6870 }, { "epoch": 0.28, "learning_rate": 1.6567008616747797e-06, "loss": 0.6749, "step": 6875 }, { "epoch": 0.28, "learning_rate": 1.6562146684138205e-06, "loss": 0.67, "step": 6880 }, { "epoch": 0.28, "learning_rate": 1.655728202566995e-06, "loss": 0.6561, "step": 6885 }, { "epoch": 0.28, "learning_rate": 1.6552414643363766e-06, "loss": 0.7061, "step": 6890 }, { "epoch": 0.28, "learning_rate": 1.6547544539241516e-06, "loss": 0.6779, "step": 6895 }, { "epoch": 0.28, "learning_rate": 1.6542671715326209e-06, "loss": 0.6927, "step": 6900 }, { "epoch": 0.28, "learning_rate": 1.653779617364197e-06, "loss": 0.6633, "step": 6905 }, { "epoch": 0.28, "learning_rate": 1.6532917916214055e-06, "loss": 0.6755, "step": 6910 }, { "epoch": 0.28, "learning_rate": 1.6528036945068852e-06, "loss": 0.6774, "step": 6915 }, { "epoch": 0.28, "learning_rate": 1.652315326223387e-06, "loss": 0.7033, "step": 6920 }, { "epoch": 0.28, "learning_rate": 1.6518266869737754e-06, "loss": 0.6823, "step": 6925 }, { "epoch": 0.28, "learning_rate": 1.6513377769610264e-06, "loss": 0.6952, "step": 6930 }, { "epoch": 0.28, "learning_rate": 1.6508485963882293e-06, "loss": 0.676, "step": 6935 }, { "epoch": 0.28, "learning_rate": 1.650359145458585e-06, "loss": 0.7026, "step": 6940 }, { "epoch": 0.28, "learning_rate": 1.6498694243754075e-06, "loss": 0.6872, "step": 6945 }, { "epoch": 0.28, "learning_rate": 1.6493794333421228e-06, "loss": 0.6846, "step": 6950 }, { "epoch": 0.28, "learning_rate": 1.6488891725622688e-06, "loss": 0.6825, "step": 6955 }, { "epoch": 0.28, "learning_rate": 1.6483986422394955e-06, "loss": 0.723, "step": 6960 }, { "epoch": 0.28, "learning_rate": 1.6479078425775653e-06, "loss": 0.7463, "step": 6965 }, { "epoch": 0.28, "learning_rate": 1.6474167737803514e-06, "loss": 0.7006, "step": 6970 }, { "epoch": 0.28, "learning_rate": 1.64692543605184e-06, "loss": 0.6745, "step": 6975 }, { "epoch": 0.28, "learning_rate": 1.6464338295961283e-06, "loss": 0.6935, "step": 6980 }, { "epoch": 0.28, "learning_rate": 1.6459419546174253e-06, "loss": 0.7056, "step": 6985 }, { "epoch": 0.28, "learning_rate": 1.6454498113200521e-06, "loss": 0.6763, "step": 6990 }, { "epoch": 0.28, "learning_rate": 1.6449573999084404e-06, "loss": 0.68, "step": 6995 }, { "epoch": 0.28, "learning_rate": 1.6444647205871332e-06, "loss": 0.6595, "step": 7000 }, { "epoch": 0.28, "learning_rate": 1.6439717735607856e-06, "loss": 0.6996, "step": 7005 }, { "epoch": 0.28, "learning_rate": 1.643478559034164e-06, "loss": 0.7008, "step": 7010 }, { "epoch": 0.28, "learning_rate": 1.6429850772121446e-06, "loss": 0.6907, "step": 7015 }, { "epoch": 0.29, "learning_rate": 1.6424913282997159e-06, "loss": 0.6627, "step": 7020 }, { "epoch": 0.29, "learning_rate": 1.641997312501977e-06, "loss": 0.6939, "step": 7025 }, { "epoch": 0.29, "learning_rate": 1.6415030300241371e-06, "loss": 0.7002, "step": 7030 }, { "epoch": 0.29, "learning_rate": 1.6410084810715177e-06, "loss": 0.7098, "step": 7035 }, { "epoch": 0.29, "learning_rate": 1.6405136658495496e-06, "loss": 0.7114, "step": 7040 }, { "epoch": 0.29, "learning_rate": 1.640018584563775e-06, "loss": 0.6951, "step": 7045 }, { "epoch": 0.29, "learning_rate": 1.639523237419846e-06, "loss": 0.7112, "step": 7050 }, { "epoch": 0.29, "learning_rate": 1.6390276246235257e-06, "loss": 0.6827, "step": 7055 }, { "epoch": 0.29, "learning_rate": 1.6385317463806878e-06, "loss": 0.7133, "step": 7060 }, { "epoch": 0.29, "learning_rate": 1.6380356028973152e-06, "loss": 0.6766, "step": 7065 }, { "epoch": 0.29, "learning_rate": 1.6375391943795015e-06, "loss": 0.6767, "step": 7070 }, { "epoch": 0.29, "learning_rate": 1.6370425210334514e-06, "loss": 0.6731, "step": 7075 }, { "epoch": 0.29, "learning_rate": 1.6365455830654775e-06, "loss": 0.6974, "step": 7080 }, { "epoch": 0.29, "learning_rate": 1.6360483806820043e-06, "loss": 0.6972, "step": 7085 }, { "epoch": 0.29, "learning_rate": 1.635550914089565e-06, "loss": 0.6974, "step": 7090 }, { "epoch": 0.29, "learning_rate": 1.635053183494803e-06, "loss": 0.6649, "step": 7095 }, { "epoch": 0.29, "learning_rate": 1.6345551891044713e-06, "loss": 0.6754, "step": 7100 }, { "epoch": 0.29, "learning_rate": 1.6340569311254323e-06, "loss": 0.6877, "step": 7105 }, { "epoch": 0.29, "learning_rate": 1.6335584097646585e-06, "loss": 0.6841, "step": 7110 }, { "epoch": 0.29, "learning_rate": 1.6330596252292309e-06, "loss": 0.6675, "step": 7115 }, { "epoch": 0.29, "learning_rate": 1.63256057772634e-06, "loss": 0.6743, "step": 7120 }, { "epoch": 0.29, "learning_rate": 1.6320612674632864e-06, "loss": 0.7045, "step": 7125 }, { "epoch": 0.29, "learning_rate": 1.631561694647479e-06, "loss": 0.6891, "step": 7130 }, { "epoch": 0.29, "learning_rate": 1.6310618594864355e-06, "loss": 0.6722, "step": 7135 }, { "epoch": 0.29, "learning_rate": 1.6305617621877841e-06, "loss": 0.7012, "step": 7140 }, { "epoch": 0.29, "learning_rate": 1.6300614029592602e-06, "loss": 0.665, "step": 7145 }, { "epoch": 0.29, "learning_rate": 1.6295607820087084e-06, "loss": 0.6967, "step": 7150 }, { "epoch": 0.29, "learning_rate": 1.6290598995440835e-06, "loss": 0.7089, "step": 7155 }, { "epoch": 0.29, "learning_rate": 1.628558755773446e-06, "loss": 0.7176, "step": 7160 }, { "epoch": 0.29, "learning_rate": 1.6280573509049679e-06, "loss": 0.6847, "step": 7165 }, { "epoch": 0.29, "learning_rate": 1.6275556851469284e-06, "loss": 0.6968, "step": 7170 }, { "epoch": 0.29, "learning_rate": 1.6270537587077145e-06, "loss": 0.6586, "step": 7175 }, { "epoch": 0.29, "learning_rate": 1.6265515717958222e-06, "loss": 0.7039, "step": 7180 }, { "epoch": 0.29, "learning_rate": 1.6260491246198563e-06, "loss": 0.725, "step": 7185 }, { "epoch": 0.29, "learning_rate": 1.6255464173885275e-06, "loss": 0.7166, "step": 7190 }, { "epoch": 0.29, "learning_rate": 1.6250434503106578e-06, "loss": 0.67, "step": 7195 }, { "epoch": 0.29, "learning_rate": 1.6245402235951742e-06, "loss": 0.6697, "step": 7200 }, { "epoch": 0.29, "eval_loss": 0.6546275019645691, "eval_runtime": 138.8622, "eval_samples_per_second": 17.038, "eval_steps_per_second": 2.845, "step": 7200 }, { "epoch": 0.29, "learning_rate": 1.624036737451113e-06, "loss": 0.6907, "step": 7205 }, { "epoch": 0.29, "learning_rate": 1.623532992087618e-06, "loss": 0.7066, "step": 7210 }, { "epoch": 0.29, "learning_rate": 1.6230289877139403e-06, "loss": 0.7354, "step": 7215 }, { "epoch": 0.29, "learning_rate": 1.6225247245394393e-06, "loss": 0.672, "step": 7220 }, { "epoch": 0.29, "learning_rate": 1.622020202773582e-06, "loss": 0.676, "step": 7225 }, { "epoch": 0.29, "learning_rate": 1.6215154226259414e-06, "loss": 0.6798, "step": 7230 }, { "epoch": 0.29, "learning_rate": 1.621010384306199e-06, "loss": 0.689, "step": 7235 }, { "epoch": 0.29, "learning_rate": 1.620505088024144e-06, "loss": 0.7003, "step": 7240 }, { "epoch": 0.29, "learning_rate": 1.619999533989671e-06, "loss": 0.6669, "step": 7245 }, { "epoch": 0.29, "learning_rate": 1.6194937224127837e-06, "loss": 0.6849, "step": 7250 }, { "epoch": 0.29, "learning_rate": 1.6189876535035919e-06, "loss": 0.6997, "step": 7255 }, { "epoch": 0.29, "learning_rate": 1.6184813274723113e-06, "loss": 0.7292, "step": 7260 }, { "epoch": 0.3, "learning_rate": 1.6179747445292659e-06, "loss": 0.6555, "step": 7265 }, { "epoch": 0.3, "learning_rate": 1.6174679048848856e-06, "loss": 0.672, "step": 7270 }, { "epoch": 0.3, "learning_rate": 1.6169608087497077e-06, "loss": 0.6803, "step": 7275 }, { "epoch": 0.3, "learning_rate": 1.6164534563343752e-06, "loss": 0.6701, "step": 7280 }, { "epoch": 0.3, "learning_rate": 1.615945847849638e-06, "loss": 0.6493, "step": 7285 }, { "epoch": 0.3, "learning_rate": 1.615437983506352e-06, "loss": 0.7276, "step": 7290 }, { "epoch": 0.3, "learning_rate": 1.6149298635154795e-06, "loss": 0.6832, "step": 7295 }, { "epoch": 0.3, "learning_rate": 1.6144214880880895e-06, "loss": 0.6981, "step": 7300 }, { "epoch": 0.3, "learning_rate": 1.6139128574353568e-06, "loss": 0.6715, "step": 7305 }, { "epoch": 0.3, "learning_rate": 1.613403971768562e-06, "loss": 0.6732, "step": 7310 }, { "epoch": 0.3, "learning_rate": 1.6128948312990916e-06, "loss": 0.6906, "step": 7315 }, { "epoch": 0.3, "learning_rate": 1.6123854362384384e-06, "loss": 0.682, "step": 7320 }, { "epoch": 0.3, "learning_rate": 1.6118757867982002e-06, "loss": 0.701, "step": 7325 }, { "epoch": 0.3, "learning_rate": 1.6113658831900816e-06, "loss": 0.6969, "step": 7330 }, { "epoch": 0.3, "learning_rate": 1.6108557256258916e-06, "loss": 0.6774, "step": 7335 }, { "epoch": 0.3, "learning_rate": 1.6103453143175458e-06, "loss": 0.6849, "step": 7340 }, { "epoch": 0.3, "learning_rate": 1.6098346494770642e-06, "loss": 0.6714, "step": 7345 }, { "epoch": 0.3, "learning_rate": 1.6093237313165722e-06, "loss": 0.6781, "step": 7350 }, { "epoch": 0.3, "learning_rate": 1.6088125600483014e-06, "loss": 0.657, "step": 7355 }, { "epoch": 0.3, "learning_rate": 1.6083011358845878e-06, "loss": 0.6867, "step": 7360 }, { "epoch": 0.3, "learning_rate": 1.6077894590378722e-06, "loss": 0.6977, "step": 7365 }, { "epoch": 0.3, "learning_rate": 1.607277529720701e-06, "loss": 0.6775, "step": 7370 }, { "epoch": 0.3, "learning_rate": 1.6067653481457251e-06, "loss": 0.6962, "step": 7375 }, { "epoch": 0.3, "learning_rate": 1.6062529145257e-06, "loss": 0.6801, "step": 7380 }, { "epoch": 0.3, "learning_rate": 1.6057402290734867e-06, "loss": 0.6917, "step": 7385 }, { "epoch": 0.3, "learning_rate": 1.6052272920020502e-06, "loss": 0.6858, "step": 7390 }, { "epoch": 0.3, "learning_rate": 1.6047141035244596e-06, "loss": 0.689, "step": 7395 }, { "epoch": 0.3, "learning_rate": 1.6042006638538893e-06, "loss": 0.684, "step": 7400 }, { "epoch": 0.3, "learning_rate": 1.6036869732036175e-06, "loss": 0.6685, "step": 7405 }, { "epoch": 0.3, "learning_rate": 1.603173031787027e-06, "loss": 0.7001, "step": 7410 }, { "epoch": 0.3, "learning_rate": 1.602658839817605e-06, "loss": 0.6947, "step": 7415 }, { "epoch": 0.3, "learning_rate": 1.6021443975089415e-06, "loss": 0.6384, "step": 7420 }, { "epoch": 0.3, "learning_rate": 1.601629705074732e-06, "loss": 0.6828, "step": 7425 }, { "epoch": 0.3, "learning_rate": 1.6011147627287746e-06, "loss": 0.6856, "step": 7430 }, { "epoch": 0.3, "learning_rate": 1.6005995706849726e-06, "loss": 0.6655, "step": 7435 }, { "epoch": 0.3, "learning_rate": 1.6000841291573322e-06, "loss": 0.7021, "step": 7440 }, { "epoch": 0.3, "learning_rate": 1.599568438359963e-06, "loss": 0.6889, "step": 7445 }, { "epoch": 0.3, "learning_rate": 1.5990524985070785e-06, "loss": 0.6917, "step": 7450 }, { "epoch": 0.3, "learning_rate": 1.598536309812996e-06, "loss": 0.7008, "step": 7455 }, { "epoch": 0.3, "learning_rate": 1.598019872492135e-06, "loss": 0.6614, "step": 7460 }, { "epoch": 0.3, "learning_rate": 1.59750318675902e-06, "loss": 0.6887, "step": 7465 }, { "epoch": 0.3, "learning_rate": 1.5969862528282771e-06, "loss": 0.6733, "step": 7470 }, { "epoch": 0.3, "learning_rate": 1.5964690709146367e-06, "loss": 0.6935, "step": 7475 }, { "epoch": 0.3, "learning_rate": 1.5959516412329314e-06, "loss": 0.6731, "step": 7480 }, { "epoch": 0.3, "learning_rate": 1.5954339639980967e-06, "loss": 0.6846, "step": 7485 }, { "epoch": 0.3, "learning_rate": 1.5949160394251718e-06, "loss": 0.7281, "step": 7490 }, { "epoch": 0.3, "learning_rate": 1.5943978677292976e-06, "loss": 0.7238, "step": 7495 }, { "epoch": 0.3, "learning_rate": 1.593879449125718e-06, "loss": 0.6682, "step": 7500 }, { "epoch": 0.3, "learning_rate": 1.5933607838297804e-06, "loss": 0.6864, "step": 7505 }, { "epoch": 0.31, "learning_rate": 1.5928418720569332e-06, "loss": 0.6775, "step": 7510 }, { "epoch": 0.31, "learning_rate": 1.5923227140227278e-06, "loss": 0.7329, "step": 7515 }, { "epoch": 0.31, "learning_rate": 1.5918033099428182e-06, "loss": 0.6964, "step": 7520 }, { "epoch": 0.31, "learning_rate": 1.59128366003296e-06, "loss": 0.6862, "step": 7525 }, { "epoch": 0.31, "learning_rate": 1.5907637645090117e-06, "loss": 0.6552, "step": 7530 }, { "epoch": 0.31, "learning_rate": 1.5902436235869333e-06, "loss": 0.6738, "step": 7535 }, { "epoch": 0.31, "learning_rate": 1.5897232374827862e-06, "loss": 0.7062, "step": 7540 }, { "epoch": 0.31, "learning_rate": 1.589202606412735e-06, "loss": 0.6975, "step": 7545 }, { "epoch": 0.31, "learning_rate": 1.5886817305930452e-06, "loss": 0.6795, "step": 7550 }, { "epoch": 0.31, "learning_rate": 1.5881606102400836e-06, "loss": 0.6633, "step": 7555 }, { "epoch": 0.31, "learning_rate": 1.5876392455703198e-06, "loss": 0.6959, "step": 7560 }, { "epoch": 0.31, "learning_rate": 1.5871176368003231e-06, "loss": 0.6719, "step": 7565 }, { "epoch": 0.31, "learning_rate": 1.5865957841467666e-06, "loss": 0.7142, "step": 7570 }, { "epoch": 0.31, "learning_rate": 1.5860736878264222e-06, "loss": 0.6729, "step": 7575 }, { "epoch": 0.31, "learning_rate": 1.5855513480561649e-06, "loss": 0.6777, "step": 7580 }, { "epoch": 0.31, "learning_rate": 1.5850287650529698e-06, "loss": 0.6729, "step": 7585 }, { "epoch": 0.31, "learning_rate": 1.5845059390339134e-06, "loss": 0.7104, "step": 7590 }, { "epoch": 0.31, "learning_rate": 1.5839828702161727e-06, "loss": 0.6844, "step": 7595 }, { "epoch": 0.31, "learning_rate": 1.5834595588170266e-06, "loss": 0.6837, "step": 7600 }, { "epoch": 0.31, "learning_rate": 1.5829360050538537e-06, "loss": 0.7076, "step": 7605 }, { "epoch": 0.31, "learning_rate": 1.5824122091441337e-06, "loss": 0.6906, "step": 7610 }, { "epoch": 0.31, "learning_rate": 1.5818881713054469e-06, "loss": 0.7267, "step": 7615 }, { "epoch": 0.31, "learning_rate": 1.5813638917554742e-06, "loss": 0.7293, "step": 7620 }, { "epoch": 0.31, "learning_rate": 1.5808393707119967e-06, "loss": 0.693, "step": 7625 }, { "epoch": 0.31, "learning_rate": 1.5803146083928956e-06, "loss": 0.7122, "step": 7630 }, { "epoch": 0.31, "learning_rate": 1.579789605016153e-06, "loss": 0.6895, "step": 7635 }, { "epoch": 0.31, "learning_rate": 1.5792643607998506e-06, "loss": 0.6916, "step": 7640 }, { "epoch": 0.31, "learning_rate": 1.57873887596217e-06, "loss": 0.6745, "step": 7645 }, { "epoch": 0.31, "learning_rate": 1.5782131507213934e-06, "loss": 0.7242, "step": 7650 }, { "epoch": 0.31, "learning_rate": 1.5776871852959026e-06, "loss": 0.714, "step": 7655 }, { "epoch": 0.31, "learning_rate": 1.5771609799041788e-06, "loss": 0.7086, "step": 7660 }, { "epoch": 0.31, "learning_rate": 1.576634534764803e-06, "loss": 0.738, "step": 7665 }, { "epoch": 0.31, "learning_rate": 1.5761078500964562e-06, "loss": 0.669, "step": 7670 }, { "epoch": 0.31, "learning_rate": 1.5755809261179185e-06, "loss": 0.6587, "step": 7675 }, { "epoch": 0.31, "learning_rate": 1.5750537630480696e-06, "loss": 0.676, "step": 7680 }, { "epoch": 0.31, "learning_rate": 1.5745263611058886e-06, "loss": 0.6838, "step": 7685 }, { "epoch": 0.31, "learning_rate": 1.5739987205104535e-06, "loss": 0.6182, "step": 7690 }, { "epoch": 0.31, "learning_rate": 1.5734708414809415e-06, "loss": 0.6497, "step": 7695 }, { "epoch": 0.31, "learning_rate": 1.572942724236629e-06, "loss": 0.6681, "step": 7700 }, { "epoch": 0.31, "learning_rate": 1.5724143689968915e-06, "loss": 0.7043, "step": 7705 }, { "epoch": 0.31, "learning_rate": 1.5718857759812033e-06, "loss": 0.6924, "step": 7710 }, { "epoch": 0.31, "learning_rate": 1.571356945409137e-06, "loss": 0.6852, "step": 7715 }, { "epoch": 0.31, "learning_rate": 1.570827877500364e-06, "loss": 0.7008, "step": 7720 }, { "epoch": 0.31, "learning_rate": 1.5702985724746552e-06, "loss": 0.7029, "step": 7725 }, { "epoch": 0.31, "learning_rate": 1.5697690305518787e-06, "loss": 0.6684, "step": 7730 }, { "epoch": 0.31, "learning_rate": 1.569239251952002e-06, "loss": 0.7089, "step": 7735 }, { "epoch": 0.31, "learning_rate": 1.5687092368950908e-06, "loss": 0.6756, "step": 7740 }, { "epoch": 0.31, "learning_rate": 1.5681789856013076e-06, "loss": 0.6874, "step": 7745 }, { "epoch": 0.31, "learning_rate": 1.5676484982909154e-06, "loss": 0.7055, "step": 7750 }, { "epoch": 0.31, "learning_rate": 1.5671177751842733e-06, "loss": 0.7159, "step": 7755 }, { "epoch": 0.32, "learning_rate": 1.5665868165018395e-06, "loss": 0.6968, "step": 7760 }, { "epoch": 0.32, "learning_rate": 1.566055622464169e-06, "loss": 0.6533, "step": 7765 }, { "epoch": 0.32, "learning_rate": 1.565524193291916e-06, "loss": 0.6589, "step": 7770 }, { "epoch": 0.32, "learning_rate": 1.564992529205831e-06, "loss": 0.7165, "step": 7775 }, { "epoch": 0.32, "learning_rate": 1.5644606304267627e-06, "loss": 0.6588, "step": 7780 }, { "epoch": 0.32, "learning_rate": 1.5639284971756574e-06, "loss": 0.7004, "step": 7785 }, { "epoch": 0.32, "learning_rate": 1.5633961296735585e-06, "loss": 0.7019, "step": 7790 }, { "epoch": 0.32, "learning_rate": 1.562863528141607e-06, "loss": 0.6711, "step": 7795 }, { "epoch": 0.32, "learning_rate": 1.5623306928010408e-06, "loss": 0.6558, "step": 7800 }, { "epoch": 0.32, "learning_rate": 1.561797623873195e-06, "loss": 0.7162, "step": 7805 }, { "epoch": 0.32, "learning_rate": 1.5612643215795017e-06, "loss": 0.6986, "step": 7810 }, { "epoch": 0.32, "learning_rate": 1.5607307861414905e-06, "loss": 0.717, "step": 7815 }, { "epoch": 0.32, "learning_rate": 1.560197017780787e-06, "loss": 0.7249, "step": 7820 }, { "epoch": 0.32, "learning_rate": 1.5596630167191138e-06, "loss": 0.6965, "step": 7825 }, { "epoch": 0.32, "learning_rate": 1.5591287831782908e-06, "loss": 0.679, "step": 7830 }, { "epoch": 0.32, "learning_rate": 1.5585943173802333e-06, "loss": 0.6812, "step": 7835 }, { "epoch": 0.32, "learning_rate": 1.5580596195469547e-06, "loss": 0.6957, "step": 7840 }, { "epoch": 0.32, "learning_rate": 1.5575246899005629e-06, "loss": 0.6792, "step": 7845 }, { "epoch": 0.32, "learning_rate": 1.5569895286632634e-06, "loss": 0.6675, "step": 7850 }, { "epoch": 0.32, "learning_rate": 1.5564541360573578e-06, "loss": 0.6931, "step": 7855 }, { "epoch": 0.32, "learning_rate": 1.5559185123052427e-06, "loss": 0.7044, "step": 7860 }, { "epoch": 0.32, "learning_rate": 1.5553826576294127e-06, "loss": 0.6978, "step": 7865 }, { "epoch": 0.32, "learning_rate": 1.5548465722524561e-06, "loss": 0.693, "step": 7870 }, { "epoch": 0.32, "learning_rate": 1.554310256397059e-06, "loss": 0.7044, "step": 7875 }, { "epoch": 0.32, "learning_rate": 1.5537737102860015e-06, "loss": 0.6841, "step": 7880 }, { "epoch": 0.32, "learning_rate": 1.5532369341421609e-06, "loss": 0.674, "step": 7885 }, { "epoch": 0.32, "learning_rate": 1.5526999281885088e-06, "loss": 0.6544, "step": 7890 }, { "epoch": 0.32, "learning_rate": 1.552162692648113e-06, "loss": 0.6951, "step": 7895 }, { "epoch": 0.32, "learning_rate": 1.551625227744137e-06, "loss": 0.6969, "step": 7900 }, { "epoch": 0.32, "learning_rate": 1.5510875336998382e-06, "loss": 0.6827, "step": 7905 }, { "epoch": 0.32, "learning_rate": 1.5505496107385704e-06, "loss": 0.6296, "step": 7910 }, { "epoch": 0.32, "learning_rate": 1.550011459083782e-06, "loss": 0.6884, "step": 7915 }, { "epoch": 0.32, "learning_rate": 1.549473078959017e-06, "loss": 0.6726, "step": 7920 }, { "epoch": 0.32, "learning_rate": 1.548934470587913e-06, "loss": 0.6732, "step": 7925 }, { "epoch": 0.32, "learning_rate": 1.548395634194204e-06, "loss": 0.6986, "step": 7930 }, { "epoch": 0.32, "learning_rate": 1.5478565700017174e-06, "loss": 0.7457, "step": 7935 }, { "epoch": 0.32, "learning_rate": 1.547317278234376e-06, "loss": 0.713, "step": 7940 }, { "epoch": 0.32, "learning_rate": 1.5467777591161973e-06, "loss": 0.6842, "step": 7945 }, { "epoch": 0.32, "learning_rate": 1.5462380128712921e-06, "loss": 0.7125, "step": 7950 }, { "epoch": 0.32, "learning_rate": 1.545698039723867e-06, "loss": 0.7007, "step": 7955 }, { "epoch": 0.32, "learning_rate": 1.5451578398982216e-06, "loss": 0.6896, "step": 7960 }, { "epoch": 0.32, "learning_rate": 1.5446174136187503e-06, "loss": 0.7371, "step": 7965 }, { "epoch": 0.32, "learning_rate": 1.544076761109942e-06, "loss": 0.6922, "step": 7970 }, { "epoch": 0.32, "learning_rate": 1.5435358825963784e-06, "loss": 0.7154, "step": 7975 }, { "epoch": 0.32, "learning_rate": 1.542994778302736e-06, "loss": 0.7194, "step": 7980 }, { "epoch": 0.32, "learning_rate": 1.5424534484537847e-06, "loss": 0.6813, "step": 7985 }, { "epoch": 0.32, "learning_rate": 1.5419118932743883e-06, "loss": 0.7203, "step": 7990 }, { "epoch": 0.32, "learning_rate": 1.5413701129895045e-06, "loss": 0.6675, "step": 7995 }, { "epoch": 0.32, "learning_rate": 1.5408281078241835e-06, "loss": 0.7152, "step": 8000 }, { "epoch": 0.32, "eval_loss": 0.6539024114608765, "eval_runtime": 138.566, "eval_samples_per_second": 17.075, "eval_steps_per_second": 2.851, "step": 8000 }, { "epoch": 0.33, "learning_rate": 1.5402858780035697e-06, "loss": 0.6859, "step": 8005 }, { "epoch": 0.33, "learning_rate": 1.5397434237529012e-06, "loss": 0.7315, "step": 8010 }, { "epoch": 0.33, "learning_rate": 1.5392007452975077e-06, "loss": 0.7081, "step": 8015 }, { "epoch": 0.33, "learning_rate": 1.5386578428628142e-06, "loss": 0.682, "step": 8020 }, { "epoch": 0.33, "learning_rate": 1.5381147166743369e-06, "loss": 0.6741, "step": 8025 }, { "epoch": 0.33, "learning_rate": 1.5375713669576857e-06, "loss": 0.6979, "step": 8030 }, { "epoch": 0.33, "learning_rate": 1.5370277939385644e-06, "loss": 0.6927, "step": 8035 }, { "epoch": 0.33, "learning_rate": 1.536483997842767e-06, "loss": 0.6856, "step": 8040 }, { "epoch": 0.33, "learning_rate": 1.5359399788961826e-06, "loss": 0.6683, "step": 8045 }, { "epoch": 0.33, "learning_rate": 1.5353957373247917e-06, "loss": 0.6646, "step": 8050 }, { "epoch": 0.33, "learning_rate": 1.5348512733546674e-06, "loss": 0.6684, "step": 8055 }, { "epoch": 0.33, "learning_rate": 1.5343065872119759e-06, "loss": 0.6741, "step": 8060 }, { "epoch": 0.33, "learning_rate": 1.5337616791229744e-06, "loss": 0.6936, "step": 8065 }, { "epoch": 0.33, "learning_rate": 1.5332165493140133e-06, "loss": 0.6646, "step": 8070 }, { "epoch": 0.33, "learning_rate": 1.5326711980115343e-06, "loss": 0.6936, "step": 8075 }, { "epoch": 0.33, "learning_rate": 1.5321256254420724e-06, "loss": 0.7009, "step": 8080 }, { "epoch": 0.33, "learning_rate": 1.5315798318322532e-06, "loss": 0.7183, "step": 8085 }, { "epoch": 0.33, "learning_rate": 1.5310338174087946e-06, "loss": 0.6853, "step": 8090 }, { "epoch": 0.33, "learning_rate": 1.5304875823985066e-06, "loss": 0.6851, "step": 8095 }, { "epoch": 0.33, "learning_rate": 1.5299411270282898e-06, "loss": 0.6607, "step": 8100 }, { "epoch": 0.33, "learning_rate": 1.5293944515251376e-06, "loss": 0.6715, "step": 8105 }, { "epoch": 0.33, "learning_rate": 1.5288475561161342e-06, "loss": 0.6525, "step": 8110 }, { "epoch": 0.33, "learning_rate": 1.5283004410284549e-06, "loss": 0.6958, "step": 8115 }, { "epoch": 0.33, "learning_rate": 1.5277531064893669e-06, "loss": 0.7071, "step": 8120 }, { "epoch": 0.33, "learning_rate": 1.5272055527262278e-06, "loss": 0.683, "step": 8125 }, { "epoch": 0.33, "learning_rate": 1.526657779966487e-06, "loss": 0.71, "step": 8130 }, { "epoch": 0.33, "learning_rate": 1.5261097884376848e-06, "loss": 0.683, "step": 8135 }, { "epoch": 0.33, "learning_rate": 1.5255615783674512e-06, "loss": 0.6911, "step": 8140 }, { "epoch": 0.33, "learning_rate": 1.5250131499835088e-06, "loss": 0.7089, "step": 8145 }, { "epoch": 0.33, "learning_rate": 1.5244645035136694e-06, "loss": 0.6528, "step": 8150 }, { "epoch": 0.33, "learning_rate": 1.5239156391858363e-06, "loss": 0.6626, "step": 8155 }, { "epoch": 0.33, "learning_rate": 1.523366557228003e-06, "loss": 0.6674, "step": 8160 }, { "epoch": 0.33, "learning_rate": 1.5228172578682531e-06, "loss": 0.6858, "step": 8165 }, { "epoch": 0.33, "learning_rate": 1.5222677413347612e-06, "loss": 0.6944, "step": 8170 }, { "epoch": 0.33, "learning_rate": 1.521718007855791e-06, "loss": 0.6623, "step": 8175 }, { "epoch": 0.33, "learning_rate": 1.5211680576596976e-06, "loss": 0.6764, "step": 8180 }, { "epoch": 0.33, "learning_rate": 1.5206178909749254e-06, "loss": 0.7022, "step": 8185 }, { "epoch": 0.33, "learning_rate": 1.5200675080300086e-06, "loss": 0.6437, "step": 8190 }, { "epoch": 0.33, "learning_rate": 1.519516909053572e-06, "loss": 0.6888, "step": 8195 }, { "epoch": 0.33, "learning_rate": 1.518966094274329e-06, "loss": 0.6837, "step": 8200 }, { "epoch": 0.33, "learning_rate": 1.518415063921084e-06, "loss": 0.7228, "step": 8205 }, { "epoch": 0.33, "learning_rate": 1.5178638182227292e-06, "loss": 0.6647, "step": 8210 }, { "epoch": 0.33, "learning_rate": 1.5173123574082482e-06, "loss": 0.6992, "step": 8215 }, { "epoch": 0.33, "learning_rate": 1.5167606817067129e-06, "loss": 0.6828, "step": 8220 }, { "epoch": 0.33, "learning_rate": 1.5162087913472844e-06, "loss": 0.7303, "step": 8225 }, { "epoch": 0.33, "learning_rate": 1.5156566865592128e-06, "loss": 0.6794, "step": 8230 }, { "epoch": 0.33, "learning_rate": 1.5151043675718383e-06, "loss": 0.6442, "step": 8235 }, { "epoch": 0.33, "learning_rate": 1.5145518346145887e-06, "loss": 0.6669, "step": 8240 }, { "epoch": 0.33, "learning_rate": 1.5139990879169822e-06, "loss": 0.6902, "step": 8245 }, { "epoch": 0.34, "learning_rate": 1.513446127708624e-06, "loss": 0.6503, "step": 8250 }, { "epoch": 0.34, "learning_rate": 1.5128929542192102e-06, "loss": 0.6971, "step": 8255 }, { "epoch": 0.34, "learning_rate": 1.512339567678523e-06, "loss": 0.7052, "step": 8260 }, { "epoch": 0.34, "learning_rate": 1.511785968316435e-06, "loss": 0.6673, "step": 8265 }, { "epoch": 0.34, "learning_rate": 1.5112321563629066e-06, "loss": 0.6816, "step": 8270 }, { "epoch": 0.34, "learning_rate": 1.5106781320479862e-06, "loss": 0.6708, "step": 8275 }, { "epoch": 0.34, "learning_rate": 1.5101238956018109e-06, "loss": 0.7032, "step": 8280 }, { "epoch": 0.34, "learning_rate": 1.5095694472546058e-06, "loss": 0.7008, "step": 8285 }, { "epoch": 0.34, "learning_rate": 1.5090147872366835e-06, "loss": 0.719, "step": 8290 }, { "epoch": 0.34, "learning_rate": 1.5084599157784453e-06, "loss": 0.7094, "step": 8295 }, { "epoch": 0.34, "learning_rate": 1.5079048331103797e-06, "loss": 0.7306, "step": 8300 }, { "epoch": 0.34, "learning_rate": 1.5073495394630636e-06, "loss": 0.7004, "step": 8305 }, { "epoch": 0.34, "learning_rate": 1.5067940350671606e-06, "loss": 0.7107, "step": 8310 }, { "epoch": 0.34, "learning_rate": 1.506238320153423e-06, "loss": 0.7033, "step": 8315 }, { "epoch": 0.34, "learning_rate": 1.5056823949526898e-06, "loss": 0.6855, "step": 8320 }, { "epoch": 0.34, "learning_rate": 1.5051262596958872e-06, "loss": 0.7119, "step": 8325 }, { "epoch": 0.34, "learning_rate": 1.5045699146140289e-06, "loss": 0.6853, "step": 8330 }, { "epoch": 0.34, "learning_rate": 1.5040133599382162e-06, "loss": 0.71, "step": 8335 }, { "epoch": 0.34, "learning_rate": 1.503456595899637e-06, "loss": 0.6527, "step": 8340 }, { "epoch": 0.34, "learning_rate": 1.5028996227295664e-06, "loss": 0.6692, "step": 8345 }, { "epoch": 0.34, "learning_rate": 1.5023424406593654e-06, "loss": 0.6568, "step": 8350 }, { "epoch": 0.34, "learning_rate": 1.5017850499204835e-06, "loss": 0.658, "step": 8355 }, { "epoch": 0.34, "learning_rate": 1.501227450744455e-06, "loss": 0.6895, "step": 8360 }, { "epoch": 0.34, "learning_rate": 1.5006696433629032e-06, "loss": 0.7062, "step": 8365 }, { "epoch": 0.34, "learning_rate": 1.5001116280075353e-06, "loss": 0.6883, "step": 8370 }, { "epoch": 0.34, "learning_rate": 1.499553404910146e-06, "loss": 0.666, "step": 8375 }, { "epoch": 0.34, "learning_rate": 1.4989949743026169e-06, "loss": 0.709, "step": 8380 }, { "epoch": 0.34, "learning_rate": 1.4984363364169145e-06, "loss": 0.7225, "step": 8385 }, { "epoch": 0.34, "learning_rate": 1.4978774914850933e-06, "loss": 0.6657, "step": 8390 }, { "epoch": 0.34, "learning_rate": 1.4973184397392915e-06, "loss": 0.6774, "step": 8395 }, { "epoch": 0.34, "learning_rate": 1.4967591814117347e-06, "loss": 0.6617, "step": 8400 }, { "epoch": 0.34, "learning_rate": 1.496199716734734e-06, "loss": 0.669, "step": 8405 }, { "epoch": 0.34, "learning_rate": 1.4956400459406862e-06, "loss": 0.6798, "step": 8410 }, { "epoch": 0.34, "learning_rate": 1.4950801692620735e-06, "loss": 0.6421, "step": 8415 }, { "epoch": 0.34, "learning_rate": 1.494520086931464e-06, "loss": 0.6742, "step": 8420 }, { "epoch": 0.34, "learning_rate": 1.4939597991815107e-06, "loss": 0.6478, "step": 8425 }, { "epoch": 0.34, "learning_rate": 1.493399306244953e-06, "loss": 0.6817, "step": 8430 }, { "epoch": 0.34, "learning_rate": 1.492838608354614e-06, "loss": 0.7469, "step": 8435 }, { "epoch": 0.34, "learning_rate": 1.4922777057434031e-06, "loss": 0.6713, "step": 8440 }, { "epoch": 0.34, "learning_rate": 1.4917165986443142e-06, "loss": 0.7093, "step": 8445 }, { "epoch": 0.34, "learning_rate": 1.4911552872904266e-06, "loss": 0.6825, "step": 8450 }, { "epoch": 0.34, "learning_rate": 1.4905937719149035e-06, "loss": 0.664, "step": 8455 }, { "epoch": 0.34, "learning_rate": 1.4900320527509942e-06, "loss": 0.6827, "step": 8460 }, { "epoch": 0.34, "learning_rate": 1.489470130032032e-06, "loss": 0.7093, "step": 8465 }, { "epoch": 0.34, "learning_rate": 1.488908003991434e-06, "loss": 0.7162, "step": 8470 }, { "epoch": 0.34, "learning_rate": 1.4883456748627032e-06, "loss": 0.6738, "step": 8475 }, { "epoch": 0.34, "learning_rate": 1.4877831428794258e-06, "loss": 0.6977, "step": 8480 }, { "epoch": 0.34, "learning_rate": 1.4872204082752728e-06, "loss": 0.7097, "step": 8485 }, { "epoch": 0.34, "learning_rate": 1.4866574712839994e-06, "loss": 0.6716, "step": 8490 }, { "epoch": 0.35, "learning_rate": 1.4860943321394443e-06, "loss": 0.6891, "step": 8495 }, { "epoch": 0.35, "learning_rate": 1.4855309910755313e-06, "loss": 0.6471, "step": 8500 }, { "epoch": 0.35, "learning_rate": 1.4849674483262668e-06, "loss": 0.6544, "step": 8505 }, { "epoch": 0.35, "learning_rate": 1.4844037041257416e-06, "loss": 0.6554, "step": 8510 }, { "epoch": 0.35, "learning_rate": 1.4838397587081307e-06, "loss": 0.694, "step": 8515 }, { "epoch": 0.35, "learning_rate": 1.4832756123076912e-06, "loss": 0.6488, "step": 8520 }, { "epoch": 0.35, "learning_rate": 1.4827112651587656e-06, "loss": 0.6885, "step": 8525 }, { "epoch": 0.35, "learning_rate": 1.482146717495778e-06, "loss": 0.6827, "step": 8530 }, { "epoch": 0.35, "learning_rate": 1.481581969553237e-06, "loss": 0.7017, "step": 8535 }, { "epoch": 0.35, "learning_rate": 1.481017021565734e-06, "loss": 0.681, "step": 8540 }, { "epoch": 0.35, "learning_rate": 1.4804518737679432e-06, "loss": 0.6228, "step": 8545 }, { "epoch": 0.35, "learning_rate": 1.4798865263946223e-06, "loss": 0.7208, "step": 8550 }, { "epoch": 0.35, "learning_rate": 1.4793209796806117e-06, "loss": 0.6627, "step": 8555 }, { "epoch": 0.35, "learning_rate": 1.4787552338608341e-06, "loss": 0.6698, "step": 8560 }, { "epoch": 0.35, "learning_rate": 1.4781892891702965e-06, "loss": 0.6631, "step": 8565 }, { "epoch": 0.35, "learning_rate": 1.4776231458440862e-06, "loss": 0.6991, "step": 8570 }, { "epoch": 0.35, "learning_rate": 1.477056804117375e-06, "loss": 0.65, "step": 8575 }, { "epoch": 0.35, "learning_rate": 1.476490264225416e-06, "loss": 0.6846, "step": 8580 }, { "epoch": 0.35, "learning_rate": 1.475923526403545e-06, "loss": 0.6916, "step": 8585 }, { "epoch": 0.35, "learning_rate": 1.47535659088718e-06, "loss": 0.654, "step": 8590 }, { "epoch": 0.35, "learning_rate": 1.4747894579118208e-06, "loss": 0.7077, "step": 8595 }, { "epoch": 0.35, "learning_rate": 1.47422212771305e-06, "loss": 0.6887, "step": 8600 }, { "epoch": 0.35, "learning_rate": 1.4736546005265314e-06, "loss": 0.6919, "step": 8605 }, { "epoch": 0.35, "learning_rate": 1.4730868765880109e-06, "loss": 0.6471, "step": 8610 }, { "epoch": 0.35, "learning_rate": 1.4725189561333158e-06, "loss": 0.706, "step": 8615 }, { "epoch": 0.35, "learning_rate": 1.4719508393983555e-06, "loss": 0.6916, "step": 8620 }, { "epoch": 0.35, "learning_rate": 1.471382526619121e-06, "loss": 0.6624, "step": 8625 }, { "epoch": 0.35, "learning_rate": 1.4708140180316843e-06, "loss": 0.6888, "step": 8630 }, { "epoch": 0.35, "learning_rate": 1.470245313872199e-06, "loss": 0.6796, "step": 8635 }, { "epoch": 0.35, "learning_rate": 1.4696764143768997e-06, "loss": 0.6776, "step": 8640 }, { "epoch": 0.35, "learning_rate": 1.469107319782102e-06, "loss": 0.7217, "step": 8645 }, { "epoch": 0.35, "learning_rate": 1.4685380303242037e-06, "loss": 0.6858, "step": 8650 }, { "epoch": 0.35, "learning_rate": 1.4679685462396817e-06, "loss": 0.6937, "step": 8655 }, { "epoch": 0.35, "learning_rate": 1.467398867765096e-06, "loss": 0.6747, "step": 8660 }, { "epoch": 0.35, "learning_rate": 1.4668289951370848e-06, "loss": 0.6699, "step": 8665 }, { "epoch": 0.35, "learning_rate": 1.4662589285923686e-06, "loss": 0.7111, "step": 8670 }, { "epoch": 0.35, "learning_rate": 1.4656886683677486e-06, "loss": 0.667, "step": 8675 }, { "epoch": 0.35, "learning_rate": 1.4651182147001055e-06, "loss": 0.6586, "step": 8680 }, { "epoch": 0.35, "learning_rate": 1.464547567826401e-06, "loss": 0.6996, "step": 8685 }, { "epoch": 0.35, "learning_rate": 1.4639767279836766e-06, "loss": 0.7181, "step": 8690 }, { "epoch": 0.35, "learning_rate": 1.463405695409054e-06, "loss": 0.6878, "step": 8695 }, { "epoch": 0.35, "learning_rate": 1.462834470339736e-06, "loss": 0.6247, "step": 8700 }, { "epoch": 0.35, "learning_rate": 1.4622630530130037e-06, "loss": 0.7024, "step": 8705 }, { "epoch": 0.35, "learning_rate": 1.4616914436662195e-06, "loss": 0.6748, "step": 8710 }, { "epoch": 0.35, "learning_rate": 1.4611196425368247e-06, "loss": 0.6877, "step": 8715 }, { "epoch": 0.35, "learning_rate": 1.46054764986234e-06, "loss": 0.663, "step": 8720 }, { "epoch": 0.35, "learning_rate": 1.4599754658803671e-06, "loss": 0.6649, "step": 8725 }, { "epoch": 0.35, "learning_rate": 1.4594030908285858e-06, "loss": 0.6701, "step": 8730 }, { "epoch": 0.35, "learning_rate": 1.4588305249447557e-06, "loss": 0.6684, "step": 8735 }, { "epoch": 0.36, "learning_rate": 1.4582577684667156e-06, "loss": 0.7104, "step": 8740 }, { "epoch": 0.36, "learning_rate": 1.457684821632384e-06, "loss": 0.7183, "step": 8745 }, { "epoch": 0.36, "learning_rate": 1.457111684679757e-06, "loss": 0.7016, "step": 8750 }, { "epoch": 0.36, "learning_rate": 1.4565383578469119e-06, "loss": 0.6959, "step": 8755 }, { "epoch": 0.36, "learning_rate": 1.4559648413720033e-06, "loss": 0.7299, "step": 8760 }, { "epoch": 0.36, "learning_rate": 1.4553911354932646e-06, "loss": 0.6539, "step": 8765 }, { "epoch": 0.36, "learning_rate": 1.4548172404490089e-06, "loss": 0.685, "step": 8770 }, { "epoch": 0.36, "learning_rate": 1.4542431564776265e-06, "loss": 0.7004, "step": 8775 }, { "epoch": 0.36, "learning_rate": 1.453668883817587e-06, "loss": 0.6742, "step": 8780 }, { "epoch": 0.36, "learning_rate": 1.453094422707439e-06, "loss": 0.6744, "step": 8785 }, { "epoch": 0.36, "learning_rate": 1.4525197733858077e-06, "loss": 0.6871, "step": 8790 }, { "epoch": 0.36, "learning_rate": 1.451944936091398e-06, "loss": 0.6567, "step": 8795 }, { "epoch": 0.36, "learning_rate": 1.4513699110629921e-06, "loss": 0.6838, "step": 8800 }, { "epoch": 0.36, "eval_loss": 0.6491459012031555, "eval_runtime": 138.8275, "eval_samples_per_second": 17.043, "eval_steps_per_second": 2.845, "step": 8800 }, { "epoch": 0.36, "learning_rate": 1.45079469853945e-06, "loss": 0.6431, "step": 8805 }, { "epoch": 0.36, "learning_rate": 1.4502192987597113e-06, "loss": 0.6606, "step": 8810 }, { "epoch": 0.36, "learning_rate": 1.4496437119627905e-06, "loss": 0.6917, "step": 8815 }, { "epoch": 0.36, "learning_rate": 1.4490679383877825e-06, "loss": 0.6826, "step": 8820 }, { "epoch": 0.36, "learning_rate": 1.4484919782738581e-06, "loss": 0.6904, "step": 8825 }, { "epoch": 0.36, "learning_rate": 1.4479158318602658e-06, "loss": 0.6781, "step": 8830 }, { "epoch": 0.36, "learning_rate": 1.4473394993863325e-06, "loss": 0.7099, "step": 8835 }, { "epoch": 0.36, "learning_rate": 1.4467629810914615e-06, "loss": 0.6993, "step": 8840 }, { "epoch": 0.36, "learning_rate": 1.4461862772151333e-06, "loss": 0.6606, "step": 8845 }, { "epoch": 0.36, "learning_rate": 1.4456093879969057e-06, "loss": 0.7129, "step": 8850 }, { "epoch": 0.36, "learning_rate": 1.4450323136764136e-06, "loss": 0.6789, "step": 8855 }, { "epoch": 0.36, "learning_rate": 1.4444550544933684e-06, "loss": 0.6828, "step": 8860 }, { "epoch": 0.36, "learning_rate": 1.443877610687559e-06, "loss": 0.701, "step": 8865 }, { "epoch": 0.36, "learning_rate": 1.4432999824988503e-06, "loss": 0.6962, "step": 8870 }, { "epoch": 0.36, "learning_rate": 1.442722170167184e-06, "loss": 0.6399, "step": 8875 }, { "epoch": 0.36, "learning_rate": 1.4421441739325782e-06, "loss": 0.6943, "step": 8880 }, { "epoch": 0.36, "learning_rate": 1.4415659940351275e-06, "loss": 0.6932, "step": 8885 }, { "epoch": 0.36, "learning_rate": 1.440987630715003e-06, "loss": 0.7096, "step": 8890 }, { "epoch": 0.36, "learning_rate": 1.4404090842124519e-06, "loss": 0.6652, "step": 8895 }, { "epoch": 0.36, "learning_rate": 1.439830354767797e-06, "loss": 0.6988, "step": 8900 }, { "epoch": 0.36, "learning_rate": 1.4392514426214378e-06, "loss": 0.6862, "step": 8905 }, { "epoch": 0.36, "learning_rate": 1.4386723480138491e-06, "loss": 0.7313, "step": 8910 }, { "epoch": 0.36, "learning_rate": 1.438093071185582e-06, "loss": 0.6876, "step": 8915 }, { "epoch": 0.36, "learning_rate": 1.437513612377263e-06, "loss": 0.664, "step": 8920 }, { "epoch": 0.36, "learning_rate": 1.436933971829594e-06, "loss": 0.6918, "step": 8925 }, { "epoch": 0.36, "learning_rate": 1.4363541497833534e-06, "loss": 0.7093, "step": 8930 }, { "epoch": 0.36, "learning_rate": 1.4357741464793932e-06, "loss": 0.7008, "step": 8935 }, { "epoch": 0.36, "learning_rate": 1.4351939621586424e-06, "loss": 0.6459, "step": 8940 }, { "epoch": 0.36, "learning_rate": 1.4346135970621045e-06, "loss": 0.7083, "step": 8945 }, { "epoch": 0.36, "learning_rate": 1.4340330514308576e-06, "loss": 0.6674, "step": 8950 }, { "epoch": 0.36, "learning_rate": 1.4334523255060563e-06, "loss": 0.6906, "step": 8955 }, { "epoch": 0.36, "learning_rate": 1.432871419528928e-06, "loss": 0.6976, "step": 8960 }, { "epoch": 0.36, "learning_rate": 1.432290333740776e-06, "loss": 0.6487, "step": 8965 }, { "epoch": 0.36, "learning_rate": 1.4317090683829797e-06, "loss": 0.6716, "step": 8970 }, { "epoch": 0.36, "learning_rate": 1.43112762369699e-06, "loss": 0.6578, "step": 8975 }, { "epoch": 0.36, "learning_rate": 1.4305459999243353e-06, "loss": 0.6437, "step": 8980 }, { "epoch": 0.36, "learning_rate": 1.429964197306616e-06, "loss": 0.6527, "step": 8985 }, { "epoch": 0.37, "learning_rate": 1.4293822160855083e-06, "loss": 0.7384, "step": 8990 }, { "epoch": 0.37, "learning_rate": 1.4288000565027623e-06, "loss": 0.6817, "step": 8995 }, { "epoch": 0.37, "learning_rate": 1.4282177188002016e-06, "loss": 0.6805, "step": 9000 }, { "epoch": 0.37, "learning_rate": 1.427635203219725e-06, "loss": 0.6908, "step": 9005 }, { "epoch": 0.37, "learning_rate": 1.4270525100033036e-06, "loss": 0.6745, "step": 9010 }, { "epoch": 0.37, "learning_rate": 1.4264696393929832e-06, "loss": 0.6801, "step": 9015 }, { "epoch": 0.37, "learning_rate": 1.4258865916308834e-06, "loss": 0.6856, "step": 9020 }, { "epoch": 0.37, "learning_rate": 1.4253033669591971e-06, "loss": 0.6676, "step": 9025 }, { "epoch": 0.37, "learning_rate": 1.424719965620191e-06, "loss": 0.6524, "step": 9030 }, { "epoch": 0.37, "learning_rate": 1.4241363878562046e-06, "loss": 0.6867, "step": 9035 }, { "epoch": 0.37, "learning_rate": 1.4235526339096514e-06, "loss": 0.6674, "step": 9040 }, { "epoch": 0.37, "learning_rate": 1.422968704023017e-06, "loss": 0.659, "step": 9045 }, { "epoch": 0.37, "learning_rate": 1.4223845984388613e-06, "loss": 0.6706, "step": 9050 }, { "epoch": 0.37, "learning_rate": 1.421800317399817e-06, "loss": 0.6867, "step": 9055 }, { "epoch": 0.37, "learning_rate": 1.421215861148589e-06, "loss": 0.7136, "step": 9060 }, { "epoch": 0.37, "learning_rate": 1.420631229927955e-06, "loss": 0.7003, "step": 9065 }, { "epoch": 0.37, "learning_rate": 1.4200464239807664e-06, "loss": 0.6764, "step": 9070 }, { "epoch": 0.37, "learning_rate": 1.4194614435499458e-06, "loss": 0.6954, "step": 9075 }, { "epoch": 0.37, "learning_rate": 1.4188762888784897e-06, "loss": 0.6641, "step": 9080 }, { "epoch": 0.37, "learning_rate": 1.4182909602094662e-06, "loss": 0.6822, "step": 9085 }, { "epoch": 0.37, "learning_rate": 1.417705457786015e-06, "loss": 0.7138, "step": 9090 }, { "epoch": 0.37, "learning_rate": 1.4171197818513495e-06, "loss": 0.7152, "step": 9095 }, { "epoch": 0.37, "learning_rate": 1.4165339326487535e-06, "loss": 0.6324, "step": 9100 }, { "epoch": 0.37, "learning_rate": 1.4159479104215846e-06, "loss": 0.6677, "step": 9105 }, { "epoch": 0.37, "learning_rate": 1.4153617154132712e-06, "loss": 0.6521, "step": 9110 }, { "epoch": 0.37, "learning_rate": 1.414775347867313e-06, "loss": 0.7191, "step": 9115 }, { "epoch": 0.37, "learning_rate": 1.4141888080272825e-06, "loss": 0.6545, "step": 9120 }, { "epoch": 0.37, "learning_rate": 1.4136020961368228e-06, "loss": 0.7033, "step": 9125 }, { "epoch": 0.37, "learning_rate": 1.4130152124396497e-06, "loss": 0.6917, "step": 9130 }, { "epoch": 0.37, "learning_rate": 1.412428157179549e-06, "loss": 0.7083, "step": 9135 }, { "epoch": 0.37, "learning_rate": 1.4118409306003784e-06, "loss": 0.6786, "step": 9140 }, { "epoch": 0.37, "learning_rate": 1.4112535329460671e-06, "loss": 0.677, "step": 9145 }, { "epoch": 0.37, "learning_rate": 1.4106659644606139e-06, "loss": 0.6903, "step": 9150 }, { "epoch": 0.37, "learning_rate": 1.410078225388091e-06, "loss": 0.659, "step": 9155 }, { "epoch": 0.37, "learning_rate": 1.409490315972639e-06, "loss": 0.7028, "step": 9160 }, { "epoch": 0.37, "learning_rate": 1.4089022364584712e-06, "loss": 0.6607, "step": 9165 }, { "epoch": 0.37, "learning_rate": 1.4083139870898706e-06, "loss": 0.6595, "step": 9170 }, { "epoch": 0.37, "learning_rate": 1.4077255681111903e-06, "loss": 0.6557, "step": 9175 }, { "epoch": 0.37, "learning_rate": 1.4071369797668545e-06, "loss": 0.659, "step": 9180 }, { "epoch": 0.37, "learning_rate": 1.4065482223013585e-06, "loss": 0.6879, "step": 9185 }, { "epoch": 0.37, "learning_rate": 1.405959295959266e-06, "loss": 0.6708, "step": 9190 }, { "epoch": 0.37, "learning_rate": 1.4053702009852128e-06, "loss": 0.6839, "step": 9195 }, { "epoch": 0.37, "learning_rate": 1.4047809376239034e-06, "loss": 0.6835, "step": 9200 }, { "epoch": 0.37, "learning_rate": 1.4041915061201122e-06, "loss": 0.7229, "step": 9205 }, { "epoch": 0.37, "learning_rate": 1.4036019067186843e-06, "loss": 0.6944, "step": 9210 }, { "epoch": 0.37, "learning_rate": 1.4030121396645349e-06, "loss": 0.6232, "step": 9215 }, { "epoch": 0.37, "learning_rate": 1.402422205202647e-06, "loss": 0.7058, "step": 9220 }, { "epoch": 0.37, "learning_rate": 1.4018321035780747e-06, "loss": 0.6815, "step": 9225 }, { "epoch": 0.37, "learning_rate": 1.4012418350359414e-06, "loss": 0.6449, "step": 9230 }, { "epoch": 0.38, "learning_rate": 1.4006513998214386e-06, "loss": 0.6611, "step": 9235 }, { "epoch": 0.38, "learning_rate": 1.4000607981798292e-06, "loss": 0.6489, "step": 9240 }, { "epoch": 0.38, "learning_rate": 1.3994700303564431e-06, "loss": 0.6866, "step": 9245 }, { "epoch": 0.38, "learning_rate": 1.3988790965966801e-06, "loss": 0.6553, "step": 9250 }, { "epoch": 0.38, "learning_rate": 1.3982879971460096e-06, "loss": 0.6936, "step": 9255 }, { "epoch": 0.38, "learning_rate": 1.3976967322499683e-06, "loss": 0.6741, "step": 9260 }, { "epoch": 0.38, "learning_rate": 1.3971053021541633e-06, "loss": 0.6491, "step": 9265 }, { "epoch": 0.38, "learning_rate": 1.396513707104269e-06, "loss": 0.6819, "step": 9270 }, { "epoch": 0.38, "learning_rate": 1.395921947346029e-06, "loss": 0.6472, "step": 9275 }, { "epoch": 0.38, "learning_rate": 1.3953300231252554e-06, "loss": 0.6588, "step": 9280 }, { "epoch": 0.38, "learning_rate": 1.394737934687828e-06, "loss": 0.6795, "step": 9285 }, { "epoch": 0.38, "learning_rate": 1.3941456822796954e-06, "loss": 0.6876, "step": 9290 }, { "epoch": 0.38, "learning_rate": 1.393553266146874e-06, "loss": 0.6439, "step": 9295 }, { "epoch": 0.38, "learning_rate": 1.3929606865354484e-06, "loss": 0.6678, "step": 9300 }, { "epoch": 0.38, "learning_rate": 1.3923679436915711e-06, "loss": 0.6646, "step": 9305 }, { "epoch": 0.38, "learning_rate": 1.3917750378614619e-06, "loss": 0.6721, "step": 9310 }, { "epoch": 0.38, "learning_rate": 1.391181969291409e-06, "loss": 0.6741, "step": 9315 }, { "epoch": 0.38, "learning_rate": 1.3905887382277683e-06, "loss": 0.6298, "step": 9320 }, { "epoch": 0.38, "learning_rate": 1.3899953449169622e-06, "loss": 0.6536, "step": 9325 }, { "epoch": 0.38, "learning_rate": 1.3894017896054814e-06, "loss": 0.7037, "step": 9330 }, { "epoch": 0.38, "learning_rate": 1.3888080725398836e-06, "loss": 0.6777, "step": 9335 }, { "epoch": 0.38, "learning_rate": 1.3882141939667937e-06, "loss": 0.6805, "step": 9340 }, { "epoch": 0.38, "learning_rate": 1.3876201541329038e-06, "loss": 0.666, "step": 9345 }, { "epoch": 0.38, "learning_rate": 1.3870259532849726e-06, "loss": 0.6619, "step": 9350 }, { "epoch": 0.38, "learning_rate": 1.3864315916698264e-06, "loss": 0.7109, "step": 9355 }, { "epoch": 0.38, "learning_rate": 1.3858370695343573e-06, "loss": 0.6914, "step": 9360 }, { "epoch": 0.38, "learning_rate": 1.3852423871255252e-06, "loss": 0.6847, "step": 9365 }, { "epoch": 0.38, "learning_rate": 1.3846475446903555e-06, "loss": 0.6819, "step": 9370 }, { "epoch": 0.38, "learning_rate": 1.384052542475941e-06, "loss": 0.7004, "step": 9375 }, { "epoch": 0.38, "learning_rate": 1.3834573807294404e-06, "loss": 0.7206, "step": 9380 }, { "epoch": 0.38, "learning_rate": 1.382862059698078e-06, "loss": 0.7003, "step": 9385 }, { "epoch": 0.38, "learning_rate": 1.3822665796291459e-06, "loss": 0.686, "step": 9390 }, { "epoch": 0.38, "learning_rate": 1.3816709407700007e-06, "loss": 0.682, "step": 9395 }, { "epoch": 0.38, "learning_rate": 1.3810751433680662e-06, "loss": 0.6414, "step": 9400 }, { "epoch": 0.38, "learning_rate": 1.380479187670831e-06, "loss": 0.6494, "step": 9405 }, { "epoch": 0.38, "learning_rate": 1.37988307392585e-06, "loss": 0.6997, "step": 9410 }, { "epoch": 0.38, "learning_rate": 1.3792868023807441e-06, "loss": 0.6835, "step": 9415 }, { "epoch": 0.38, "learning_rate": 1.3786903732831982e-06, "loss": 0.6819, "step": 9420 }, { "epoch": 0.38, "learning_rate": 1.378093786880965e-06, "loss": 0.6568, "step": 9425 }, { "epoch": 0.38, "learning_rate": 1.3774970434218609e-06, "loss": 0.6811, "step": 9430 }, { "epoch": 0.38, "learning_rate": 1.3769001431537675e-06, "loss": 0.6692, "step": 9435 }, { "epoch": 0.38, "learning_rate": 1.3763030863246326e-06, "loss": 0.6732, "step": 9440 }, { "epoch": 0.38, "learning_rate": 1.3757058731824679e-06, "loss": 0.6602, "step": 9445 }, { "epoch": 0.38, "learning_rate": 1.3751085039753508e-06, "loss": 0.6597, "step": 9450 }, { "epoch": 0.38, "learning_rate": 1.3745109789514233e-06, "loss": 0.6906, "step": 9455 }, { "epoch": 0.38, "learning_rate": 1.3739132983588921e-06, "loss": 0.6466, "step": 9460 }, { "epoch": 0.38, "learning_rate": 1.3733154624460283e-06, "loss": 0.6613, "step": 9465 }, { "epoch": 0.38, "learning_rate": 1.372717471461168e-06, "loss": 0.6821, "step": 9470 }, { "epoch": 0.38, "learning_rate": 1.3721193256527116e-06, "loss": 0.6539, "step": 9475 }, { "epoch": 0.39, "learning_rate": 1.3715210252691232e-06, "loss": 0.6994, "step": 9480 }, { "epoch": 0.39, "learning_rate": 1.3709225705589318e-06, "loss": 0.6556, "step": 9485 }, { "epoch": 0.39, "learning_rate": 1.3703239617707306e-06, "loss": 0.7155, "step": 9490 }, { "epoch": 0.39, "learning_rate": 1.3697251991531756e-06, "loss": 0.6621, "step": 9495 }, { "epoch": 0.39, "learning_rate": 1.3691262829549883e-06, "loss": 0.6771, "step": 9500 }, { "epoch": 0.39, "learning_rate": 1.3685272134249535e-06, "loss": 0.7038, "step": 9505 }, { "epoch": 0.39, "learning_rate": 1.3679279908119187e-06, "loss": 0.6903, "step": 9510 }, { "epoch": 0.39, "learning_rate": 1.3673286153647967e-06, "loss": 0.7236, "step": 9515 }, { "epoch": 0.39, "learning_rate": 1.3667290873325618e-06, "loss": 0.6958, "step": 9520 }, { "epoch": 0.39, "learning_rate": 1.3661294069642537e-06, "loss": 0.653, "step": 9525 }, { "epoch": 0.39, "learning_rate": 1.3655295745089736e-06, "loss": 0.6925, "step": 9530 }, { "epoch": 0.39, "learning_rate": 1.3649295902158873e-06, "loss": 0.6607, "step": 9535 }, { "epoch": 0.39, "learning_rate": 1.3643294543342228e-06, "loss": 0.7378, "step": 9540 }, { "epoch": 0.39, "learning_rate": 1.363729167113271e-06, "loss": 0.6789, "step": 9545 }, { "epoch": 0.39, "learning_rate": 1.3631287288023866e-06, "loss": 0.6862, "step": 9550 }, { "epoch": 0.39, "learning_rate": 1.362528139650986e-06, "loss": 0.7204, "step": 9555 }, { "epoch": 0.39, "learning_rate": 1.361927399908549e-06, "loss": 0.6474, "step": 9560 }, { "epoch": 0.39, "learning_rate": 1.3613265098246176e-06, "loss": 0.6564, "step": 9565 }, { "epoch": 0.39, "learning_rate": 1.3607254696487962e-06, "loss": 0.6762, "step": 9570 }, { "epoch": 0.39, "learning_rate": 1.360124279630752e-06, "loss": 0.6903, "step": 9575 }, { "epoch": 0.39, "learning_rate": 1.3595229400202134e-06, "loss": 0.6778, "step": 9580 }, { "epoch": 0.39, "learning_rate": 1.3589214510669722e-06, "loss": 0.7117, "step": 9585 }, { "epoch": 0.39, "learning_rate": 1.3583198130208819e-06, "loss": 0.6531, "step": 9590 }, { "epoch": 0.39, "learning_rate": 1.3577180261318569e-06, "loss": 0.7066, "step": 9595 }, { "epoch": 0.39, "learning_rate": 1.357116090649875e-06, "loss": 0.7132, "step": 9600 }, { "epoch": 0.39, "eval_loss": 0.6456849575042725, "eval_runtime": 138.6043, "eval_samples_per_second": 17.07, "eval_steps_per_second": 2.85, "step": 9600 }, { "epoch": 0.39, "learning_rate": 1.3565140068249746e-06, "loss": 0.6633, "step": 9605 }, { "epoch": 0.39, "learning_rate": 1.3559117749072559e-06, "loss": 0.6652, "step": 9610 }, { "epoch": 0.39, "learning_rate": 1.3553093951468817e-06, "loss": 0.6863, "step": 9615 }, { "epoch": 0.39, "learning_rate": 1.354706867794074e-06, "loss": 0.6849, "step": 9620 }, { "epoch": 0.39, "learning_rate": 1.3541041930991187e-06, "loss": 0.6647, "step": 9625 }, { "epoch": 0.39, "learning_rate": 1.3535013713123606e-06, "loss": 0.6668, "step": 9630 }, { "epoch": 0.39, "learning_rate": 1.352898402684207e-06, "loss": 0.6574, "step": 9635 }, { "epoch": 0.39, "learning_rate": 1.3522952874651262e-06, "loss": 0.6735, "step": 9640 }, { "epoch": 0.39, "learning_rate": 1.3516920259056467e-06, "loss": 0.688, "step": 9645 }, { "epoch": 0.39, "learning_rate": 1.351088618256358e-06, "loss": 0.6553, "step": 9650 }, { "epoch": 0.39, "learning_rate": 1.3504850647679104e-06, "loss": 0.6818, "step": 9655 }, { "epoch": 0.39, "learning_rate": 1.349881365691015e-06, "loss": 0.6596, "step": 9660 }, { "epoch": 0.39, "learning_rate": 1.349277521276443e-06, "loss": 0.6655, "step": 9665 }, { "epoch": 0.39, "learning_rate": 1.3486735317750257e-06, "loss": 0.6465, "step": 9670 }, { "epoch": 0.39, "learning_rate": 1.3480693974376561e-06, "loss": 0.6647, "step": 9675 }, { "epoch": 0.39, "learning_rate": 1.3474651185152854e-06, "loss": 0.6955, "step": 9680 }, { "epoch": 0.39, "learning_rate": 1.3468606952589265e-06, "loss": 0.6851, "step": 9685 }, { "epoch": 0.39, "learning_rate": 1.346256127919651e-06, "loss": 0.6865, "step": 9690 }, { "epoch": 0.39, "learning_rate": 1.345651416748591e-06, "loss": 0.6518, "step": 9695 }, { "epoch": 0.39, "learning_rate": 1.3450465619969386e-06, "loss": 0.67, "step": 9700 }, { "epoch": 0.39, "learning_rate": 1.344441563915945e-06, "loss": 0.7331, "step": 9705 }, { "epoch": 0.39, "learning_rate": 1.3438364227569215e-06, "loss": 0.7002, "step": 9710 }, { "epoch": 0.39, "learning_rate": 1.3432311387712377e-06, "loss": 0.6777, "step": 9715 }, { "epoch": 0.39, "learning_rate": 1.3426257122103237e-06, "loss": 0.7075, "step": 9720 }, { "epoch": 0.4, "learning_rate": 1.3420201433256689e-06, "loss": 0.707, "step": 9725 }, { "epoch": 0.4, "learning_rate": 1.3414144323688204e-06, "loss": 0.67, "step": 9730 }, { "epoch": 0.4, "learning_rate": 1.3408085795913862e-06, "loss": 0.6853, "step": 9735 }, { "epoch": 0.4, "learning_rate": 1.3402025852450317e-06, "loss": 0.6859, "step": 9740 }, { "epoch": 0.4, "learning_rate": 1.3395964495814815e-06, "loss": 0.7109, "step": 9745 }, { "epoch": 0.4, "learning_rate": 1.33899017285252e-06, "loss": 0.7126, "step": 9750 }, { "epoch": 0.4, "learning_rate": 1.3383837553099882e-06, "loss": 0.6782, "step": 9755 }, { "epoch": 0.4, "learning_rate": 1.3377771972057876e-06, "loss": 0.6577, "step": 9760 }, { "epoch": 0.4, "learning_rate": 1.3371704987918763e-06, "loss": 0.6859, "step": 9765 }, { "epoch": 0.4, "learning_rate": 1.3365636603202724e-06, "loss": 0.705, "step": 9770 }, { "epoch": 0.4, "learning_rate": 1.3359566820430509e-06, "loss": 0.6359, "step": 9775 }, { "epoch": 0.4, "learning_rate": 1.335349564212345e-06, "loss": 0.6926, "step": 9780 }, { "epoch": 0.4, "learning_rate": 1.3347423070803465e-06, "loss": 0.6667, "step": 9785 }, { "epoch": 0.4, "learning_rate": 1.3341349108993047e-06, "loss": 0.7033, "step": 9790 }, { "epoch": 0.4, "learning_rate": 1.3335273759215269e-06, "loss": 0.6923, "step": 9795 }, { "epoch": 0.4, "learning_rate": 1.3329197023993774e-06, "loss": 0.706, "step": 9800 }, { "epoch": 0.4, "learning_rate": 1.3323118905852789e-06, "loss": 0.6059, "step": 9805 }, { "epoch": 0.4, "learning_rate": 1.331703940731711e-06, "loss": 0.7087, "step": 9810 }, { "epoch": 0.4, "learning_rate": 1.3310958530912106e-06, "loss": 0.7124, "step": 9815 }, { "epoch": 0.4, "learning_rate": 1.3304876279163725e-06, "loss": 0.6984, "step": 9820 }, { "epoch": 0.4, "learning_rate": 1.3298792654598474e-06, "loss": 0.6993, "step": 9825 }, { "epoch": 0.4, "learning_rate": 1.3292707659743442e-06, "loss": 0.7156, "step": 9830 }, { "epoch": 0.4, "learning_rate": 1.3286621297126285e-06, "loss": 0.7062, "step": 9835 }, { "epoch": 0.4, "learning_rate": 1.3280533569275223e-06, "loss": 0.6437, "step": 9840 }, { "epoch": 0.4, "learning_rate": 1.3274444478719048e-06, "loss": 0.6651, "step": 9845 }, { "epoch": 0.4, "learning_rate": 1.3268354027987108e-06, "loss": 0.6647, "step": 9850 }, { "epoch": 0.4, "learning_rate": 1.3262262219609329e-06, "loss": 0.6911, "step": 9855 }, { "epoch": 0.4, "learning_rate": 1.3256169056116195e-06, "loss": 0.6931, "step": 9860 }, { "epoch": 0.4, "learning_rate": 1.3250074540038751e-06, "loss": 0.6979, "step": 9865 }, { "epoch": 0.4, "learning_rate": 1.324397867390861e-06, "loss": 0.6642, "step": 9870 }, { "epoch": 0.4, "learning_rate": 1.3237881460257933e-06, "loss": 0.6457, "step": 9875 }, { "epoch": 0.4, "learning_rate": 1.3231782901619458e-06, "loss": 0.6684, "step": 9880 }, { "epoch": 0.4, "learning_rate": 1.3225683000526474e-06, "loss": 0.6744, "step": 9885 }, { "epoch": 0.4, "learning_rate": 1.321958175951282e-06, "loss": 0.6504, "step": 9890 }, { "epoch": 0.4, "learning_rate": 1.3213479181112903e-06, "loss": 0.7047, "step": 9895 }, { "epoch": 0.4, "learning_rate": 1.320737526786168e-06, "loss": 0.6802, "step": 9900 }, { "epoch": 0.4, "learning_rate": 1.320127002229466e-06, "loss": 0.6668, "step": 9905 }, { "epoch": 0.4, "learning_rate": 1.319516344694792e-06, "loss": 0.6759, "step": 9910 }, { "epoch": 0.4, "learning_rate": 1.3189055544358065e-06, "loss": 0.6614, "step": 9915 }, { "epoch": 0.4, "learning_rate": 1.3182946317062271e-06, "loss": 0.6613, "step": 9920 }, { "epoch": 0.4, "learning_rate": 1.3176835767598259e-06, "loss": 0.7015, "step": 9925 }, { "epoch": 0.4, "learning_rate": 1.3170723898504293e-06, "loss": 0.6607, "step": 9930 }, { "epoch": 0.4, "learning_rate": 1.3164610712319197e-06, "loss": 0.7008, "step": 9935 }, { "epoch": 0.4, "learning_rate": 1.3158496211582328e-06, "loss": 0.6727, "step": 9940 }, { "epoch": 0.4, "learning_rate": 1.3152380398833604e-06, "loss": 0.6481, "step": 9945 }, { "epoch": 0.4, "learning_rate": 1.3146263276613474e-06, "loss": 0.6803, "step": 9950 }, { "epoch": 0.4, "learning_rate": 1.3140144847462943e-06, "loss": 0.6999, "step": 9955 }, { "epoch": 0.4, "learning_rate": 1.313402511392355e-06, "loss": 0.7099, "step": 9960 }, { "epoch": 0.4, "learning_rate": 1.3127904078537378e-06, "loss": 0.6695, "step": 9965 }, { "epoch": 0.4, "learning_rate": 1.3121781743847054e-06, "loss": 0.6898, "step": 9970 }, { "epoch": 0.41, "learning_rate": 1.311565811239574e-06, "loss": 0.6724, "step": 9975 }, { "epoch": 0.41, "learning_rate": 1.3109533186727145e-06, "loss": 0.6905, "step": 9980 }, { "epoch": 0.41, "learning_rate": 1.3103406969385502e-06, "loss": 0.6442, "step": 9985 }, { "epoch": 0.41, "learning_rate": 1.3097279462915594e-06, "loss": 0.6735, "step": 9990 }, { "epoch": 0.41, "learning_rate": 1.3091150669862734e-06, "loss": 0.6962, "step": 9995 }, { "epoch": 0.41, "learning_rate": 1.3085020592772767e-06, "loss": 0.7271, "step": 10000 }, { "epoch": 0.41, "learning_rate": 1.3078889234192074e-06, "loss": 0.7095, "step": 10005 }, { "epoch": 0.41, "learning_rate": 1.3072756596667569e-06, "loss": 0.6467, "step": 10010 }, { "epoch": 0.41, "learning_rate": 1.3066622682746693e-06, "loss": 0.6728, "step": 10015 }, { "epoch": 0.41, "learning_rate": 1.3060487494977426e-06, "loss": 0.6724, "step": 10020 }, { "epoch": 0.41, "learning_rate": 1.3054351035908269e-06, "loss": 0.6336, "step": 10025 }, { "epoch": 0.41, "learning_rate": 1.3048213308088253e-06, "loss": 0.677, "step": 10030 }, { "epoch": 0.41, "learning_rate": 1.3042074314066937e-06, "loss": 0.7052, "step": 10035 }, { "epoch": 0.41, "learning_rate": 1.3035934056394404e-06, "loss": 0.6877, "step": 10040 }, { "epoch": 0.41, "learning_rate": 1.3029792537621269e-06, "loss": 0.6824, "step": 10045 }, { "epoch": 0.41, "learning_rate": 1.302364976029866e-06, "loss": 0.6759, "step": 10050 }, { "epoch": 0.41, "learning_rate": 1.3017505726978239e-06, "loss": 0.7549, "step": 10055 }, { "epoch": 0.41, "learning_rate": 1.3011360440212178e-06, "loss": 0.686, "step": 10060 }, { "epoch": 0.41, "learning_rate": 1.3005213902553177e-06, "loss": 0.6835, "step": 10065 }, { "epoch": 0.41, "learning_rate": 1.2999066116554457e-06, "loss": 0.7057, "step": 10070 }, { "epoch": 0.41, "learning_rate": 1.2992917084769755e-06, "loss": 0.6839, "step": 10075 }, { "epoch": 0.41, "learning_rate": 1.2986766809753322e-06, "loss": 0.676, "step": 10080 }, { "epoch": 0.41, "learning_rate": 1.298061529405993e-06, "loss": 0.7068, "step": 10085 }, { "epoch": 0.41, "learning_rate": 1.2974462540244868e-06, "loss": 0.7212, "step": 10090 }, { "epoch": 0.41, "learning_rate": 1.2968308550863932e-06, "loss": 0.6873, "step": 10095 }, { "epoch": 0.41, "learning_rate": 1.2962153328473435e-06, "loss": 0.6773, "step": 10100 }, { "epoch": 0.41, "learning_rate": 1.295599687563021e-06, "loss": 0.6805, "step": 10105 }, { "epoch": 0.41, "learning_rate": 1.2949839194891587e-06, "loss": 0.6523, "step": 10110 }, { "epoch": 0.41, "learning_rate": 1.2943680288815418e-06, "loss": 0.6765, "step": 10115 }, { "epoch": 0.41, "learning_rate": 1.2937520159960054e-06, "loss": 0.6897, "step": 10120 }, { "epoch": 0.41, "learning_rate": 1.2931358810884362e-06, "loss": 0.6637, "step": 10125 }, { "epoch": 0.41, "learning_rate": 1.2925196244147712e-06, "loss": 0.6691, "step": 10130 }, { "epoch": 0.41, "learning_rate": 1.291903246230998e-06, "loss": 0.6603, "step": 10135 }, { "epoch": 0.41, "learning_rate": 1.2912867467931551e-06, "loss": 0.7122, "step": 10140 }, { "epoch": 0.41, "learning_rate": 1.2906701263573306e-06, "loss": 0.6517, "step": 10145 }, { "epoch": 0.41, "learning_rate": 1.2900533851796632e-06, "loss": 0.7098, "step": 10150 }, { "epoch": 0.41, "learning_rate": 1.2894365235163425e-06, "loss": 0.6925, "step": 10155 }, { "epoch": 0.41, "learning_rate": 1.2888195416236065e-06, "loss": 0.6564, "step": 10160 }, { "epoch": 0.41, "learning_rate": 1.288202439757745e-06, "loss": 0.6779, "step": 10165 }, { "epoch": 0.41, "learning_rate": 1.2875852181750964e-06, "loss": 0.7095, "step": 10170 }, { "epoch": 0.41, "learning_rate": 1.2869678771320484e-06, "loss": 0.6668, "step": 10175 }, { "epoch": 0.41, "learning_rate": 1.2863504168850406e-06, "loss": 0.6538, "step": 10180 }, { "epoch": 0.41, "learning_rate": 1.2857328376905593e-06, "loss": 0.663, "step": 10185 }, { "epoch": 0.41, "learning_rate": 1.2851151398051427e-06, "loss": 0.648, "step": 10190 }, { "epoch": 0.41, "learning_rate": 1.2844973234853762e-06, "loss": 0.6934, "step": 10195 }, { "epoch": 0.41, "learning_rate": 1.2838793889878955e-06, "loss": 0.6829, "step": 10200 }, { "epoch": 0.41, "learning_rate": 1.2832613365693857e-06, "loss": 0.6484, "step": 10205 }, { "epoch": 0.41, "learning_rate": 1.2826431664865795e-06, "loss": 0.6949, "step": 10210 }, { "epoch": 0.41, "learning_rate": 1.2820248789962605e-06, "loss": 0.6975, "step": 10215 }, { "epoch": 0.42, "learning_rate": 1.281406474355259e-06, "loss": 0.6907, "step": 10220 }, { "epoch": 0.42, "learning_rate": 1.2807879528204554e-06, "loss": 0.6599, "step": 10225 }, { "epoch": 0.42, "learning_rate": 1.280169314648778e-06, "loss": 0.6655, "step": 10230 }, { "epoch": 0.42, "learning_rate": 1.2795505600972036e-06, "loss": 0.6827, "step": 10235 }, { "epoch": 0.42, "learning_rate": 1.2789316894227582e-06, "loss": 0.6679, "step": 10240 }, { "epoch": 0.42, "learning_rate": 1.2783127028825143e-06, "loss": 0.6862, "step": 10245 }, { "epoch": 0.42, "learning_rate": 1.2776936007335942e-06, "loss": 0.6659, "step": 10250 }, { "epoch": 0.42, "learning_rate": 1.2770743832331667e-06, "loss": 0.7268, "step": 10255 }, { "epoch": 0.42, "learning_rate": 1.2764550506384504e-06, "loss": 0.6597, "step": 10260 }, { "epoch": 0.42, "learning_rate": 1.2758356032067103e-06, "loss": 0.6416, "step": 10265 }, { "epoch": 0.42, "learning_rate": 1.2752160411952592e-06, "loss": 0.7008, "step": 10270 }, { "epoch": 0.42, "learning_rate": 1.2745963648614583e-06, "loss": 0.6704, "step": 10275 }, { "epoch": 0.42, "learning_rate": 1.2739765744627154e-06, "loss": 0.6444, "step": 10280 }, { "epoch": 0.42, "learning_rate": 1.2733566702564855e-06, "loss": 0.6594, "step": 10285 }, { "epoch": 0.42, "learning_rate": 1.2727366525002728e-06, "loss": 0.6597, "step": 10290 }, { "epoch": 0.42, "learning_rate": 1.2721165214516259e-06, "loss": 0.6624, "step": 10295 }, { "epoch": 0.42, "learning_rate": 1.2714962773681427e-06, "loss": 0.6726, "step": 10300 }, { "epoch": 0.42, "learning_rate": 1.270875920507467e-06, "loss": 0.6924, "step": 10305 }, { "epoch": 0.42, "learning_rate": 1.2702554511272894e-06, "loss": 0.631, "step": 10310 }, { "epoch": 0.42, "learning_rate": 1.2696348694853476e-06, "loss": 0.6702, "step": 10315 }, { "epoch": 0.42, "learning_rate": 1.2690141758394259e-06, "loss": 0.6808, "step": 10320 }, { "epoch": 0.42, "learning_rate": 1.2683933704473552e-06, "loss": 0.7162, "step": 10325 }, { "epoch": 0.42, "learning_rate": 1.2677724535670126e-06, "loss": 0.6696, "step": 10330 }, { "epoch": 0.42, "learning_rate": 1.267151425456321e-06, "loss": 0.6856, "step": 10335 }, { "epoch": 0.42, "learning_rate": 1.2665302863732508e-06, "loss": 0.6957, "step": 10340 }, { "epoch": 0.42, "learning_rate": 1.2659090365758174e-06, "loss": 0.6934, "step": 10345 }, { "epoch": 0.42, "learning_rate": 1.2652876763220828e-06, "loss": 0.6666, "step": 10350 }, { "epoch": 0.42, "learning_rate": 1.2646662058701547e-06, "loss": 0.7049, "step": 10355 }, { "epoch": 0.42, "learning_rate": 1.2640446254781855e-06, "loss": 0.685, "step": 10360 }, { "epoch": 0.42, "learning_rate": 1.263422935404376e-06, "loss": 0.6696, "step": 10365 }, { "epoch": 0.42, "learning_rate": 1.2628011359069694e-06, "loss": 0.6905, "step": 10370 }, { "epoch": 0.42, "learning_rate": 1.2621792272442569e-06, "loss": 0.7034, "step": 10375 }, { "epoch": 0.42, "learning_rate": 1.2615572096745732e-06, "loss": 0.6746, "step": 10380 }, { "epoch": 0.42, "learning_rate": 1.2609350834562992e-06, "loss": 0.6676, "step": 10385 }, { "epoch": 0.42, "learning_rate": 1.2603128488478605e-06, "loss": 0.6716, "step": 10390 }, { "epoch": 0.42, "learning_rate": 1.2596905061077283e-06, "loss": 0.6554, "step": 10395 }, { "epoch": 0.42, "learning_rate": 1.2590680554944184e-06, "loss": 0.6564, "step": 10400 }, { "epoch": 0.42, "eval_loss": 0.6432496309280396, "eval_runtime": 144.6412, "eval_samples_per_second": 16.358, "eval_steps_per_second": 2.731, "step": 10400 }, { "epoch": 0.42, "learning_rate": 1.2584454972664912e-06, "loss": 0.6727, "step": 10405 }, { "epoch": 0.42, "learning_rate": 1.257822831682552e-06, "loss": 0.6545, "step": 10410 }, { "epoch": 0.42, "learning_rate": 1.2572000590012505e-06, "loss": 0.6794, "step": 10415 }, { "epoch": 0.42, "learning_rate": 1.2565771794812812e-06, "loss": 0.682, "step": 10420 }, { "epoch": 0.42, "learning_rate": 1.255954193381383e-06, "loss": 0.6566, "step": 10425 }, { "epoch": 0.42, "learning_rate": 1.2553311009603389e-06, "loss": 0.6396, "step": 10430 }, { "epoch": 0.42, "learning_rate": 1.2547079024769756e-06, "loss": 0.6685, "step": 10435 }, { "epoch": 0.42, "learning_rate": 1.2540845981901648e-06, "loss": 0.6409, "step": 10440 }, { "epoch": 0.42, "learning_rate": 1.2534611883588213e-06, "loss": 0.6928, "step": 10445 }, { "epoch": 0.42, "learning_rate": 1.2528376732419048e-06, "loss": 0.7162, "step": 10450 }, { "epoch": 0.42, "learning_rate": 1.2522140530984173e-06, "loss": 0.6575, "step": 10455 }, { "epoch": 0.42, "learning_rate": 1.2515903281874057e-06, "loss": 0.7134, "step": 10460 }, { "epoch": 0.43, "learning_rate": 1.2509664987679599e-06, "loss": 0.6521, "step": 10465 }, { "epoch": 0.43, "learning_rate": 1.2503425650992124e-06, "loss": 0.6655, "step": 10470 }, { "epoch": 0.43, "learning_rate": 1.2497185274403407e-06, "loss": 0.6606, "step": 10475 }, { "epoch": 0.43, "learning_rate": 1.2490943860505645e-06, "loss": 0.6419, "step": 10480 }, { "epoch": 0.43, "learning_rate": 1.2484701411891465e-06, "loss": 0.6822, "step": 10485 }, { "epoch": 0.43, "learning_rate": 1.2478457931153927e-06, "loss": 0.6705, "step": 10490 }, { "epoch": 0.43, "learning_rate": 1.2472213420886518e-06, "loss": 0.638, "step": 10495 }, { "epoch": 0.43, "learning_rate": 1.2465967883683152e-06, "loss": 0.6684, "step": 10500 }, { "epoch": 0.43, "learning_rate": 1.2459721322138173e-06, "loss": 0.668, "step": 10505 }, { "epoch": 0.43, "learning_rate": 1.245347373884635e-06, "loss": 0.6665, "step": 10510 }, { "epoch": 0.43, "learning_rate": 1.244722513640287e-06, "loss": 0.6971, "step": 10515 }, { "epoch": 0.43, "learning_rate": 1.2440975517403352e-06, "loss": 0.6807, "step": 10520 }, { "epoch": 0.43, "learning_rate": 1.243472488444383e-06, "loss": 0.6471, "step": 10525 }, { "epoch": 0.43, "learning_rate": 1.2428473240120763e-06, "loss": 0.658, "step": 10530 }, { "epoch": 0.43, "learning_rate": 1.2422220587031033e-06, "loss": 0.7214, "step": 10535 }, { "epoch": 0.43, "learning_rate": 1.2415966927771938e-06, "loss": 0.6445, "step": 10540 }, { "epoch": 0.43, "learning_rate": 1.2409712264941189e-06, "loss": 0.6899, "step": 10545 }, { "epoch": 0.43, "learning_rate": 1.240345660113692e-06, "loss": 0.7066, "step": 10550 }, { "epoch": 0.43, "learning_rate": 1.2397199938957678e-06, "loss": 0.6361, "step": 10555 }, { "epoch": 0.43, "learning_rate": 1.2390942281002435e-06, "loss": 0.6871, "step": 10560 }, { "epoch": 0.43, "learning_rate": 1.238468362987056e-06, "loss": 0.6544, "step": 10565 }, { "epoch": 0.43, "learning_rate": 1.237842398816184e-06, "loss": 0.6198, "step": 10570 }, { "epoch": 0.43, "learning_rate": 1.237216335847648e-06, "loss": 0.6907, "step": 10575 }, { "epoch": 0.43, "learning_rate": 1.236590174341509e-06, "loss": 0.6411, "step": 10580 }, { "epoch": 0.43, "learning_rate": 1.2359639145578693e-06, "loss": 0.6724, "step": 10585 }, { "epoch": 0.43, "learning_rate": 1.2353375567568715e-06, "loss": 0.6606, "step": 10590 }, { "epoch": 0.43, "learning_rate": 1.234711101198699e-06, "loss": 0.6745, "step": 10595 }, { "epoch": 0.43, "learning_rate": 1.2340845481435764e-06, "loss": 0.6417, "step": 10600 }, { "epoch": 0.43, "learning_rate": 1.233457897851768e-06, "loss": 0.6824, "step": 10605 }, { "epoch": 0.43, "learning_rate": 1.2328311505835794e-06, "loss": 0.6726, "step": 10610 }, { "epoch": 0.43, "learning_rate": 1.2322043065993554e-06, "loss": 0.692, "step": 10615 }, { "epoch": 0.43, "learning_rate": 1.2315773661594817e-06, "loss": 0.7019, "step": 10620 }, { "epoch": 0.43, "learning_rate": 1.2309503295243844e-06, "loss": 0.6775, "step": 10625 }, { "epoch": 0.43, "learning_rate": 1.230323196954528e-06, "loss": 0.6774, "step": 10630 }, { "epoch": 0.43, "learning_rate": 1.229695968710419e-06, "loss": 0.7227, "step": 10635 }, { "epoch": 0.43, "learning_rate": 1.2290686450526021e-06, "loss": 0.6676, "step": 10640 }, { "epoch": 0.43, "learning_rate": 1.2284412262416621e-06, "loss": 0.6781, "step": 10645 }, { "epoch": 0.43, "learning_rate": 1.2278137125382235e-06, "loss": 0.666, "step": 10650 }, { "epoch": 0.43, "learning_rate": 1.2271861042029496e-06, "loss": 0.6926, "step": 10655 }, { "epoch": 0.43, "learning_rate": 1.2265584014965439e-06, "loss": 0.6458, "step": 10660 }, { "epoch": 0.43, "learning_rate": 1.2259306046797486e-06, "loss": 0.6518, "step": 10665 }, { "epoch": 0.43, "learning_rate": 1.2253027140133444e-06, "loss": 0.6911, "step": 10670 }, { "epoch": 0.43, "learning_rate": 1.2246747297581527e-06, "loss": 0.6738, "step": 10675 }, { "epoch": 0.43, "learning_rate": 1.2240466521750318e-06, "loss": 0.6601, "step": 10680 }, { "epoch": 0.43, "learning_rate": 1.22341848152488e-06, "loss": 0.6561, "step": 10685 }, { "epoch": 0.43, "learning_rate": 1.222790218068634e-06, "loss": 0.6899, "step": 10690 }, { "epoch": 0.43, "learning_rate": 1.2221618620672689e-06, "loss": 0.6685, "step": 10695 }, { "epoch": 0.43, "learning_rate": 1.2215334137817985e-06, "loss": 0.6926, "step": 10700 }, { "epoch": 0.43, "learning_rate": 1.2209048734732742e-06, "loss": 0.6241, "step": 10705 }, { "epoch": 0.44, "learning_rate": 1.2202762414027865e-06, "loss": 0.6556, "step": 10710 }, { "epoch": 0.44, "learning_rate": 1.219647517831464e-06, "loss": 0.6612, "step": 10715 }, { "epoch": 0.44, "learning_rate": 1.2190187030204727e-06, "loss": 0.6931, "step": 10720 }, { "epoch": 0.44, "learning_rate": 1.2183897972310168e-06, "loss": 0.6806, "step": 10725 }, { "epoch": 0.44, "learning_rate": 1.2177608007243384e-06, "loss": 0.7048, "step": 10730 }, { "epoch": 0.44, "learning_rate": 1.2171317137617172e-06, "loss": 0.6722, "step": 10735 }, { "epoch": 0.44, "learning_rate": 1.2165025366044698e-06, "loss": 0.7012, "step": 10740 }, { "epoch": 0.44, "learning_rate": 1.2158732695139523e-06, "loss": 0.6476, "step": 10745 }, { "epoch": 0.44, "learning_rate": 1.2152439127515557e-06, "loss": 0.661, "step": 10750 }, { "epoch": 0.44, "learning_rate": 1.2146144665787094e-06, "loss": 0.6862, "step": 10755 }, { "epoch": 0.44, "learning_rate": 1.2139849312568803e-06, "loss": 0.6949, "step": 10760 }, { "epoch": 0.44, "learning_rate": 1.2133553070475714e-06, "loss": 0.6565, "step": 10765 }, { "epoch": 0.44, "learning_rate": 1.2127255942123236e-06, "loss": 0.6803, "step": 10770 }, { "epoch": 0.44, "learning_rate": 1.2120957930127139e-06, "loss": 0.6909, "step": 10775 }, { "epoch": 0.44, "learning_rate": 1.211465903710356e-06, "loss": 0.6885, "step": 10780 }, { "epoch": 0.44, "learning_rate": 1.210835926566901e-06, "loss": 0.6837, "step": 10785 }, { "epoch": 0.44, "learning_rate": 1.2102058618440354e-06, "loss": 0.676, "step": 10790 }, { "epoch": 0.44, "learning_rate": 1.2095757098034829e-06, "loss": 0.6624, "step": 10795 }, { "epoch": 0.44, "learning_rate": 1.2089454707070033e-06, "loss": 0.6218, "step": 10800 }, { "epoch": 0.44, "learning_rate": 1.208315144816392e-06, "loss": 0.6528, "step": 10805 }, { "epoch": 0.44, "learning_rate": 1.2076847323934813e-06, "loss": 0.6933, "step": 10810 }, { "epoch": 0.44, "learning_rate": 1.2070542337001385e-06, "loss": 0.6646, "step": 10815 }, { "epoch": 0.44, "learning_rate": 1.206423648998268e-06, "loss": 0.6935, "step": 10820 }, { "epoch": 0.44, "learning_rate": 1.205792978549809e-06, "loss": 0.6853, "step": 10825 }, { "epoch": 0.44, "learning_rate": 1.2051622226167362e-06, "loss": 0.6648, "step": 10830 }, { "epoch": 0.44, "learning_rate": 1.2045313814610602e-06, "loss": 0.6587, "step": 10835 }, { "epoch": 0.44, "learning_rate": 1.203900455344827e-06, "loss": 0.6662, "step": 10840 }, { "epoch": 0.44, "learning_rate": 1.2032694445301182e-06, "loss": 0.6504, "step": 10845 }, { "epoch": 0.44, "learning_rate": 1.2026383492790495e-06, "loss": 0.7228, "step": 10850 }, { "epoch": 0.44, "learning_rate": 1.2020071698537727e-06, "loss": 0.6674, "step": 10855 }, { "epoch": 0.44, "learning_rate": 1.2013759065164748e-06, "loss": 0.7009, "step": 10860 }, { "epoch": 0.44, "learning_rate": 1.200744559529376e-06, "loss": 0.6588, "step": 10865 }, { "epoch": 0.44, "learning_rate": 1.2001131291547335e-06, "loss": 0.651, "step": 10870 }, { "epoch": 0.44, "learning_rate": 1.1994816156548368e-06, "loss": 0.644, "step": 10875 }, { "epoch": 0.44, "learning_rate": 1.1988500192920122e-06, "loss": 0.6669, "step": 10880 }, { "epoch": 0.44, "learning_rate": 1.1982183403286186e-06, "loss": 0.6718, "step": 10885 }, { "epoch": 0.44, "learning_rate": 1.1975865790270503e-06, "loss": 0.6594, "step": 10890 }, { "epoch": 0.44, "learning_rate": 1.1969547356497356e-06, "loss": 0.6279, "step": 10895 }, { "epoch": 0.44, "learning_rate": 1.1963228104591362e-06, "loss": 0.6866, "step": 10900 }, { "epoch": 0.44, "learning_rate": 1.1956908037177484e-06, "loss": 0.6836, "step": 10905 }, { "epoch": 0.44, "learning_rate": 1.1950587156881032e-06, "loss": 0.6323, "step": 10910 }, { "epoch": 0.44, "learning_rate": 1.1944265466327634e-06, "loss": 0.6714, "step": 10915 }, { "epoch": 0.44, "learning_rate": 1.193794296814327e-06, "loss": 0.6849, "step": 10920 }, { "epoch": 0.44, "learning_rate": 1.193161966495425e-06, "loss": 0.6654, "step": 10925 }, { "epoch": 0.44, "learning_rate": 1.1925295559387221e-06, "loss": 0.7011, "step": 10930 }, { "epoch": 0.44, "learning_rate": 1.191897065406916e-06, "loss": 0.6452, "step": 10935 }, { "epoch": 0.44, "learning_rate": 1.1912644951627375e-06, "loss": 0.6537, "step": 10940 }, { "epoch": 0.44, "learning_rate": 1.1906318454689512e-06, "loss": 0.6852, "step": 10945 }, { "epoch": 0.44, "learning_rate": 1.189999116588354e-06, "loss": 0.6316, "step": 10950 }, { "epoch": 0.44, "learning_rate": 1.189366308783776e-06, "loss": 0.6368, "step": 10955 }, { "epoch": 0.45, "learning_rate": 1.1887334223180804e-06, "loss": 0.6621, "step": 10960 }, { "epoch": 0.45, "learning_rate": 1.1881004574541625e-06, "loss": 0.6432, "step": 10965 }, { "epoch": 0.45, "learning_rate": 1.18746741445495e-06, "loss": 0.6771, "step": 10970 }, { "epoch": 0.45, "learning_rate": 1.1868342935834041e-06, "loss": 0.6967, "step": 10975 }, { "epoch": 0.45, "learning_rate": 1.1862010951025175e-06, "loss": 0.7035, "step": 10980 }, { "epoch": 0.45, "learning_rate": 1.1855678192753151e-06, "loss": 0.6717, "step": 10985 }, { "epoch": 0.45, "learning_rate": 1.1849344663648543e-06, "loss": 0.6407, "step": 10990 }, { "epoch": 0.45, "learning_rate": 1.1843010366342248e-06, "loss": 0.6802, "step": 10995 }, { "epoch": 0.45, "learning_rate": 1.1836675303465472e-06, "loss": 0.6332, "step": 11000 }, { "epoch": 0.45, "learning_rate": 1.1830339477649749e-06, "loss": 0.6221, "step": 11005 }, { "epoch": 0.45, "learning_rate": 1.1824002891526923e-06, "loss": 0.6445, "step": 11010 }, { "epoch": 0.45, "learning_rate": 1.1817665547729157e-06, "loss": 0.6818, "step": 11015 }, { "epoch": 0.45, "learning_rate": 1.181132744888893e-06, "loss": 0.6772, "step": 11020 }, { "epoch": 0.45, "learning_rate": 1.1804988597639034e-06, "loss": 0.6692, "step": 11025 }, { "epoch": 0.45, "learning_rate": 1.1798648996612572e-06, "loss": 0.6727, "step": 11030 }, { "epoch": 0.45, "learning_rate": 1.1792308648442958e-06, "loss": 0.7129, "step": 11035 }, { "epoch": 0.45, "learning_rate": 1.1785967555763915e-06, "loss": 0.636, "step": 11040 }, { "epoch": 0.45, "learning_rate": 1.1779625721209484e-06, "loss": 0.6769, "step": 11045 }, { "epoch": 0.45, "learning_rate": 1.1773283147414007e-06, "loss": 0.663, "step": 11050 }, { "epoch": 0.45, "learning_rate": 1.1766939837012128e-06, "loss": 0.6988, "step": 11055 }, { "epoch": 0.45, "learning_rate": 1.1760595792638807e-06, "loss": 0.6399, "step": 11060 }, { "epoch": 0.45, "learning_rate": 1.1754251016929307e-06, "loss": 0.6709, "step": 11065 }, { "epoch": 0.45, "learning_rate": 1.174790551251919e-06, "loss": 0.661, "step": 11070 }, { "epoch": 0.45, "learning_rate": 1.1741559282044322e-06, "loss": 0.6604, "step": 11075 }, { "epoch": 0.45, "learning_rate": 1.1735212328140876e-06, "loss": 0.6601, "step": 11080 }, { "epoch": 0.45, "learning_rate": 1.1728864653445314e-06, "loss": 0.6732, "step": 11085 }, { "epoch": 0.45, "learning_rate": 1.1722516260594412e-06, "loss": 0.6564, "step": 11090 }, { "epoch": 0.45, "learning_rate": 1.1716167152225237e-06, "loss": 0.6819, "step": 11095 }, { "epoch": 0.45, "learning_rate": 1.170981733097515e-06, "loss": 0.6739, "step": 11100 }, { "epoch": 0.45, "learning_rate": 1.1703466799481817e-06, "loss": 0.6658, "step": 11105 }, { "epoch": 0.45, "learning_rate": 1.1697115560383185e-06, "loss": 0.6999, "step": 11110 }, { "epoch": 0.45, "learning_rate": 1.1690763616317507e-06, "loss": 0.6797, "step": 11115 }, { "epoch": 0.45, "learning_rate": 1.168441096992333e-06, "loss": 0.6541, "step": 11120 }, { "epoch": 0.45, "learning_rate": 1.1678057623839484e-06, "loss": 0.6586, "step": 11125 }, { "epoch": 0.45, "learning_rate": 1.1671703580705094e-06, "loss": 0.6609, "step": 11130 }, { "epoch": 0.45, "learning_rate": 1.1665348843159574e-06, "loss": 0.6493, "step": 11135 }, { "epoch": 0.45, "learning_rate": 1.1658993413842624e-06, "loss": 0.6771, "step": 11140 }, { "epoch": 0.45, "learning_rate": 1.1652637295394244e-06, "loss": 0.6806, "step": 11145 }, { "epoch": 0.45, "learning_rate": 1.1646280490454696e-06, "loss": 0.6739, "step": 11150 }, { "epoch": 0.45, "learning_rate": 1.1639923001664555e-06, "loss": 0.6482, "step": 11155 }, { "epoch": 0.45, "learning_rate": 1.1633564831664656e-06, "loss": 0.6759, "step": 11160 }, { "epoch": 0.45, "learning_rate": 1.1627205983096135e-06, "loss": 0.6722, "step": 11165 }, { "epoch": 0.45, "learning_rate": 1.16208464586004e-06, "loss": 0.6531, "step": 11170 }, { "epoch": 0.45, "learning_rate": 1.1614486260819138e-06, "loss": 0.6934, "step": 11175 }, { "epoch": 0.45, "learning_rate": 1.1608125392394327e-06, "loss": 0.6637, "step": 11180 }, { "epoch": 0.45, "learning_rate": 1.1601763855968212e-06, "loss": 0.6879, "step": 11185 }, { "epoch": 0.45, "learning_rate": 1.1595401654183322e-06, "loss": 0.6499, "step": 11190 }, { "epoch": 0.45, "learning_rate": 1.1589038789682456e-06, "loss": 0.7014, "step": 11195 }, { "epoch": 0.45, "learning_rate": 1.1582675265108699e-06, "loss": 0.6136, "step": 11200 }, { "epoch": 0.45, "eval_loss": 0.6401504874229431, "eval_runtime": 144.3337, "eval_samples_per_second": 16.393, "eval_steps_per_second": 2.737, "step": 11200 }, { "epoch": 0.46, "learning_rate": 1.15763110831054e-06, "loss": 0.6646, "step": 11205 }, { "epoch": 0.46, "learning_rate": 1.1569946246316182e-06, "loss": 0.7085, "step": 11210 }, { "epoch": 0.46, "learning_rate": 1.156358075738495e-06, "loss": 0.6621, "step": 11215 }, { "epoch": 0.46, "learning_rate": 1.1557214618955868e-06, "loss": 0.6703, "step": 11220 }, { "epoch": 0.46, "learning_rate": 1.1550847833673374e-06, "loss": 0.7204, "step": 11225 }, { "epoch": 0.46, "learning_rate": 1.154448040418218e-06, "loss": 0.6923, "step": 11230 }, { "epoch": 0.46, "learning_rate": 1.1538112333127253e-06, "loss": 0.6608, "step": 11235 }, { "epoch": 0.46, "learning_rate": 1.1531743623153842e-06, "loss": 0.6824, "step": 11240 }, { "epoch": 0.46, "learning_rate": 1.1525374276907449e-06, "loss": 0.7322, "step": 11245 }, { "epoch": 0.46, "learning_rate": 1.1519004297033847e-06, "loss": 0.6432, "step": 11250 }, { "epoch": 0.46, "learning_rate": 1.1512633686179071e-06, "loss": 0.6795, "step": 11255 }, { "epoch": 0.46, "learning_rate": 1.1506262446989417e-06, "loss": 0.7229, "step": 11260 }, { "epoch": 0.46, "learning_rate": 1.149989058211144e-06, "loss": 0.6954, "step": 11265 }, { "epoch": 0.46, "learning_rate": 1.149351809419196e-06, "loss": 0.6879, "step": 11270 }, { "epoch": 0.46, "learning_rate": 1.148714498587805e-06, "loss": 0.6642, "step": 11275 }, { "epoch": 0.46, "learning_rate": 1.1480771259817048e-06, "loss": 0.7015, "step": 11280 }, { "epoch": 0.46, "learning_rate": 1.147439691865654e-06, "loss": 0.6467, "step": 11285 }, { "epoch": 0.46, "learning_rate": 1.1468021965044377e-06, "loss": 0.7045, "step": 11290 }, { "epoch": 0.46, "learning_rate": 1.1461646401628654e-06, "loss": 0.6635, "step": 11295 }, { "epoch": 0.46, "learning_rate": 1.1455270231057728e-06, "loss": 0.6943, "step": 11300 }, { "epoch": 0.46, "learning_rate": 1.14488934559802e-06, "loss": 0.6626, "step": 11305 }, { "epoch": 0.46, "learning_rate": 1.1442516079044932e-06, "loss": 0.6716, "step": 11310 }, { "epoch": 0.46, "learning_rate": 1.1436138102901031e-06, "loss": 0.6748, "step": 11315 }, { "epoch": 0.46, "learning_rate": 1.142975953019785e-06, "loss": 0.7028, "step": 11320 }, { "epoch": 0.46, "learning_rate": 1.1423380363584999e-06, "loss": 0.6044, "step": 11325 }, { "epoch": 0.46, "learning_rate": 1.1417000605712316e-06, "loss": 0.6831, "step": 11330 }, { "epoch": 0.46, "learning_rate": 1.1410620259229908e-06, "loss": 0.6632, "step": 11335 }, { "epoch": 0.46, "learning_rate": 1.1404239326788115e-06, "loss": 0.6393, "step": 11340 }, { "epoch": 0.46, "learning_rate": 1.1397857811037512e-06, "loss": 0.6501, "step": 11345 }, { "epoch": 0.46, "learning_rate": 1.1391475714628932e-06, "loss": 0.6398, "step": 11350 }, { "epoch": 0.46, "learning_rate": 1.138509304021344e-06, "loss": 0.6784, "step": 11355 }, { "epoch": 0.46, "learning_rate": 1.1378709790442346e-06, "loss": 0.7065, "step": 11360 }, { "epoch": 0.46, "learning_rate": 1.1372325967967196e-06, "loss": 0.6189, "step": 11365 }, { "epoch": 0.46, "learning_rate": 1.1365941575439772e-06, "loss": 0.6652, "step": 11370 }, { "epoch": 0.46, "learning_rate": 1.1359556615512099e-06, "loss": 0.6752, "step": 11375 }, { "epoch": 0.46, "learning_rate": 1.1353171090836427e-06, "loss": 0.6668, "step": 11380 }, { "epoch": 0.46, "learning_rate": 1.134678500406525e-06, "loss": 0.6587, "step": 11385 }, { "epoch": 0.46, "learning_rate": 1.13403983578513e-06, "loss": 0.6873, "step": 11390 }, { "epoch": 0.46, "learning_rate": 1.1334011154847527e-06, "loss": 0.6975, "step": 11395 }, { "epoch": 0.46, "learning_rate": 1.1327623397707122e-06, "loss": 0.6784, "step": 11400 }, { "epoch": 0.46, "learning_rate": 1.1321235089083502e-06, "loss": 0.6643, "step": 11405 }, { "epoch": 0.46, "learning_rate": 1.1314846231630315e-06, "loss": 0.6754, "step": 11410 }, { "epoch": 0.46, "learning_rate": 1.1308456828001441e-06, "loss": 0.6689, "step": 11415 }, { "epoch": 0.46, "learning_rate": 1.1302066880850975e-06, "loss": 0.6594, "step": 11420 }, { "epoch": 0.46, "learning_rate": 1.1295676392833253e-06, "loss": 0.6416, "step": 11425 }, { "epoch": 0.46, "learning_rate": 1.1289285366602826e-06, "loss": 0.7223, "step": 11430 }, { "epoch": 0.46, "learning_rate": 1.1282893804814468e-06, "loss": 0.6944, "step": 11435 }, { "epoch": 0.46, "learning_rate": 1.127650171012318e-06, "loss": 0.6598, "step": 11440 }, { "epoch": 0.46, "learning_rate": 1.1270109085184182e-06, "loss": 0.7102, "step": 11445 }, { "epoch": 0.47, "learning_rate": 1.1263715932652919e-06, "loss": 0.6803, "step": 11450 }, { "epoch": 0.47, "learning_rate": 1.1257322255185044e-06, "loss": 0.65, "step": 11455 }, { "epoch": 0.47, "learning_rate": 1.1250928055436443e-06, "loss": 0.7018, "step": 11460 }, { "epoch": 0.47, "learning_rate": 1.12445333360632e-06, "loss": 0.6409, "step": 11465 }, { "epoch": 0.47, "learning_rate": 1.1238138099721634e-06, "loss": 0.6724, "step": 11470 }, { "epoch": 0.47, "learning_rate": 1.1231742349068271e-06, "loss": 0.6854, "step": 11475 }, { "epoch": 0.47, "learning_rate": 1.1225346086759846e-06, "loss": 0.7102, "step": 11480 }, { "epoch": 0.47, "learning_rate": 1.1218949315453314e-06, "loss": 0.6584, "step": 11485 }, { "epoch": 0.47, "learning_rate": 1.1212552037805836e-06, "loss": 0.6584, "step": 11490 }, { "epoch": 0.47, "learning_rate": 1.1206154256474786e-06, "loss": 0.6846, "step": 11495 }, { "epoch": 0.47, "learning_rate": 1.119975597411775e-06, "loss": 0.7037, "step": 11500 }, { "epoch": 0.47, "learning_rate": 1.1193357193392512e-06, "loss": 0.6689, "step": 11505 }, { "epoch": 0.47, "learning_rate": 1.1186957916957078e-06, "loss": 0.6556, "step": 11510 }, { "epoch": 0.47, "learning_rate": 1.1180558147469642e-06, "loss": 0.6557, "step": 11515 }, { "epoch": 0.47, "learning_rate": 1.1174157887588623e-06, "loss": 0.6662, "step": 11520 }, { "epoch": 0.47, "learning_rate": 1.1167757139972626e-06, "loss": 0.6702, "step": 11525 }, { "epoch": 0.47, "learning_rate": 1.116135590728047e-06, "loss": 0.6682, "step": 11530 }, { "epoch": 0.47, "learning_rate": 1.115495419217117e-06, "loss": 0.6855, "step": 11535 }, { "epoch": 0.47, "learning_rate": 1.114855199730394e-06, "loss": 0.6796, "step": 11540 }, { "epoch": 0.47, "learning_rate": 1.1142149325338199e-06, "loss": 0.6481, "step": 11545 }, { "epoch": 0.47, "learning_rate": 1.1135746178933563e-06, "loss": 0.7167, "step": 11550 }, { "epoch": 0.47, "learning_rate": 1.112934256074984e-06, "loss": 0.6602, "step": 11555 }, { "epoch": 0.47, "learning_rate": 1.1122938473447038e-06, "loss": 0.6848, "step": 11560 }, { "epoch": 0.47, "learning_rate": 1.1116533919685361e-06, "loss": 0.6628, "step": 11565 }, { "epoch": 0.47, "learning_rate": 1.1110128902125201e-06, "loss": 0.6826, "step": 11570 }, { "epoch": 0.47, "learning_rate": 1.1103723423427153e-06, "loss": 0.6527, "step": 11575 }, { "epoch": 0.47, "learning_rate": 1.1097317486251992e-06, "loss": 0.6737, "step": 11580 }, { "epoch": 0.47, "learning_rate": 1.109091109326069e-06, "loss": 0.6854, "step": 11585 }, { "epoch": 0.47, "learning_rate": 1.1084504247114406e-06, "loss": 0.7145, "step": 11590 }, { "epoch": 0.47, "learning_rate": 1.107809695047449e-06, "loss": 0.6756, "step": 11595 }, { "epoch": 0.47, "learning_rate": 1.1071689206002474e-06, "loss": 0.6725, "step": 11600 }, { "epoch": 0.47, "learning_rate": 1.1065281016360083e-06, "loss": 0.7145, "step": 11605 }, { "epoch": 0.47, "learning_rate": 1.1058872384209224e-06, "loss": 0.6899, "step": 11610 }, { "epoch": 0.47, "learning_rate": 1.1052463312211983e-06, "loss": 0.6344, "step": 11615 }, { "epoch": 0.47, "learning_rate": 1.1046053803030637e-06, "loss": 0.654, "step": 11620 }, { "epoch": 0.47, "learning_rate": 1.1039643859327635e-06, "loss": 0.6741, "step": 11625 }, { "epoch": 0.47, "learning_rate": 1.1033233483765615e-06, "loss": 0.6814, "step": 11630 }, { "epoch": 0.47, "learning_rate": 1.1026822679007395e-06, "loss": 0.6565, "step": 11635 }, { "epoch": 0.47, "learning_rate": 1.1020411447715961e-06, "loss": 0.6916, "step": 11640 }, { "epoch": 0.47, "learning_rate": 1.1013999792554486e-06, "loss": 0.6894, "step": 11645 }, { "epoch": 0.47, "learning_rate": 1.1007587716186317e-06, "loss": 0.6698, "step": 11650 }, { "epoch": 0.47, "learning_rate": 1.1001175221274968e-06, "loss": 0.7096, "step": 11655 }, { "epoch": 0.47, "learning_rate": 1.0994762310484142e-06, "loss": 0.6887, "step": 11660 }, { "epoch": 0.47, "learning_rate": 1.0988348986477705e-06, "loss": 0.671, "step": 11665 }, { "epoch": 0.47, "learning_rate": 1.0981935251919693e-06, "loss": 0.6727, "step": 11670 }, { "epoch": 0.47, "learning_rate": 1.0975521109474318e-06, "loss": 0.6777, "step": 11675 }, { "epoch": 0.47, "learning_rate": 1.0969106561805952e-06, "loss": 0.6661, "step": 11680 }, { "epoch": 0.47, "learning_rate": 1.0962691611579154e-06, "loss": 0.6576, "step": 11685 }, { "epoch": 0.47, "learning_rate": 1.0956276261458629e-06, "loss": 0.6415, "step": 11690 }, { "epoch": 0.48, "learning_rate": 1.0949860514109264e-06, "loss": 0.6485, "step": 11695 }, { "epoch": 0.48, "learning_rate": 1.09434443721961e-06, "loss": 0.6966, "step": 11700 }, { "epoch": 0.48, "learning_rate": 1.0937027838384345e-06, "loss": 0.6746, "step": 11705 }, { "epoch": 0.48, "learning_rate": 1.093061091533938e-06, "loss": 0.6565, "step": 11710 }, { "epoch": 0.48, "learning_rate": 1.0924193605726733e-06, "loss": 0.7127, "step": 11715 }, { "epoch": 0.48, "learning_rate": 1.0917775912212099e-06, "loss": 0.6738, "step": 11720 }, { "epoch": 0.48, "learning_rate": 1.0911357837461332e-06, "loss": 0.6712, "step": 11725 }, { "epoch": 0.48, "learning_rate": 1.0904939384140445e-06, "loss": 0.6617, "step": 11730 }, { "epoch": 0.48, "learning_rate": 1.0898520554915607e-06, "loss": 0.6919, "step": 11735 }, { "epoch": 0.48, "learning_rate": 1.0892101352453142e-06, "loss": 0.6731, "step": 11740 }, { "epoch": 0.48, "learning_rate": 1.0885681779419537e-06, "loss": 0.6931, "step": 11745 }, { "epoch": 0.48, "learning_rate": 1.0879261838481426e-06, "loss": 0.631, "step": 11750 }, { "epoch": 0.48, "learning_rate": 1.0872841532305587e-06, "loss": 0.6515, "step": 11755 }, { "epoch": 0.48, "learning_rate": 1.0866420863558969e-06, "loss": 0.6674, "step": 11760 }, { "epoch": 0.48, "learning_rate": 1.0859999834908657e-06, "loss": 0.6514, "step": 11765 }, { "epoch": 0.48, "learning_rate": 1.0853578449021896e-06, "loss": 0.6393, "step": 11770 }, { "epoch": 0.48, "learning_rate": 1.084715670856607e-06, "loss": 0.6941, "step": 11775 }, { "epoch": 0.48, "learning_rate": 1.0840734616208712e-06, "loss": 0.664, "step": 11780 }, { "epoch": 0.48, "learning_rate": 1.0834312174617508e-06, "loss": 0.6636, "step": 11785 }, { "epoch": 0.48, "learning_rate": 1.0827889386460281e-06, "loss": 0.6756, "step": 11790 }, { "epoch": 0.48, "learning_rate": 1.0821466254405004e-06, "loss": 0.7116, "step": 11795 }, { "epoch": 0.48, "learning_rate": 1.0815042781119788e-06, "loss": 0.6647, "step": 11800 }, { "epoch": 0.48, "learning_rate": 1.0808618969272888e-06, "loss": 0.6404, "step": 11805 }, { "epoch": 0.48, "learning_rate": 1.0802194821532702e-06, "loss": 0.6711, "step": 11810 }, { "epoch": 0.48, "learning_rate": 1.079577034056776e-06, "loss": 0.6717, "step": 11815 }, { "epoch": 0.48, "learning_rate": 1.078934552904674e-06, "loss": 0.6705, "step": 11820 }, { "epoch": 0.48, "learning_rate": 1.0782920389638452e-06, "loss": 0.6713, "step": 11825 }, { "epoch": 0.48, "learning_rate": 1.0776494925011846e-06, "loss": 0.6474, "step": 11830 }, { "epoch": 0.48, "learning_rate": 1.0770069137836e-06, "loss": 0.6745, "step": 11835 }, { "epoch": 0.48, "learning_rate": 1.0763643030780126e-06, "loss": 0.6773, "step": 11840 }, { "epoch": 0.48, "learning_rate": 1.075721660651358e-06, "loss": 0.6767, "step": 11845 }, { "epoch": 0.48, "learning_rate": 1.0750789867705843e-06, "loss": 0.6758, "step": 11850 }, { "epoch": 0.48, "learning_rate": 1.0744362817026524e-06, "loss": 0.686, "step": 11855 }, { "epoch": 0.48, "learning_rate": 1.0737935457145364e-06, "loss": 0.6736, "step": 11860 }, { "epoch": 0.48, "learning_rate": 1.073150779073223e-06, "loss": 0.668, "step": 11865 }, { "epoch": 0.48, "learning_rate": 1.0725079820457123e-06, "loss": 0.7003, "step": 11870 }, { "epoch": 0.48, "learning_rate": 1.0718651548990163e-06, "loss": 0.682, "step": 11875 }, { "epoch": 0.48, "learning_rate": 1.0712222979001602e-06, "loss": 0.6445, "step": 11880 }, { "epoch": 0.48, "learning_rate": 1.0705794113161808e-06, "loss": 0.6872, "step": 11885 }, { "epoch": 0.48, "learning_rate": 1.0699364954141276e-06, "loss": 0.6936, "step": 11890 }, { "epoch": 0.48, "learning_rate": 1.0692935504610625e-06, "loss": 0.6195, "step": 11895 }, { "epoch": 0.48, "learning_rate": 1.068650576724059e-06, "loss": 0.6658, "step": 11900 }, { "epoch": 0.48, "learning_rate": 1.0680075744702034e-06, "loss": 0.6799, "step": 11905 }, { "epoch": 0.48, "learning_rate": 1.0673645439665925e-06, "loss": 0.6823, "step": 11910 }, { "epoch": 0.48, "learning_rate": 1.0667214854803357e-06, "loss": 0.6677, "step": 11915 }, { "epoch": 0.48, "learning_rate": 1.0660783992785541e-06, "loss": 0.6648, "step": 11920 }, { "epoch": 0.48, "learning_rate": 1.06543528562838e-06, "loss": 0.6313, "step": 11925 }, { "epoch": 0.48, "learning_rate": 1.0647921447969577e-06, "loss": 0.667, "step": 11930 }, { "epoch": 0.48, "learning_rate": 1.0641489770514418e-06, "loss": 0.6567, "step": 11935 }, { "epoch": 0.48, "learning_rate": 1.0635057826589987e-06, "loss": 0.6727, "step": 11940 }, { "epoch": 0.49, "learning_rate": 1.0628625618868056e-06, "loss": 0.6835, "step": 11945 }, { "epoch": 0.49, "learning_rate": 1.062219315002051e-06, "loss": 0.6329, "step": 11950 }, { "epoch": 0.49, "learning_rate": 1.061576042271934e-06, "loss": 0.6823, "step": 11955 }, { "epoch": 0.49, "learning_rate": 1.0609327439636647e-06, "loss": 0.6514, "step": 11960 }, { "epoch": 0.49, "learning_rate": 1.0602894203444633e-06, "loss": 0.716, "step": 11965 }, { "epoch": 0.49, "learning_rate": 1.0596460716815612e-06, "loss": 0.6778, "step": 11970 }, { "epoch": 0.49, "learning_rate": 1.059002698242199e-06, "loss": 0.6671, "step": 11975 }, { "epoch": 0.49, "learning_rate": 1.0583593002936298e-06, "loss": 0.6936, "step": 11980 }, { "epoch": 0.49, "learning_rate": 1.0577158781031147e-06, "loss": 0.7211, "step": 11985 }, { "epoch": 0.49, "learning_rate": 1.0570724319379254e-06, "loss": 0.6667, "step": 11990 }, { "epoch": 0.49, "learning_rate": 1.0564289620653446e-06, "loss": 0.7181, "step": 11995 }, { "epoch": 0.49, "learning_rate": 1.0557854687526632e-06, "loss": 0.6431, "step": 12000 }, { "epoch": 0.49, "eval_loss": 0.6379530429840088, "eval_runtime": 144.8913, "eval_samples_per_second": 16.329, "eval_steps_per_second": 2.726, "step": 12000 } ], "logging_steps": 5, "max_steps": 24619, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 400, "total_flos": 1676879253282816.0, "trial_name": null, "trial_params": null }